From 23752ababd7243622b4643926e85fd78d14a8392 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 22 Feb 2024 21:12:46 +0300 Subject: [PATCH 001/534] EDAC/synopsys: Fix ECC status and IRQ control race condition [ Upstream commit 591c946675d88dcc0ae9ff54be9d5caaee8ce1e3 ] The race condition around the ECCCLR register access happens in the IRQ disable method called in the device remove() procedure and in the ECC IRQ handler: 1. Enable IRQ: a. ECCCLR = EN_CE | EN_UE 2. Disable IRQ: a. ECCCLR = 0 3. IRQ handler: a. ECCCLR = CLR_CE | CLR_CE_CNT | CLR_CE | CLR_CE_CNT b. ECCCLR = 0 c. ECCCLR = EN_CE | EN_UE So if the IRQ disabling procedure is called concurrently with the IRQ handler method the IRQ might be actually left enabled due to the statement 3c. The root cause of the problem is that ECCCLR register (which since v3.10a has been called as ECCCTL) has intermixed ECC status data clear flags and the IRQ enable/disable flags. Thus the IRQ disabling (clear EN flags) and handling (write 1 to clear ECC status data) procedures must be serialised around the ECCCTL register modification to prevent the race. So fix the problem described above by adding the spin-lock around the ECCCLR modifications and preventing the IRQ-handler from modifying the IRQs enable flags (there is no point in disabling the IRQ and then re-enabling it again within a single IRQ handler call, see the statements 3a/3b and 3c above). Fixes: f7824ded4149 ("EDAC/synopsys: Add support for version 3 of the Synopsys EDAC DDR") Signed-off-by: Serge Semin Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240222181324.28242-2-fancer.lancer@gmail.com Stable-dep-of: 35e6dbfe1846 ("EDAC/synopsys: Fix error injection on Zynq UltraScale+") Signed-off-by: Sasha Levin --- drivers/edac/synopsys_edac.c | 50 ++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index c4fc64cbecd0..1ab2f737e4f1 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -299,6 +300,7 @@ struct synps_ecc_status { /** * struct synps_edac_priv - DDR memory controller private instance data. * @baseaddr: Base address of the DDR controller. + * @reglock: Concurrent CSRs access lock. * @message: Buffer for framing the event specific info. * @stat: ECC status information. * @p_data: Platform data. @@ -313,6 +315,7 @@ struct synps_ecc_status { */ struct synps_edac_priv { void __iomem *baseaddr; + spinlock_t reglock; char message[SYNPS_EDAC_MSG_SIZE]; struct synps_ecc_status stat; const struct synps_platform_data *p_data; @@ -408,7 +411,8 @@ out: static int zynqmp_get_error_info(struct synps_edac_priv *priv) { struct synps_ecc_status *p; - u32 regval, clearval = 0; + u32 regval, clearval; + unsigned long flags; void __iomem *base; base = priv->baseaddr; @@ -452,10 +456,14 @@ ue_err: p->ueinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK); p->ueinfo.data = readl(base + ECC_UESYND0_OFST); out: - clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT; - clearval |= ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT; + spin_lock_irqsave(&priv->reglock, flags); + + clearval = readl(base + ECC_CLR_OFST) | + ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT | + ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT; writel(clearval, base + ECC_CLR_OFST); - writel(0x0, base + ECC_CLR_OFST); + + spin_unlock_irqrestore(&priv->reglock, flags); return 0; } @@ -515,24 +523,41 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p) static void enable_intr(struct synps_edac_priv *priv) { + unsigned long flags; + /* Enable UE/CE Interrupts */ - if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) - writel(DDR_UE_MASK | DDR_CE_MASK, - priv->baseaddr + ECC_CLR_OFST); - else + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, priv->baseaddr + DDR_QOS_IRQ_EN_OFST); + return; + } + + spin_lock_irqsave(&priv->reglock, flags); + + writel(DDR_UE_MASK | DDR_CE_MASK, + priv->baseaddr + ECC_CLR_OFST); + + spin_unlock_irqrestore(&priv->reglock, flags); } static void disable_intr(struct synps_edac_priv *priv) { + unsigned long flags; + /* Disable UE/CE Interrupts */ - if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) - writel(0x0, priv->baseaddr + ECC_CLR_OFST); - else + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, priv->baseaddr + DDR_QOS_IRQ_DB_OFST); + + return; + } + + spin_lock_irqsave(&priv->reglock, flags); + + writel(0, priv->baseaddr + ECC_CLR_OFST); + + spin_unlock_irqrestore(&priv->reglock, flags); } /** @@ -576,8 +601,6 @@ static irqreturn_t intr_handler(int irq, void *dev_id) /* v3.0 of the controller does not have this register */ if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); - else - enable_intr(priv); return IRQ_HANDLED; } @@ -1359,6 +1382,7 @@ static int mc_probe(struct platform_device *pdev) priv = mci->pvt_info; priv->baseaddr = baseaddr; priv->p_data = p_data; + spin_lock_init(&priv->reglock); mc_init(mci, pdev); From b3e360e00d21badf3c19da63110020cd03f60415 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Thu, 11 Jul 2024 15:36:56 +0530 Subject: [PATCH 002/534] EDAC/synopsys: Fix error injection on Zynq UltraScale+ [ Upstream commit 35e6dbfe1846caeafabb49b7575adb36b0aa2269 ] The Zynq UltraScale+ MPSoC DDR has a disjoint memory from 2GB to 32GB. The DDR host interface has a contiguous memory so while injecting errors, the driver should remove the hole else the injection fails as the address translation is incorrect. Introduce a get_mem_info() function pointer and set it for Zynq UltraScale+ platform to return host address. Fixes: 1a81361f75d8 ("EDAC, synopsys: Add Error Injection support for ZynqMP DDR controller") Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240711100656.31376-1-shubhrajyoti.datta@amd.com Signed-off-by: Sasha Levin --- drivers/edac/synopsys_edac.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index 1ab2f737e4f1..6ddc90d7ba7c 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -337,6 +338,7 @@ struct synps_edac_priv { * @get_mtype: Get mtype. * @get_dtype: Get dtype. * @get_ecc_state: Get ECC state. + * @get_mem_info: Get EDAC memory info * @quirks: To differentiate IPs. */ struct synps_platform_data { @@ -344,6 +346,9 @@ struct synps_platform_data { enum mem_type (*get_mtype)(const void __iomem *base); enum dev_type (*get_dtype)(const void __iomem *base); bool (*get_ecc_state)(void __iomem *base); +#ifdef CONFIG_EDAC_DEBUG + u64 (*get_mem_info)(struct synps_edac_priv *priv); +#endif int quirks; }; @@ -402,6 +407,25 @@ out: return 0; } +#ifdef CONFIG_EDAC_DEBUG +/** + * zynqmp_get_mem_info - Get the current memory info. + * @priv: DDR memory controller private instance data. + * + * Return: host interface address. + */ +static u64 zynqmp_get_mem_info(struct synps_edac_priv *priv) +{ + u64 hif_addr = 0, linear_addr; + + linear_addr = priv->poison_addr; + if (linear_addr >= SZ_32G) + linear_addr = linear_addr - SZ_32G + SZ_2G; + hif_addr = linear_addr >> 3; + return hif_addr; +} +#endif + /** * zynqmp_get_error_info - Get the current ECC error info. * @priv: DDR memory controller private instance data. @@ -922,6 +946,9 @@ static const struct synps_platform_data zynqmp_edac_def = { .get_mtype = zynqmp_get_mtype, .get_dtype = zynqmp_get_dtype, .get_ecc_state = zynqmp_get_ecc_state, +#ifdef CONFIG_EDAC_DEBUG + .get_mem_info = zynqmp_get_mem_info, +#endif .quirks = (DDR_ECC_INTR_SUPPORT #ifdef CONFIG_EDAC_DEBUG | DDR_ECC_DATA_POISON_SUPPORT @@ -975,10 +1002,16 @@ MODULE_DEVICE_TABLE(of, synps_edac_match); static void ddr_poison_setup(struct synps_edac_priv *priv) { int col = 0, row = 0, bank = 0, bankgrp = 0, rank = 0, regval; + const struct synps_platform_data *p_data; int index; ulong hif_addr = 0; - hif_addr = priv->poison_addr >> 3; + p_data = priv->p_data; + + if (p_data->get_mem_info) + hif_addr = p_data->get_mem_info(priv); + else + hif_addr = priv->poison_addr >> 3; for (index = 0; index < DDR_MAX_ROW_SHIFT; index++) { if (priv->row_shift[index]) From 1b8178a2ae272256ea0dc4f940320a81003535e2 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 26 Jul 2024 14:46:57 +0300 Subject: [PATCH 003/534] wifi: rtw88: always wait for both firmware loading attempts [ Upstream commit 0e735a4c6137262bcefe45bb52fde7b1f5fc6c4d ] In 'rtw_wait_firmware_completion()', always wait for both (regular and wowlan) firmware loading attempts. Otherwise if 'rtw_usb_intf_init()' has failed in 'rtw_usb_probe()', 'rtw_usb_disconnect()' may issue 'ieee80211_free_hw()' when one of 'rtw_load_firmware_cb()' (usually the wowlan one) is still in progress, causing UAF detected by KASAN. Fixes: c8e5695eae99 ("rtw88: load wowlan firmware if wowlan is supported") Reported-by: syzbot+6c6c08700f9480c41fe3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6c6c08700f9480c41fe3 Signed-off-by: Dmitry Antipov Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240726114657.25396-1-dmantipov@yandex.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index 63673005c2fb..b90ea6c88b15 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1314,20 +1314,21 @@ static int rtw_wait_firmware_completion(struct rtw_dev *rtwdev) { const struct rtw_chip_info *chip = rtwdev->chip; struct rtw_fw_state *fw; + int ret = 0; fw = &rtwdev->fw; wait_for_completion(&fw->completion); if (!fw->firmware) - return -EINVAL; + ret = -EINVAL; if (chip->wow_fw_name) { fw = &rtwdev->wow_fw; wait_for_completion(&fw->completion); if (!fw->firmware) - return -EINVAL; + ret = -EINVAL; } - return 0; + return ret; } static enum rtw_lps_deep_mode rtw_update_lps_deep_mode(struct rtw_dev *rtwdev, From e55fcc821db03e3edf102d60ea284d4cf541ab6a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 8 Jul 2024 14:24:52 +0200 Subject: [PATCH 004/534] crypto: xor - fix template benchmarking [ Upstream commit ab9a244c396aae4aaa34b2399b82fc15ec2df8c1 ] Commit c055e3eae0f1 ("crypto: xor - use ktime for template benchmarking") switched from using jiffies to ktime-based performance benchmarking. This works nicely on machines which have a fine-grained ktime() clocksource as e.g. x86 machines with TSC. But other machines, e.g. my 4-way HP PARISC server, don't have such fine-grained clocksources, which is why it seems that 800 xor loops take zero seconds, which then shows up in the logs as: xor: measuring software checksum speed 8regs : -1018167296 MB/sec 8regs_prefetch : -1018167296 MB/sec 32regs : -1018167296 MB/sec 32regs_prefetch : -1018167296 MB/sec Fix this with some small modifications to the existing code to improve the algorithm to always produce correct results without introducing major delays for architectures with a fine-grained ktime() clocksource: a) Delay start of the timing until ktime() just advanced. On machines with a fast ktime() this should be just one additional ktime() call. b) Count the number of loops. Run at minimum 800 loops and finish earliest when the ktime() counter has progressed. With that the throughput can now be calculated more accurately under all conditions. Fixes: c055e3eae0f1 ("crypto: xor - use ktime for template benchmarking") Signed-off-by: Helge Deller Tested-by: John David Anglin v2: - clean up coding style (noticed & suggested by Herbert Xu) - rephrased & fixed typo in commit message Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/xor.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/crypto/xor.c b/crypto/xor.c index 8e72e5d5db0d..56aa3169e871 100644 --- a/crypto/xor.c +++ b/crypto/xor.c @@ -83,33 +83,30 @@ static void __init do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) { int speed; - int i, j; - ktime_t min, start, diff; + unsigned long reps; + ktime_t min, start, t0; tmpl->next = template_list; template_list = tmpl; preempt_disable(); - min = (ktime_t)S64_MAX; - for (i = 0; i < 3; i++) { - start = ktime_get(); - for (j = 0; j < REPS; j++) { - mb(); /* prevent loop optimization */ - tmpl->do_2(BENCH_SIZE, b1, b2); - mb(); - } - diff = ktime_sub(ktime_get(), start); - if (diff < min) - min = diff; - } + reps = 0; + t0 = ktime_get(); + /* delay start until time has advanced */ + while ((start = ktime_get()) == t0) + cpu_relax(); + do { + mb(); /* prevent loop optimization */ + tmpl->do_2(BENCH_SIZE, b1, b2); + mb(); + } while (reps++ < REPS || (t0 = ktime_get()) == start); + min = ktime_sub(t0, start); preempt_enable(); // bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s] - if (!min) - min = 1; - speed = (1000 * REPS * BENCH_SIZE) / (unsigned int)ktime_to_ns(min); + speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min); tmpl->speed = speed; pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed); From 9349283fc6b834286fe3e5d6e739361939ac8d94 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Wed, 31 Jul 2024 01:53:39 +0300 Subject: [PATCH 005/534] ACPI: PMIC: Remove unneeded check in tps68470_pmic_opregion_probe() [ Upstream commit 07442c46abad1d50ac82af5e0f9c5de2732c4592 ] In tps68470_pmic_opregion_probe() pointer 'dev' is compared to NULL which is useless. Fix this issue by removing unneeded check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: e13452ac3790 ("ACPI / PMIC: Add TI PMIC TPS68470 operation region driver") Suggested-by: Andy Shevchenko Signed-off-by: Aleksandr Mishin Reviewed-by: Sakari Ailus Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240730225339.13165-1-amishin@t-argos.ru [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/pmic/tps68470_pmic.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/pmic/tps68470_pmic.c b/drivers/acpi/pmic/tps68470_pmic.c index ebd03e472955..0d1a82eeb4b0 100644 --- a/drivers/acpi/pmic/tps68470_pmic.c +++ b/drivers/acpi/pmic/tps68470_pmic.c @@ -376,10 +376,8 @@ static int tps68470_pmic_opregion_probe(struct platform_device *pdev) struct tps68470_pmic_opregion *opregion; acpi_status status; - if (!dev || !tps68470_regmap) { - dev_warn(dev, "dev or regmap is NULL\n"); - return -EINVAL; - } + if (!tps68470_regmap) + return dev_err_probe(dev, -EINVAL, "regmap is missing\n"); if (!handle) { dev_warn(dev, "acpi handle is NULL\n"); From c3cfcf51b4e018daf1d8cb9aebbf491ce538b1a5 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 3 Jan 2024 10:57:01 +0100 Subject: [PATCH 006/534] wifi: brcmfmac: export firmware interface functions [ Upstream commit 31343230abb1683e8afb254e6b13a7a7fd01fcac ] With multi-vendor support the vendor-specific module may need to use the firmware interface functions so export them using the macro BRCMF_EXPORT_SYMBOL_GPL() which exports them to driver namespace. Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://msgid.link/20240103095704.135651-2-arend.vanspriel@broadcom.com Stable-dep-of: c6002b6c05f3 ("wifi: brcmfmac: introducing fwil query functions") Signed-off-by: Sasha Levin --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 4 +- .../broadcom/brcm80211/brcmfmac/core.c | 2 +- .../broadcom/brcm80211/brcmfmac/feature.c | 2 +- .../broadcom/brcm80211/brcmfmac/fwil.c | 115 +--------------- .../broadcom/brcm80211/brcmfmac/fwil.h | 125 +++++++++++++++--- 5 files changed, 120 insertions(+), 128 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index da4968e66725..fb91ebe5553e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3067,7 +3067,7 @@ brcmf_cfg80211_get_station_ibss(struct brcmf_if *ifp, struct brcmf_scb_val_le scbval; struct brcmf_pktcnt_le pktcnt; s32 err; - u32 rate; + u32 rate = 0; u32 rssi; /* Get the current tx rate */ @@ -7269,7 +7269,7 @@ static int brcmf_setup_wiphybands(struct brcmf_cfg80211_info *cfg) u32 nmode = 0; u32 vhtmode = 0; u32 bw_cap[2] = { WLC_BW_20MHZ_BIT, WLC_BW_20MHZ_BIT }; - u32 rxchain; + u32 rxchain = 0; u32 nchain; int err; s32 i; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index f599d5f896e8..a92f78026cfd 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -691,7 +691,7 @@ static int brcmf_net_mon_open(struct net_device *ndev) { struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_pub *drvr = ifp->drvr; - u32 monitor; + u32 monitor = 0; int err; brcmf_dbg(TRACE, "Enter\n"); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c index 909a34a1ab50..7fef93ede0fb 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c @@ -184,7 +184,7 @@ static void brcmf_feat_wlc_version_overrides(struct brcmf_pub *drv) static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp, enum brcmf_feat_id id, char *name) { - u32 data; + u32 data = 0; int err; /* we need to know firmware error */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c index a9514d72f770..bc1c6b5a6e31 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c @@ -142,6 +142,7 @@ brcmf_fil_cmd_data_set(struct brcmf_if *ifp, u32 cmd, void *data, u32 len) return err; } +BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_cmd_data_set); s32 brcmf_fil_cmd_data_get(struct brcmf_if *ifp, u32 cmd, void *data, u32 len) @@ -160,36 +161,7 @@ brcmf_fil_cmd_data_get(struct brcmf_if *ifp, u32 cmd, void *data, u32 len) return err; } - - -s32 -brcmf_fil_cmd_int_set(struct brcmf_if *ifp, u32 cmd, u32 data) -{ - s32 err; - __le32 data_le = cpu_to_le32(data); - - mutex_lock(&ifp->drvr->proto_block); - brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, data); - err = brcmf_fil_cmd_data(ifp, cmd, &data_le, sizeof(data_le), true); - mutex_unlock(&ifp->drvr->proto_block); - - return err; -} - -s32 -brcmf_fil_cmd_int_get(struct brcmf_if *ifp, u32 cmd, u32 *data) -{ - s32 err; - __le32 data_le = cpu_to_le32(*data); - - mutex_lock(&ifp->drvr->proto_block); - err = brcmf_fil_cmd_data(ifp, cmd, &data_le, sizeof(data_le), false); - mutex_unlock(&ifp->drvr->proto_block); - *data = le32_to_cpu(data_le); - brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, *data); - - return err; -} +BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_cmd_data_get); static u32 brcmf_create_iovar(const char *name, const char *data, u32 datalen, @@ -271,26 +243,7 @@ brcmf_fil_iovar_data_get(struct brcmf_if *ifp, const char *name, void *data, mutex_unlock(&drvr->proto_block); return err; } - -s32 -brcmf_fil_iovar_int_set(struct brcmf_if *ifp, const char *name, u32 data) -{ - __le32 data_le = cpu_to_le32(data); - - return brcmf_fil_iovar_data_set(ifp, name, &data_le, sizeof(data_le)); -} - -s32 -brcmf_fil_iovar_int_get(struct brcmf_if *ifp, const char *name, u32 *data) -{ - __le32 data_le = cpu_to_le32(*data); - s32 err; - - err = brcmf_fil_iovar_data_get(ifp, name, &data_le, sizeof(data_le)); - if (err == 0) - *data = le32_to_cpu(data_le); - return err; -} +BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_iovar_data_get); static u32 brcmf_create_bsscfg(s32 bsscfgidx, const char *name, char *data, u32 datalen, @@ -365,6 +318,7 @@ brcmf_fil_bsscfg_data_set(struct brcmf_if *ifp, const char *name, mutex_unlock(&drvr->proto_block); return err; } +BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_bsscfg_data_set); s32 brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, const char *name, @@ -395,28 +349,7 @@ brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, const char *name, mutex_unlock(&drvr->proto_block); return err; } - -s32 -brcmf_fil_bsscfg_int_set(struct brcmf_if *ifp, const char *name, u32 data) -{ - __le32 data_le = cpu_to_le32(data); - - return brcmf_fil_bsscfg_data_set(ifp, name, &data_le, - sizeof(data_le)); -} - -s32 -brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, const char *name, u32 *data) -{ - __le32 data_le = cpu_to_le32(*data); - s32 err; - - err = brcmf_fil_bsscfg_data_get(ifp, name, &data_le, - sizeof(data_le)); - if (err == 0) - *data = le32_to_cpu(data_le); - return err; -} +BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_bsscfg_data_get); static u32 brcmf_create_xtlv(const char *name, u16 id, char *data, u32 len, char *buf, u32 buflen) @@ -466,6 +399,7 @@ s32 brcmf_fil_xtlv_data_set(struct brcmf_if *ifp, const char *name, u16 id, mutex_unlock(&drvr->proto_block); return err; } +BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_xtlv_data_set); s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, const char *name, u16 id, void *data, u32 len) @@ -495,39 +429,4 @@ s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, const char *name, u16 id, mutex_unlock(&drvr->proto_block); return err; } - -s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, const char *name, u16 id, u32 data) -{ - __le32 data_le = cpu_to_le32(data); - - return brcmf_fil_xtlv_data_set(ifp, name, id, &data_le, - sizeof(data_le)); -} - -s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, const char *name, u16 id, u32 *data) -{ - __le32 data_le = cpu_to_le32(*data); - s32 err; - - err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le)); - if (err == 0) - *data = le32_to_cpu(data_le); - return err; -} - -s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, const char *name, u16 id, u8 *data) -{ - return brcmf_fil_xtlv_data_get(ifp, name, id, data, sizeof(*data)); -} - -s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, const char *name, u16 id, u16 *data) -{ - __le16 data_le = cpu_to_le16(*data); - s32 err; - - err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le)); - if (err == 0) - *data = le16_to_cpu(data_le); - return err; -} - +BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_xtlv_data_get); \ No newline at end of file diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h index bc693157c4b1..a315a7fac6a0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h @@ -81,29 +81,122 @@ s32 brcmf_fil_cmd_data_set(struct brcmf_if *ifp, u32 cmd, void *data, u32 len); s32 brcmf_fil_cmd_data_get(struct brcmf_if *ifp, u32 cmd, void *data, u32 len); -s32 brcmf_fil_cmd_int_set(struct brcmf_if *ifp, u32 cmd, u32 data); -s32 brcmf_fil_cmd_int_get(struct brcmf_if *ifp, u32 cmd, u32 *data); +static inline +s32 brcmf_fil_cmd_int_set(struct brcmf_if *ifp, u32 cmd, u32 data) +{ + s32 err; + __le32 data_le = cpu_to_le32(data); -s32 brcmf_fil_iovar_data_set(struct brcmf_if *ifp, const char *name, const void *data, - u32 len); + brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, data); + err = brcmf_fil_cmd_data_set(ifp, cmd, &data_le, sizeof(data_le)); + + return err; +} +static inline +s32 brcmf_fil_cmd_int_get(struct brcmf_if *ifp, u32 cmd, u32 *data) +{ + s32 err; + __le32 data_le = cpu_to_le32(*data); + + err = brcmf_fil_cmd_data_get(ifp, cmd, &data_le, sizeof(data_le)); + if (err == 0) + *data = le32_to_cpu(data_le); + brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, *data); + + return err; +} + +s32 brcmf_fil_iovar_data_set(struct brcmf_if *ifp, const char *name, + const void *data, u32 len); s32 brcmf_fil_iovar_data_get(struct brcmf_if *ifp, const char *name, void *data, u32 len); -s32 brcmf_fil_iovar_int_set(struct brcmf_if *ifp, const char *name, u32 data); -s32 brcmf_fil_iovar_int_get(struct brcmf_if *ifp, const char *name, u32 *data); +static inline +s32 brcmf_fil_iovar_int_set(struct brcmf_if *ifp, const char *name, u32 data) +{ + __le32 data_le = cpu_to_le32(data); + + return brcmf_fil_iovar_data_set(ifp, name, &data_le, sizeof(data_le)); +} +static inline +s32 brcmf_fil_iovar_int_get(struct brcmf_if *ifp, const char *name, u32 *data) +{ + __le32 data_le = cpu_to_le32(*data); + s32 err; + + err = brcmf_fil_iovar_data_get(ifp, name, &data_le, sizeof(data_le)); + if (err == 0) + *data = le32_to_cpu(data_le); + return err; +} + + +s32 brcmf_fil_bsscfg_data_set(struct brcmf_if *ifp, const char *name, + void *data, u32 len); +s32 brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, const char *name, + void *data, u32 len); +static inline +s32 brcmf_fil_bsscfg_int_set(struct brcmf_if *ifp, const char *name, u32 data) +{ + __le32 data_le = cpu_to_le32(data); + + return brcmf_fil_bsscfg_data_set(ifp, name, &data_le, + sizeof(data_le)); +} +static inline +s32 brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, const char *name, u32 *data) +{ + __le32 data_le = cpu_to_le32(*data); + s32 err; + + err = brcmf_fil_bsscfg_data_get(ifp, name, &data_le, + sizeof(data_le)); + if (err == 0) + *data = le32_to_cpu(data_le); + return err; +} -s32 brcmf_fil_bsscfg_data_set(struct brcmf_if *ifp, const char *name, void *data, - u32 len); -s32 brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, const char *name, void *data, - u32 len); -s32 brcmf_fil_bsscfg_int_set(struct brcmf_if *ifp, const char *name, u32 data); -s32 brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, const char *name, u32 *data); s32 brcmf_fil_xtlv_data_set(struct brcmf_if *ifp, const char *name, u16 id, void *data, u32 len); s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, const char *name, u16 id, void *data, u32 len); -s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, const char *name, u16 id, u32 data); -s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, const char *name, u16 id, u32 *data); -s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, const char *name, u16 id, u8 *data); -s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, const char *name, u16 id, u16 *data); +static inline +s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, const char *name, u16 id, + u32 data) +{ + __le32 data_le = cpu_to_le32(data); + + return brcmf_fil_xtlv_data_set(ifp, name, id, &data_le, + sizeof(data_le)); +} +static inline +s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, const char *name, u16 id, + u32 *data) +{ + __le32 data_le = cpu_to_le32(*data); + s32 err; + + err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le)); + if (err == 0) + *data = le32_to_cpu(data_le); + return err; +} +static inline +s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, const char *name, u16 id, + u8 *data) +{ + return brcmf_fil_xtlv_data_get(ifp, name, id, data, sizeof(*data)); +} +static inline +s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, const char *name, u16 id, + u16 *data) +{ + __le16 data_le = cpu_to_le16(*data); + s32 err; + + err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le)); + if (err == 0) + *data = le16_to_cpu(data_le); + return err; +} #endif /* _fwil_h_ */ From fb1862ce266479b7e0229ff2ce46f6433dcd3807 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sat, 27 Jul 2024 20:56:17 +0200 Subject: [PATCH 007/534] wifi: brcmfmac: introducing fwil query functions [ Upstream commit c6002b6c05f3edfa12fd25990cc637281f200442 ] When the firmware interface layer was refactored it provided various "get" and "set" functions. For the "get" in some cases a parameter needed to be passed down to firmware as a key indicating what to "get" turning the output parameter of the "get" function into an input parameter as well. To accommodate this the "get" function blindly copies the parameter which in some places resulted in an uninitialized warnings from the compiler. These have been fixed by initializing the input parameter in the past. Recently another batch of similar fixes were submitted to address clang static checker warnings [1]. Proposing another solution by introducing a "query" variant which is used when the (input) parameter is needed by firmware. The "get" variant will only fill the (output) parameter with the result received from firmware taking care of proper endianess conversion. [1] https://lore.kernel.org/all/20240702122450.2213833-1-suhui@nfschina.com/ Fixes: 81f5dcb80830 ("brcmfmac: refactor firmware interface layer.") Reported-by: Su Hui Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240727185617.253210-1-arend.vanspriel@broadcom.com Signed-off-by: Sasha Levin --- .../broadcom/brcm80211/brcmfmac/btcoex.c | 2 +- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 30 +++++++------- .../broadcom/brcm80211/brcmfmac/core.c | 2 +- .../broadcom/brcm80211/brcmfmac/feature.c | 2 +- .../broadcom/brcm80211/brcmfmac/fwil.h | 40 ++++++++++++++----- 5 files changed, 48 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c index 7ea2631b8069..00794086cc7c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c @@ -123,7 +123,7 @@ static s32 brcmf_btcoex_params_read(struct brcmf_if *ifp, u32 addr, u32 *data) { *data = addr; - return brcmf_fil_iovar_int_get(ifp, "btc_params", data); + return brcmf_fil_iovar_int_query(ifp, "btc_params", data); } /** diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index fb91ebe5553e..c708ae91c3ce 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -663,8 +663,8 @@ static int brcmf_cfg80211_request_sta_if(struct brcmf_if *ifp, u8 *macaddr) /* interface_create version 3+ */ /* get supported version from firmware side */ iface_create_ver = 0; - err = brcmf_fil_bsscfg_int_get(ifp, "interface_create", - &iface_create_ver); + err = brcmf_fil_bsscfg_int_query(ifp, "interface_create", + &iface_create_ver); if (err) { brcmf_err("fail to get supported version, err=%d\n", err); return -EOPNOTSUPP; @@ -756,8 +756,8 @@ static int brcmf_cfg80211_request_ap_if(struct brcmf_if *ifp) /* interface_create version 3+ */ /* get supported version from firmware side */ iface_create_ver = 0; - err = brcmf_fil_bsscfg_int_get(ifp, "interface_create", - &iface_create_ver); + err = brcmf_fil_bsscfg_int_query(ifp, "interface_create", + &iface_create_ver); if (err) { brcmf_err("fail to get supported version, err=%d\n", err); return -EOPNOTSUPP; @@ -2101,7 +2101,8 @@ brcmf_set_key_mgmt(struct net_device *ndev, struct cfg80211_connect_params *sme) if (!sme->crypto.n_akm_suites) return 0; - err = brcmf_fil_bsscfg_int_get(netdev_priv(ndev), "wpa_auth", &val); + err = brcmf_fil_bsscfg_int_get(netdev_priv(ndev), + "wpa_auth", &val); if (err) { bphy_err(drvr, "could not get wpa_auth (%d)\n", err); return err; @@ -2680,7 +2681,7 @@ brcmf_cfg80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_cfg80211_vif *vif = wdev_to_vif(wdev); struct brcmf_pub *drvr = cfg->pub; - s32 qdbm = 0; + s32 qdbm; s32 err; brcmf_dbg(TRACE, "Enter\n"); @@ -3067,7 +3068,7 @@ brcmf_cfg80211_get_station_ibss(struct brcmf_if *ifp, struct brcmf_scb_val_le scbval; struct brcmf_pktcnt_le pktcnt; s32 err; - u32 rate = 0; + u32 rate; u32 rssi; /* Get the current tx rate */ @@ -7046,8 +7047,8 @@ static int brcmf_construct_chaninfo(struct brcmf_cfg80211_info *cfg, ch.bw = BRCMU_CHAN_BW_20; cfg->d11inf.encchspec(&ch); chaninfo = ch.chspec; - err = brcmf_fil_bsscfg_int_get(ifp, "per_chan_info", - &chaninfo); + err = brcmf_fil_bsscfg_int_query(ifp, "per_chan_info", + &chaninfo); if (!err) { if (chaninfo & WL_CHAN_RADAR) channel->flags |= @@ -7081,7 +7082,7 @@ static int brcmf_enable_bw40_2g(struct brcmf_cfg80211_info *cfg) /* verify support for bw_cap command */ val = WLC_BAND_5G; - err = brcmf_fil_iovar_int_get(ifp, "bw_cap", &val); + err = brcmf_fil_iovar_int_query(ifp, "bw_cap", &val); if (!err) { /* only set 2G bandwidth using bw_cap command */ @@ -7157,11 +7158,11 @@ static void brcmf_get_bwcap(struct brcmf_if *ifp, u32 bw_cap[]) int err; band = WLC_BAND_2G; - err = brcmf_fil_iovar_int_get(ifp, "bw_cap", &band); + err = brcmf_fil_iovar_int_query(ifp, "bw_cap", &band); if (!err) { bw_cap[NL80211_BAND_2GHZ] = band; band = WLC_BAND_5G; - err = brcmf_fil_iovar_int_get(ifp, "bw_cap", &band); + err = brcmf_fil_iovar_int_query(ifp, "bw_cap", &band); if (!err) { bw_cap[NL80211_BAND_5GHZ] = band; return; @@ -7170,7 +7171,6 @@ static void brcmf_get_bwcap(struct brcmf_if *ifp, u32 bw_cap[]) return; } brcmf_dbg(INFO, "fallback to mimo_bw_cap info\n"); - mimo_bwcap = 0; err = brcmf_fil_iovar_int_get(ifp, "mimo_bw_cap", &mimo_bwcap); if (err) /* assume 20MHz if firmware does not give a clue */ @@ -7266,10 +7266,10 @@ static int brcmf_setup_wiphybands(struct brcmf_cfg80211_info *cfg) struct brcmf_pub *drvr = cfg->pub; struct brcmf_if *ifp = brcmf_get_ifp(drvr, 0); struct wiphy *wiphy = cfg_to_wiphy(cfg); - u32 nmode = 0; + u32 nmode; u32 vhtmode = 0; u32 bw_cap[2] = { WLC_BW_20MHZ_BIT, WLC_BW_20MHZ_BIT }; - u32 rxchain = 0; + u32 rxchain; u32 nchain; int err; s32 i; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index a92f78026cfd..f599d5f896e8 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -691,7 +691,7 @@ static int brcmf_net_mon_open(struct net_device *ndev) { struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_pub *drvr = ifp->drvr; - u32 monitor = 0; + u32 monitor; int err; brcmf_dbg(TRACE, "Enter\n"); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c index 7fef93ede0fb..909a34a1ab50 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c @@ -184,7 +184,7 @@ static void brcmf_feat_wlc_version_overrides(struct brcmf_pub *drv) static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp, enum brcmf_feat_id id, char *name) { - u32 data = 0; + u32 data; int err; /* we need to know firmware error */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h index a315a7fac6a0..31e080e4da66 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h @@ -96,15 +96,22 @@ static inline s32 brcmf_fil_cmd_int_get(struct brcmf_if *ifp, u32 cmd, u32 *data) { s32 err; - __le32 data_le = cpu_to_le32(*data); - err = brcmf_fil_cmd_data_get(ifp, cmd, &data_le, sizeof(data_le)); + err = brcmf_fil_cmd_data_get(ifp, cmd, data, sizeof(*data)); if (err == 0) - *data = le32_to_cpu(data_le); + *data = le32_to_cpu(*(__le32 *)data); brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, *data); return err; } +static inline +s32 brcmf_fil_cmd_int_query(struct brcmf_if *ifp, u32 cmd, u32 *data) +{ + __le32 *data_le = (__le32 *)data; + + *data_le = cpu_to_le32(*data); + return brcmf_fil_cmd_int_get(ifp, cmd, data); +} s32 brcmf_fil_iovar_data_set(struct brcmf_if *ifp, const char *name, const void *data, u32 len); @@ -120,14 +127,21 @@ s32 brcmf_fil_iovar_int_set(struct brcmf_if *ifp, const char *name, u32 data) static inline s32 brcmf_fil_iovar_int_get(struct brcmf_if *ifp, const char *name, u32 *data) { - __le32 data_le = cpu_to_le32(*data); s32 err; - err = brcmf_fil_iovar_data_get(ifp, name, &data_le, sizeof(data_le)); + err = brcmf_fil_iovar_data_get(ifp, name, data, sizeof(*data)); if (err == 0) - *data = le32_to_cpu(data_le); + *data = le32_to_cpu(*(__le32 *)data); return err; } +static inline +s32 brcmf_fil_iovar_int_query(struct brcmf_if *ifp, const char *name, u32 *data) +{ + __le32 *data_le = (__le32 *)data; + + *data_le = cpu_to_le32(*data); + return brcmf_fil_iovar_int_get(ifp, name, data); +} s32 brcmf_fil_bsscfg_data_set(struct brcmf_if *ifp, const char *name, @@ -145,15 +159,21 @@ s32 brcmf_fil_bsscfg_int_set(struct brcmf_if *ifp, const char *name, u32 data) static inline s32 brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, const char *name, u32 *data) { - __le32 data_le = cpu_to_le32(*data); s32 err; - err = brcmf_fil_bsscfg_data_get(ifp, name, &data_le, - sizeof(data_le)); + err = brcmf_fil_bsscfg_data_get(ifp, name, data, sizeof(*data)); if (err == 0) - *data = le32_to_cpu(data_le); + *data = le32_to_cpu(*(__le32 *)data); return err; } +static inline +s32 brcmf_fil_bsscfg_int_query(struct brcmf_if *ifp, const char *name, u32 *data) +{ + __le32 *data_le = (__le32 *)data; + + *data_le = cpu_to_le32(*data); + return brcmf_fil_bsscfg_int_get(ifp, name, data); +} s32 brcmf_fil_xtlv_data_set(struct brcmf_if *ifp, const char *name, u16 id, void *data, u32 len); From dda028a8aa3c45cbd9ed9bf57f1768d64692461f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 5 Aug 2024 13:02:22 +0200 Subject: [PATCH 008/534] wifi: ath9k: Remove error checks when creating debugfs entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f6ffe7f0184792c2f99aca6ae5b916683973d7d3 ] We should not be checking the return values from debugfs creation at all: the debugfs functions are designed to handle errors of previously called functions and just transparently abort the creation of debugfs entries when debugfs is disabled. If we check the return value and abort driver initialisation, we break the driver if debugfs is disabled (such as when booting with debugfs=off). Earlier versions of ath9k accidentally did the right thing by checking the return value, but only for NULL, not for IS_ERR(). This was "fixed" by the two commits referenced below, breaking ath9k with debugfs=off starting from the 6.6 kernel (as reported in the Bugzilla linked below). Restore functionality by just getting rid of the return value check entirely. Link: https://bugzilla.kernel.org/show_bug.cgi?id=219122 Fixes: 1e4134610d93 ("wifi: ath9k: use IS_ERR() with debugfs_create_dir()") Fixes: 6edb4ba6fb5b ("wifi: ath9k: fix parameter check in ath9k_init_debug()") Reported-by: Daniel Tobias Tested-by: Daniel Tobias Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240805110225.19690-1-toke@toke.dk Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/debug.c | 2 -- drivers/net/wireless/ath/ath9k/htc_drv_debug.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index a0376a6787b8..808fb6747a7f 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -1380,8 +1380,6 @@ int ath9k_init_debug(struct ath_hw *ah) sc->debug.debugfs_phy = debugfs_create_dir("ath9k", sc->hw->wiphy->debugfsdir); - if (IS_ERR(sc->debug.debugfs_phy)) - return -ENOMEM; #ifdef CONFIG_ATH_DEBUG debugfs_create_file("debug", 0600, sc->debug.debugfs_phy, diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c index 278ddc713fdc..7b1452822431 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c @@ -486,8 +486,6 @@ int ath9k_htc_init_debug(struct ath_hw *ah) priv->debug.debugfs_phy = debugfs_create_dir(KBUILD_MODNAME, priv->hw->wiphy->debugfsdir); - if (IS_ERR(priv->debug.debugfs_phy)) - return -ENOMEM; ath9k_cmn_spectral_init_debug(&priv->spec_priv, priv->debug.debugfs_phy); From d45fe0115edf96ccf2968eb2cce1bf5eba5b07c5 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Mon, 1 Apr 2024 00:02:31 +0530 Subject: [PATCH 009/534] wifi: ath12k: fix BSS chan info request WMI command [ Upstream commit 59529c982f85047650fd473db903b23006a796c6 ] Currently, the firmware returns incorrect pdev_id information in WMI_PDEV_BSS_CHAN_INFO_EVENTID, leading to incorrect filling of the pdev's survey information. To prevent this issue, when requesting BSS channel information through WMI_PDEV_BSS_CHAN_INFO_REQUEST_CMDID, firmware expects pdev_id as one of the arguments in this WMI command. Add pdev_id to the struct wmi_pdev_bss_chan_info_req_cmd and fill it during ath12k_wmi_pdev_bss_chan_info_request(). This resolves the issue of sending the correct pdev_id in WMI_PDEV_BSS_CHAN_INFO_EVENTID. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: P Praneesh Signed-off-by: Karthikeyan Kathirvel Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240331183232.2158756-2-quic_kathirve@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/wmi.c | 1 + drivers/net/wireless/ath/ath12k/wmi.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 21399ad233c0..9105fdd14c66 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -1501,6 +1501,7 @@ int ath12k_wmi_pdev_bss_chan_info_request(struct ath12k *ar, cmd->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_PDEV_BSS_CHAN_INFO_REQUEST, sizeof(*cmd)); cmd->req_type = cpu_to_le32(type); + cmd->pdev_id = cpu_to_le32(ar->pdev->pdev_id); ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "WMI bss chan info req type %d\n", type); diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index c75a6fa1f7e0..6549c3853be4 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -3058,6 +3058,7 @@ struct wmi_pdev_bss_chan_info_req_cmd { __le32 tlv_header; /* ref wmi_bss_chan_info_req_type */ __le32 req_type; + __le32 pdev_id; } __packed; struct wmi_ap_ps_peer_cmd { From aafd6ad1d9d6b17918ac4c34b99472738e2c6deb Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Mon, 1 Apr 2024 00:02:32 +0530 Subject: [PATCH 010/534] wifi: ath12k: match WMI BSS chan info structure with firmware definition [ Upstream commit dd98d54db29fb553839f43ade5f547baa93392c8 ] struct wmi_pdev_bss_chan_info_event is not similar to the firmware struct definition, this will cause some random failures. Fix by matching the struct wmi_pdev_bss_chan_info_event with the firmware structure definition. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: P Praneesh Signed-off-by: Karthikeyan Kathirvel Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240331183232.2158756-3-quic_kathirve@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/wmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 6549c3853be4..a19a2c29f226 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -4034,7 +4034,6 @@ struct wmi_vdev_stopped_event { } __packed; struct wmi_pdev_bss_chan_info_event { - __le32 pdev_id; __le32 freq; /* Units in MHz */ __le32 noise_floor; /* units are dBm */ /* rx clear - how often the channel was unused */ @@ -4052,6 +4051,7 @@ struct wmi_pdev_bss_chan_info_event { /*rx_cycle cnt for my bss in 64bits format */ __le32 rx_bss_cycle_count_low; __le32 rx_bss_cycle_count_high; + __le32 pdev_id; } __packed; #define WMI_VDEV_INSTALL_KEY_COMPL_STATUS_SUCCESS 0 From 0a9445aa8e8fb7c04739d2be18de098d9356abed Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Wed, 10 Jul 2024 10:18:19 +0800 Subject: [PATCH 011/534] wifi: ath12k: fix invalid AMPDU factor calculation in ath12k_peer_assoc_h_he() [ Upstream commit a66de2d0f22b1740f3f9777776ad98c4bee62dff ] Currently ampdu_factor is wrongly calculated in ath12k_peer_assoc_h_he(), fix it. This is found during code review. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Baochen Qiang Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240710021819.87216-1-quic_bqiang@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/mac.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index dd2a7c95517b..4bb30e403728 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -1681,9 +1681,8 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, * request, then use MAX_AMPDU_LEN_FACTOR as 16 to calculate max_ampdu * length. */ - ampdu_factor = (he_cap->he_cap_elem.mac_cap_info[3] & - IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK) >> - IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK; + ampdu_factor = u8_get_bits(he_cap->he_cap_elem.mac_cap_info[3], + IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK); if (ampdu_factor) { if (sta->deflink.vht_cap.vht_supported) From f0525a641a4ab50f5bd6936b3cd832799a1ae6b0 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Wed, 7 Aug 2024 21:48:02 +0800 Subject: [PATCH 012/534] net: stmmac: dwmac-loongson: Init ref and PTP clocks rate [ Upstream commit c70f3163681381c15686bdd2fe56bf4af9b8aaaa ] Reference and PTP clocks rate of the Loongson GMAC devices is 125MHz. (So is in the GNET devices which support is about to be added.) Set the respective plat_stmmacenet_data field up in accordance with that so to have the coalesce command and timestamping work correctly. Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: Feiyang Chen Signed-off-by: Yinggang Gu Reviewed-by: Serge Semin Acked-by: Huacai Chen Signed-off-by: Yanteng Si Tested-by: Serge Semin Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 9e40c28d453a..ee3604f58def 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -35,6 +35,9 @@ static int loongson_default_data(struct plat_stmmacenet_data *plat) /* Disable RX queues routing by default */ plat->rx_queues_cfg[0].pkt_route = 0x0; + plat->clk_ref_rate = 125000000; + plat->clk_ptp_rate = 125000000; + /* Default to phy auto-detection */ plat->phy_addr = -1; From 32ba3160889e8aced36ac8c7d49fef92f25da113 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 29 Jul 2024 16:20:05 +0100 Subject: [PATCH 013/534] arm64: signal: Fix some under-bracketed UAPI macros [ Upstream commit fc2220c9b15828319b09384e68399b4afc6276d9 ] A few SME-related sigcontext UAPI macros leave an argument unprotected from misparsing during macro expansion. Add parentheses around references to macro arguments where appropriate. Signed-off-by: Dave Martin Fixes: ee072cf70804 ("arm64/sme: Implement signal handling for ZT") Fixes: 39782210eb7e ("arm64/sme: Implement ZA signal handling") Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20240729152005.289844-1-Dave.Martin@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/uapi/asm/sigcontext.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index f23c1dc3f002..8f003db7a696 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -312,10 +312,10 @@ struct zt_context { ((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \ / __SVE_VQ_BYTES * __SVE_VQ_BYTES) -#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) +#define ZA_SIG_REGS_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES)) #define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \ - (SVE_SIG_ZREG_SIZE(vq) * n)) + (SVE_SIG_ZREG_SIZE(vq) * (n))) #define ZA_SIG_CONTEXT_SIZE(vq) \ (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq)) @@ -326,7 +326,7 @@ struct zt_context { #define ZT_SIG_REGS_OFFSET sizeof(struct zt_context) -#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n) +#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * (n)) #define ZT_SIG_CONTEXT_SIZE(n) \ (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n)) From 07b90bbfe9c9d5c05f15bca943cad747fbc77319 Mon Sep 17 00:00:00 2001 From: Dmitry Kandybka Date: Fri, 9 Aug 2024 11:53:10 +0300 Subject: [PATCH 014/534] wifi: rtw88: remove CPT execution branch never used [ Upstream commit 77c977327dfaa9ae2e154964cdb89ceb5c7b7cf1 ] In 'rtw_coex_action_bt_a2dp_pan', 'wl_cpt_test' and 'bt_cpt_test' are hardcoded to false, so corresponding 'table_case' and 'tdma_case' assignments are never met. Also 'rtw_coex_set_rf_para(rtwdev, chip->wl_rf_para_rx[1])' is never executed. Assuming that CPT was never fully implemented, remove lookalike leftovers. Compile tested only. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 76f631cb401f ("rtw88: coex: update the mechanism for A2DP + PAN") Signed-off-by: Dmitry Kandybka Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240809085310.10512-1-d.kandybka@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/coex.c | 38 ++++++----------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c index 86467d2f8888..d35f26919806 100644 --- a/drivers/net/wireless/realtek/rtw88/coex.c +++ b/drivers/net/wireless/realtek/rtw88/coex.c @@ -2194,7 +2194,6 @@ static void rtw_coex_action_bt_a2dp_pan(struct rtw_dev *rtwdev) struct rtw_coex_stat *coex_stat = &coex->stat; struct rtw_efuse *efuse = &rtwdev->efuse; u8 table_case, tdma_case; - bool wl_cpt_test = false, bt_cpt_test = false; rtw_dbg(rtwdev, RTW_DBG_COEX, "[BTCoex], %s()\n", __func__); @@ -2202,29 +2201,16 @@ static void rtw_coex_action_bt_a2dp_pan(struct rtw_dev *rtwdev) rtw_coex_set_rf_para(rtwdev, chip->wl_rf_para_rx[0]); if (efuse->share_ant) { /* Shared-Ant */ - if (wl_cpt_test) { - if (coex_stat->wl_gl_busy) { - table_case = 20; - tdma_case = 17; - } else { - table_case = 10; - tdma_case = 15; - } - } else if (bt_cpt_test) { - table_case = 26; - tdma_case = 26; - } else { - if (coex_stat->wl_gl_busy && - coex_stat->wl_noisy_level == 0) - table_case = 14; - else - table_case = 10; + if (coex_stat->wl_gl_busy && + coex_stat->wl_noisy_level == 0) + table_case = 14; + else + table_case = 10; - if (coex_stat->wl_gl_busy) - tdma_case = 15; - else - tdma_case = 20; - } + if (coex_stat->wl_gl_busy) + tdma_case = 15; + else + tdma_case = 20; } else { /* Non-Shared-Ant */ table_case = 112; @@ -2235,11 +2221,7 @@ static void rtw_coex_action_bt_a2dp_pan(struct rtw_dev *rtwdev) tdma_case = 120; } - if (wl_cpt_test) - rtw_coex_set_rf_para(rtwdev, chip->wl_rf_para_rx[1]); - else - rtw_coex_set_rf_para(rtwdev, chip->wl_rf_para_rx[0]); - + rtw_coex_set_rf_para(rtwdev, chip->wl_rf_para_rx[0]); rtw_coex_table(rtwdev, false, table_case); rtw_coex_tdma(rtwdev, false, tdma_case); } From a72a99da7a8f63b3d845e702ae553bae3b7888e2 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 7 Aug 2024 17:49:44 +0200 Subject: [PATCH 015/534] RISC-V: KVM: Fix sbiret init before forwarding to userspace [ Upstream commit 6b7b282e6baea06ba65b55ae7d38326ceb79cebf ] When forwarding SBI calls to userspace ensure sbiret.error is initialized to SBI_ERR_NOT_SUPPORTED first, in case userspace neglects to set it to anything. If userspace neglects it then we can't be sure it did anything else either, so we just report it didn't do or try anything. Just init sbiret.value to zero, which is the preferred value to return when nothing special is specified. KVM was already initializing both sbiret.error and sbiret.value, but the values used appear to come from a copy+paste of the __sbi_ecall() implementation, i.e. a0 and a1, which don't apply prior to the call being executed, nor at all when forwarding to userspace. Fixes: dea8ee31a039 ("RISC-V: KVM: Add SBI v0.1 support") Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20240807154943.150540-2-ajones@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Sasha Levin --- arch/riscv/kvm/vcpu_sbi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 9cd97091c723..7a7fe40d0930 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -91,8 +91,8 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) run->riscv_sbi.args[3] = cp->a3; run->riscv_sbi.args[4] = cp->a4; run->riscv_sbi.args[5] = cp->a5; - run->riscv_sbi.ret[0] = cp->a0; - run->riscv_sbi.ret[1] = cp->a1; + run->riscv_sbi.ret[0] = SBI_ERR_NOT_SUPPORTED; + run->riscv_sbi.ret[1] = 0; } void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, From 3c39f253e2c989e4d4aee58d0e33d6f79317bf33 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 16 Aug 2024 00:08:08 -0700 Subject: [PATCH 016/534] RISC-V: KVM: Allow legacy PMU access from guest [ Upstream commit 7d1ffc8b087e97dbe1985912c7a2d00e53cea169 ] Currently, KVM traps & emulates PMU counter access only if SBI PMU is available as the guest can only configure/read PMU counters via SBI only. However, if SBI PMU is not enabled in the host, the guest will fallback to the legacy PMU which will try to access cycle/instret and result in an illegal instruction trap which is not desired. KVM can allow dummy emulation of cycle/instret only for the guest if SBI PMU is not enabled in the host. The dummy emulation will still return zero as we don't to expose the host counter values from a guest using legacy PMU. Fixes: a9ac6c37521f ("RISC-V: KVM: Implement trap & emulate for hpmcounters") Signed-off-by: Atish Patra Link: https://lore.kernel.org/r/20240816-kvm_pmu_fixes-v1-1-cdfce386dd93@rivosinc.com Signed-off-by: Anup Patel Signed-off-by: Sasha Levin --- arch/riscv/include/asm/kvm_vcpu_pmu.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h index 395518a1664e..6823271bce32 100644 --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h @@ -10,6 +10,7 @@ #define __KVM_VCPU_RISCV_PMU_H #include +#include #include #ifdef CONFIG_RISCV_PMU_SBI @@ -92,8 +93,20 @@ void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu); struct kvm_pmu { }; +static inline int kvm_riscv_vcpu_pmu_read_legacy(struct kvm_vcpu *vcpu, unsigned int csr_num, + unsigned long *val, unsigned long new_val, + unsigned long wr_mask) +{ + if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) { + *val = 0; + return KVM_INSN_CONTINUE_NEXT_SEPC; + } else { + return KVM_INSN_ILLEGAL_TRAP; + } +} + #define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \ -{.base = 0, .count = 0, .func = NULL }, +{.base = CSR_CYCLE, .count = 3, .func = kvm_riscv_vcpu_pmu_read_legacy }, static inline void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) {} static inline int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid) From 032ca566f58971dab57f4e4d4d9663bb5393323f Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 16 Aug 2024 00:08:09 -0700 Subject: [PATCH 017/534] RISC-V: KVM: Fix to allow hpmcounter31 from the guest [ Upstream commit 5aa09297a3dcc798d038bd7436f8c90f664045a6 ] The csr_fun defines a count parameter which defines the total number CSRs emulated in KVM starting from the base. This value should be equal to total number of counters possible for trap/emulation (32). Fixes: a9ac6c37521f ("RISC-V: KVM: Implement trap & emulate for hpmcounters") Signed-off-by: Atish Patra Link: https://lore.kernel.org/r/20240816-kvm_pmu_fixes-v1-2-cdfce386dd93@rivosinc.com Signed-off-by: Anup Patel Signed-off-by: Sasha Levin --- arch/riscv/include/asm/kvm_vcpu_pmu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h index 6823271bce32..a50a1d23523f 100644 --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h @@ -58,11 +58,11 @@ struct kvm_pmu { #if defined(CONFIG_32BIT) #define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \ -{.base = CSR_CYCLEH, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm }, \ -{.base = CSR_CYCLE, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm }, +{.base = CSR_CYCLEH, .count = 32, .func = kvm_riscv_vcpu_pmu_read_hpm }, \ +{.base = CSR_CYCLE, .count = 32, .func = kvm_riscv_vcpu_pmu_read_hpm }, #else #define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \ -{.base = CSR_CYCLE, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm }, +{.base = CSR_CYCLE, .count = 32, .func = kvm_riscv_vcpu_pmu_read_hpm }, #endif int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid); From c82ea72d96ddfe6b3efa284ac8fb63b9d1665ad3 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Tue, 30 Jul 2024 10:58:13 +0200 Subject: [PATCH 018/534] mount: handle OOM on mnt_warn_timestamp_expiry [ Upstream commit 4bcda1eaf184e308f07f9c61d3a535f9ce477ce8 ] If no page could be allocated, an error pointer was used as format string in pr_warn. Rearrange the code to return early in case of OOM. Also add a check for the return value of d_path. Fixes: f8b92ba67c5d ("mount: Add mount warning for impending timestamp expiry") Signed-off-by: Olaf Hering Link: https://lore.kernel.org/r/20240730085856.32385-1-olaf@aepfle.de [brauner: rewrite commit and commit message] Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/namespace.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index e6c61d4997cc..b4385e2413d5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2796,8 +2796,15 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount * if (!__mnt_is_readonly(mnt) && (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) && (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) { - char *buf = (char *)__get_free_page(GFP_KERNEL); - char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM); + char *buf, *mntpath; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (buf) + mntpath = d_path(mountpoint, buf, PAGE_SIZE); + else + mntpath = ERR_PTR(-ENOMEM); + if (IS_ERR(mntpath)) + mntpath = "(unknown)"; pr_warn("%s filesystem being %s at %s supports timestamps until %ptTd (0x%llx)\n", sb->s_type->name, @@ -2805,8 +2812,9 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount * mntpath, &sb->s_time_max, (unsigned long long)sb->s_time_max); - free_page((unsigned long)buf); sb->s_iflags |= SB_I_TS_EXPIRY_WARNED; + if (buf) + free_page((unsigned long)buf); } } From 9fc60f2bdd43e758bdf0305c0fc83221419ddb3f Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Mon, 29 Jul 2024 17:05:51 +0100 Subject: [PATCH 019/534] ARM: 9410/1: vfp: Use asm volatile in fmrx/fmxr macros [ Upstream commit 89a906dfa8c3b21b3e5360f73c49234ac1eb885b ] Floating point instructions in userspace can crash some arm kernels built with clang/LLD 17.0.6: BUG: unsupported FP instruction in kernel mode FPEXC == 0xc0000780 Internal error: Oops - undefined instruction: 0 [#1] ARM CPU: 0 PID: 196 Comm: vfp-reproducer Not tainted 6.10.0 #1 Hardware name: BCM2835 PC is at vfp_support_entry+0xc8/0x2cc LR is at do_undefinstr+0xa8/0x250 pc : [] lr : [] psr: a0000013 sp : dc8d1f68 ip : 60000013 fp : bedea19c r10: ec532b17 r9 : 00000010 r8 : 0044766c r7 : c0000780 r6 : ec532b17 r5 : c1c13800 r4 : dc8d1fb0 r3 : c10072c4 r2 : c0101c88 r1 : ec532b17 r0 : 0044766c Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 00c5387d Table: 0251c008 DAC: 00000051 Register r0 information: non-paged memory Register r1 information: vmalloc memory Register r2 information: non-slab/vmalloc memory Register r3 information: non-slab/vmalloc memory Register r4 information: 2-page vmalloc region Register r5 information: slab kmalloc-cg-2k Register r6 information: vmalloc memory Register r7 information: non-slab/vmalloc memory Register r8 information: non-paged memory Register r9 information: zero-size pointer Register r10 information: vmalloc memory Register r11 information: non-paged memory Register r12 information: non-paged memory Process vfp-reproducer (pid: 196, stack limit = 0x61aaaf8b) Stack: (0xdc8d1f68 to 0xdc8d2000) 1f60: 0000081f b6f69300 0000000f c10073f4 c10072c4 dc8d1fb0 1f80: ec532b17 0c532b17 0044766c b6f9ccd8 00000000 c010a80c 00447670 60000010 1fa0: ffffffff c1c13800 00c5387d c0100f10 b6f68af8 00448fc0 00000000 bedea188 1fc0: bedea314 00000001 00448ebc b6f9d000 00447608 b6f9ccd8 00000000 bedea19c 1fe0: bede9198 bedea188 b6e1061c 0044766c 60000010 ffffffff 00000000 00000000 Call trace: [] (vfp_support_entry) from [] (do_undefinstr+0xa8/0x250) [] (do_undefinstr) from [] (__und_usr+0x70/0x80) Exception stack(0xdc8d1fb0 to 0xdc8d1ff8) 1fa0: b6f68af8 00448fc0 00000000 bedea188 1fc0: bedea314 00000001 00448ebc b6f9d000 00447608 b6f9ccd8 00000000 bedea19c 1fe0: bede9198 bedea188 b6e1061c 0044766c 60000010 ffffffff Code: 0a000061 e3877202 e594003c e3a09010 (eef16a10) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Fatal exception in interrupt ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- This is a minimal userspace reproducer on a Raspberry Pi Zero W: #include #include int main(void) { double v = 1.0; printf("%fn", NAN + *(volatile double *)&v); return 0; } Another way to consistently trigger the oops is: calvin@raspberry-pi-zero-w ~$ python -c "import json" The bug reproduces only when the kernel is built with DYNAMIC_DEBUG=n, because the pr_debug() calls act as barriers even when not activated. This is the output from the same kernel source built with the same compiler and DYNAMIC_DEBUG=y, where the userspace reproducer works as expected: VFP: bounce: trigger ec532b17 fpexc c0000780 VFP: emulate: INST=0xee377b06 SCR=0x00000000 VFP: bounce: trigger eef1fa10 fpexc c0000780 VFP: emulate: INST=0xeeb40b40 SCR=0x00000000 VFP: raising exceptions 30000000 calvin@raspberry-pi-zero-w ~$ ./vfp-reproducer nan Crudely grepping for vmsr/vmrs instructions in the otherwise nearly idential text for vfp_support_entry() makes the problem obvious: vmlinux.llvm.good [0xc0101cb8] <+48>: vmrs r7, fpexc vmlinux.llvm.good [0xc0101cd8] <+80>: vmsr fpexc, r0 vmlinux.llvm.good [0xc0101d20] <+152>: vmsr fpexc, r7 vmlinux.llvm.good [0xc0101d38] <+176>: vmrs r4, fpexc vmlinux.llvm.good [0xc0101d6c] <+228>: vmrs r0, fpscr vmlinux.llvm.good [0xc0101dc4] <+316>: vmsr fpexc, r0 vmlinux.llvm.good [0xc0101dc8] <+320>: vmrs r0, fpsid vmlinux.llvm.good [0xc0101dcc] <+324>: vmrs r6, fpscr vmlinux.llvm.good [0xc0101e10] <+392>: vmrs r10, fpinst vmlinux.llvm.good [0xc0101eb8] <+560>: vmrs r10, fpinst2 vmlinux.llvm.bad [0xc0101cb8] <+48>: vmrs r7, fpexc vmlinux.llvm.bad [0xc0101cd8] <+80>: vmsr fpexc, r0 vmlinux.llvm.bad [0xc0101d20] <+152>: vmsr fpexc, r7 vmlinux.llvm.bad [0xc0101d30] <+168>: vmrs r0, fpscr vmlinux.llvm.bad [0xc0101d50] <+200>: vmrs r6, fpscr <== BOOM! vmlinux.llvm.bad [0xc0101d6c] <+228>: vmsr fpexc, r0 vmlinux.llvm.bad [0xc0101d70] <+232>: vmrs r0, fpsid vmlinux.llvm.bad [0xc0101da4] <+284>: vmrs r10, fpinst vmlinux.llvm.bad [0xc0101df8] <+368>: vmrs r4, fpexc vmlinux.llvm.bad [0xc0101e5c] <+468>: vmrs r10, fpinst2 I think LLVM's reordering is valid as the code is currently written: the compiler doesn't know the instructions have side effects in hardware. Fix by using "asm volatile" in fmxr() and fmrx(), so they cannot be reordered with respect to each other. The original compiler now produces working kernels on my hardware with DYNAMIC_DEBUG=n. This is the relevant piece of the diff of the vfp_support_entry() text, from the original oopsing kernel to a working kernel with this patch: vmrs r0, fpscr tst r0, #4096 bne 0xc0101d48 tst r0, #458752 beq 0xc0101ecc orr r7, r7, #536870912 ldr r0, [r4, #0x3c] mov r9, #16 -vmrs r6, fpscr orr r9, r9, #251658240 add r0, r0, #4 str r0, [r4, #0x3c] mvn r0, #159 sub r0, r0, #-1207959552 and r0, r7, r0 vmsr fpexc, r0 vmrs r0, fpsid +vmrs r6, fpscr and r0, r0, #983040 cmp r0, #65536 bne 0xc0101d88 Fixes: 4708fb041346 ("ARM: vfp: Reimplement VFP exception entry in C code") Signed-off-by: Calvin Owens Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/vfp/vfpinstr.h | 44 ++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/arch/arm/vfp/vfpinstr.h b/arch/arm/vfp/vfpinstr.h index 3c7938fd40aa..32090b0fb250 100644 --- a/arch/arm/vfp/vfpinstr.h +++ b/arch/arm/vfp/vfpinstr.h @@ -64,33 +64,37 @@ #ifdef CONFIG_AS_VFP_VMRS_FPINST -#define fmrx(_vfp_) ({ \ - u32 __v; \ - asm(".fpu vfpv2\n" \ - "vmrs %0, " #_vfp_ \ - : "=r" (__v) : : "cc"); \ - __v; \ - }) +#define fmrx(_vfp_) ({ \ + u32 __v; \ + asm volatile (".fpu vfpv2\n" \ + "vmrs %0, " #_vfp_ \ + : "=r" (__v) : : "cc"); \ + __v; \ +}) -#define fmxr(_vfp_,_var_) \ - asm(".fpu vfpv2\n" \ - "vmsr " #_vfp_ ", %0" \ - : : "r" (_var_) : "cc") +#define fmxr(_vfp_, _var_) ({ \ + asm volatile (".fpu vfpv2\n" \ + "vmsr " #_vfp_ ", %0" \ + : : "r" (_var_) : "cc"); \ +}) #else #define vfpreg(_vfp_) #_vfp_ -#define fmrx(_vfp_) ({ \ - u32 __v; \ - asm("mrc p10, 7, %0, " vfpreg(_vfp_) ", cr0, 0 @ fmrx %0, " #_vfp_ \ - : "=r" (__v) : : "cc"); \ - __v; \ - }) +#define fmrx(_vfp_) ({ \ + u32 __v; \ + asm volatile ("mrc p10, 7, %0, " vfpreg(_vfp_) "," \ + "cr0, 0 @ fmrx %0, " #_vfp_ \ + : "=r" (__v) : : "cc"); \ + __v; \ +}) -#define fmxr(_vfp_,_var_) \ - asm("mcr p10, 7, %0, " vfpreg(_vfp_) ", cr0, 0 @ fmxr " #_vfp_ ", %0" \ - : : "r" (_var_) : "cc") +#define fmxr(_vfp_, _var_) ({ \ + asm volatile ("mcr p10, 7, %0, " vfpreg(_vfp_) "," \ + "cr0, 0 @ fmxr " #_vfp_ ", %0" \ + : : "r" (_var_) : "cc"); \ +}) #endif From 288cbc505e2046638c615c36357cb78bc9fee1e0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 20 Aug 2024 11:41:34 +0300 Subject: [PATCH 020/534] powercap: intel_rapl: Fix off by one in get_rpi() [ Upstream commit 95f6580352a7225e619551febb83595bcb77ab17 ] The rp->priv->rpi array is either rpi_msr or rpi_tpmi which have NR_RAPL_PRIMITIVES number of elements. Thus the > needs to be >= to prevent an off by one access. Fixes: 98ff639a7289 ("powercap: intel_rapl: Support per Interface primitive information") Signed-off-by: Dan Carpenter Acked-by: Zhang Rui Link: https://patch.msgid.link/86e3a059-504d-4795-a5ea-4a653f3b41f8@stanley.mountain Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/powercap/intel_rapl_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 3dfe45ac300a..f1de4111e98d 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -738,7 +738,7 @@ static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim) { struct rapl_primitive_info *rpi = rp->priv->rpi; - if (prim < 0 || prim > NR_RAPL_PRIMITIVES || !rpi) + if (prim < 0 || prim >= NR_RAPL_PRIMITIVES || !rpi) return NULL; return &rpi[prim]; From be158b7e6a46572cf23c1d93ff81a5b760cad9e9 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 21 Aug 2024 17:44:01 +0100 Subject: [PATCH 021/534] kselftest/arm64: signal: fix/refactor SVE vector length enumeration [ Upstream commit 5225b6562b9a7dc808d5a1e465aaf5e2ebb220cd ] Currently a number of SVE/SME related tests have almost identical functions to enumerate all supported vector lengths. However over time the copy&pasted code has diverged, allowing some bugs to creep in: - fake_sigreturn_sme_change_vl reports a failure, not a SKIP if only one vector length is supported (but the SVE version is fine) - fake_sigreturn_sme_change_vl tries to set the SVE vector length, not the SME one (but the other SME tests are fine) - za_no_regs keeps iterating forever if only one vector length is supported (but za_regs is correct) Since those bugs seem to be mostly copy&paste ones, let's consolidate the enumeration loop into one shared function, and just call that from each test. That should fix the above bugs, and prevent similar issues from happening again. Fixes: 4963aeb35a9e ("kselftest/arm64: signal: Add SME signal handling tests") Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20240821164401.3598545-1-andre.przywara@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- tools/testing/selftests/arm64/signal/Makefile | 2 +- .../selftests/arm64/signal/sve_helpers.c | 56 +++++++++++++++++++ .../selftests/arm64/signal/sve_helpers.h | 21 +++++++ .../testcases/fake_sigreturn_sme_change_vl.c | 32 +++-------- .../testcases/fake_sigreturn_sve_change_vl.c | 30 ++-------- .../arm64/signal/testcases/ssve_regs.c | 36 +++--------- .../arm64/signal/testcases/ssve_za_regs.c | 36 +++--------- .../arm64/signal/testcases/sve_regs.c | 32 +++-------- .../arm64/signal/testcases/za_no_regs.c | 32 +++-------- .../arm64/signal/testcases/za_regs.c | 36 +++--------- 10 files changed, 132 insertions(+), 181 deletions(-) create mode 100644 tools/testing/selftests/arm64/signal/sve_helpers.c create mode 100644 tools/testing/selftests/arm64/signal/sve_helpers.h diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index 8f5febaf1a9a..edb3613513b8 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -23,7 +23,7 @@ $(TEST_GEN_PROGS): $(PROGS) # Common test-unit targets to build common-layout test-cases executables # Needs secondary expansion to properly include the testcase c-file in pre-reqs COMMON_SOURCES := test_signals.c test_signals_utils.c testcases/testcases.c \ - signals.S + signals.S sve_helpers.c COMMON_HEADERS := test_signals.h test_signals_utils.h testcases/testcases.h .SECONDEXPANSION: diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.c b/tools/testing/selftests/arm64/signal/sve_helpers.c new file mode 100644 index 000000000000..0acc121af306 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/sve_helpers.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 ARM Limited + * + * Common helper functions for SVE and SME functionality. + */ + +#include +#include +#include +#include + +unsigned int vls[SVE_VQ_MAX]; +unsigned int nvls; + +int sve_fill_vls(bool use_sme, int min_vls) +{ + int vq, vl; + int pr_set_vl = use_sme ? PR_SME_SET_VL : PR_SVE_SET_VL; + int len_mask = use_sme ? PR_SME_VL_LEN_MASK : PR_SVE_VL_LEN_MASK; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(pr_set_vl, vq * 16); + if (vl == -1) + return KSFT_FAIL; + + vl &= len_mask; + + /* + * Unlike SVE, SME does not require the minimum vector length + * to be implemented, or the VLs to be consecutive, so any call + * to the prctl might return the single implemented VL, which + * might be larger than 16. So to avoid this loop never + * terminating, bail out here when we find a higher VL than + * we asked for. + * See the ARM ARM, DDI 0487K.a, B1.4.2: I_QQRNR and I_NWYBP. + */ + if (vq < sve_vq_from_vl(vl)) + break; + + /* Skip missing VLs */ + vq = sve_vq_from_vl(vl); + + vls[nvls++] = vl; + } + + if (nvls < min_vls) { + fprintf(stderr, "Only %d VL supported\n", nvls); + return KSFT_SKIP; + } + + return KSFT_PASS; +} diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h new file mode 100644 index 000000000000..50948ce471cc --- /dev/null +++ b/tools/testing/selftests/arm64/signal/sve_helpers.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 ARM Limited + * + * Common helper functions for SVE and SME functionality. + */ + +#ifndef __SVE_HELPERS_H__ +#define __SVE_HELPERS_H__ + +#include + +#define VLS_USE_SVE false +#define VLS_USE_SME true + +extern unsigned int vls[]; +extern unsigned int nvls; + +int sve_fill_vls(bool use_sme, int min_vls); + +#endif diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c index ebd5815b54bb..cb8c051b5c8f 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c @@ -6,44 +6,28 @@ * handler, this is not supported and is expected to segfault. */ +#include #include #include #include #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" struct fake_sigframe sf; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 2); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SME_VL_LEN_MASK; + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least two VLs */ - if (nvls < 2) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } - - return true; + return false; } static int fake_sigreturn_ssve_change_vl(struct tdescr *td, diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c index e2a452190511..e1ccf8f85a70 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c @@ -12,40 +12,22 @@ #include #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" struct fake_sigframe sf; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sve_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SVE, 2); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SVE_VL_LEN_MASK; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least two VLs */ - if (nvls < 2) { - fprintf(stderr, "Only %d VL supported\n", nvls); + if (res == KSFT_SKIP) td->result = KSFT_SKIP; - return false; - } - return true; + return false; } static int fake_sigreturn_sve_change_vl(struct tdescr *td, diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c index 3d37daafcff5..6dbe48cf8b09 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -6,51 +6,31 @@ * set up as expected. */ +#include #include #include #include #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 64]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SME_VL_LEN_MASK; + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - /* Did we find the lowest supported VL? */ - if (vq < sve_vq_from_vl(vl)) - break; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } - - return true; + return false; } static void setup_ssve_regs(void) diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c index 9dc5f128bbc0..5557e116e973 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c @@ -6,51 +6,31 @@ * signal frames is set up as expected when enabled simultaneously. */ +#include #include #include #include #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 128]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SME_VL_LEN_MASK; + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - /* Did we find the lowest supported VL? */ - if (vq < sve_vq_from_vl(vl)) - break; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } - - return true; + return false; } static void setup_regs(void) diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c index 8b16eabbb769..8143eb1c58c1 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c @@ -6,47 +6,31 @@ * expected. */ +#include #include #include #include #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 64]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sve_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SVE, 1); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SVE_VL_LEN_MASK; + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } - - return true; + return false; } static void setup_sve_regs(void) diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c index 4d6f94b6178f..ce26e9c2fa5e 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c @@ -6,47 +6,31 @@ * expected. */ +#include #include #include #include #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 128]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SME_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SME_VL_LEN_MASK; + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } - - return true; + return false; } static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index 174ad6656696..b9e13f27f1f9 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -6,51 +6,31 @@ * expected. */ +#include #include #include #include #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 128]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SME_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SME_VL_LEN_MASK; + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - /* Did we find the lowest supported VL? */ - if (vq < sve_vq_from_vl(vl)) - break; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } - - return true; + return false; } static void setup_za_regs(void) From 3b839d4619042b02eecdfc986484ac6e6be6acbf Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Thu, 22 Aug 2024 11:33:31 +0800 Subject: [PATCH 022/534] drivers/perf: Fix ali_drw_pmu driver interrupt status clearing [ Upstream commit a3dd920977dccc453c550260c4b7605b280b79c3 ] The alibaba_uncore_pmu driver forgot to clear all interrupt status in the interrupt processing function. After the PMU counter overflow interrupt occurred, an interrupt storm occurred, causing the system to hang. Therefore, clear the correct interrupt status in the interrupt handling function to fix it. Fixes: cf7b61073e45 ("drivers/perf: add DDR Sub-System Driveway PMU driver for Yitian 710 SoC") Signed-off-by: Jing Zhang Reviewed-by: Shuai Xue Acked-by: Mark Rutland Link: https://lore.kernel.org/r/1724297611-20686-1-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/alibaba_uncore_drw_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index 19d459a36be5..818ce4424d34 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -408,7 +408,7 @@ static irqreturn_t ali_drw_pmu_isr(int irq_num, void *data) } /* clear common counter intr status */ - clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, 1); + clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, status); writel(clr_status, drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR); } From d54455a3a965feb547711aff7afd2ca5deadb99c Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 29 Jul 2024 15:48:16 +0800 Subject: [PATCH 023/534] wifi: mac80211: don't use rate mask for offchannel TX either [ Upstream commit e7a7ef9a0742dbd0818d5b15fba2c5313ace765b ] Like the commit ab9177d83c04 ("wifi: mac80211: don't use rate mask for scanning"), ignore incorrect settings to avoid no supported rate warning reported by syzbot. The syzbot did bisect and found cause is commit 9df66d5b9f45 ("cfg80211: fix default HE tx bitrate mask in 2G band"), which however corrects bitmask of HE MCS and recognizes correctly settings of empty legacy rate plus HE MCS rate instead of returning -EINVAL. As suggestions [1], follow the change of SCAN TX to consider this case of offchannel TX as well. [1] https://lore.kernel.org/linux-wireless/6ab2dc9c3afe753ca6fdcdd1421e7a1f47e87b84.camel@sipsolutions.net/T/#m2ac2a6d2be06a37c9c47a3d8a44b4f647ed4f024 Reported-by: syzbot+8dd98a9e98ee28dc484a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-wireless/000000000000fdef8706191a3f7b@google.com/ Fixes: 9df66d5b9f45 ("cfg80211: fix default HE tx bitrate mask in 2G band") Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240729074816.20323-1-pkshih@realtek.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- include/net/mac80211.h | 7 ++++--- net/mac80211/offchannel.c | 1 + net/mac80211/rate.c | 2 +- net/mac80211/scan.c | 2 +- net/mac80211/tx.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a39bd4169f29..47ade676565d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -936,8 +936,9 @@ enum mac80211_tx_info_flags { * of their QoS TID or other priority field values. * @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally * for sequence number assignment - * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted - * due to scanning, not in normal operation on the interface. + * @IEEE80211_TX_CTRL_DONT_USE_RATE_MASK: Don't use rate mask for this frame + * which is transmitted due to scanning or offchannel TX, not in normal + * operation on the interface. * @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this * frame should be transmitted on the specific link. This really is * only relevant for frames that do not have data present, and is @@ -958,7 +959,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9), - IEEE80211_TX_CTRL_SCAN_TX = BIT(10), + IEEE80211_TX_CTRL_DONT_USE_RATE_MASK = BIT(10), IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000, }; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 5bedd9cef414..2517a5521a57 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -940,6 +940,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, } IEEE80211_SKB_CB(skb)->flags = flags; + IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK; skb->dev = sdata->dev; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 3cf252418bd3..78e7ac6c0af0 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -890,7 +890,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, if (ieee80211_is_tx_data(skb)) rate_control_apply_mask(sdata, sta, sband, dest, max_rates); - if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) + if (!(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) mask = sdata->rc_rateidx_mask[info->band]; if (dest[0].idx < 0) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 3d68db738cde..b58d061333c5 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -636,7 +636,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); } IEEE80211_SKB_CB(skb)->flags |= tx_flags; - IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX; + IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK; ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 415e951e4138..45a093d3f1fa 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -706,7 +706,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) { + if (unlikely(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) { txrc.rate_idx_mask = ~0; } else { txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; From 305b7827cf5d5f6693c25d4ed3548387f1f167b3 Mon Sep 17 00:00:00 2001 From: Golan Ben Ami Date: Thu, 13 Jun 2024 17:11:23 +0300 Subject: [PATCH 024/534] wifi: iwlwifi: remove AX101, AX201 and AX203 support from LNL [ Upstream commit 6adae0b081454393ca5f7363fd3c7379c8e2a7a1 ] LNL is the codename for the upcoming Series 2 Core Ultra processors designed by Intel. AX101, AX201 and AX203 devices are not shiped on LNL platforms, so don't support them. Signed-off-by: Golan Ben Ami Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240613171043.f24a228dfd96.I989a2d3f1513211bc49ac8143ee4e9e341e1ee67@changeid Signed-off-by: Johannes Berg Stable-dep-of: 8131dd52810d ("wifi: iwlwifi: config: label 'gl' devices as discrete") Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index dea4d6478b4f..365e19314f2c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -504,7 +504,37 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2727, PCI_ANY_ID, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0x272D, PCI_ANY_ID, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0x272b, PCI_ANY_ID, iwl_bz_trans_cfg)}, - {IWL_PCI_DEVICE(0xA840, PCI_ANY_ID, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0000, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0090, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0094, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0098, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x009C, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x00C0, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x00C4, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x00E0, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x00E4, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x00E8, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x00EC, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0100, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0110, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0114, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0118, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x011C, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0310, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0314, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0510, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x0A10, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x1671, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x1672, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x1771, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x1772, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x1791, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x1792, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x4090, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x40C4, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x40E0, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x4110, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0xA840, 0x4314, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0x7740, PCI_ANY_ID, iwl_bz_trans_cfg)}, /* Sc devices */ From 2c4a7b5014224fcc8cb05dd0c2c0b266d8c898af Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jul 2024 20:20:07 +0300 Subject: [PATCH 025/534] wifi: iwlwifi: config: label 'gl' devices as discrete [ Upstream commit 8131dd52810dfcdb49fcdc78f5e18e1538b6c441 ] The 'gl' devices are in the bz family, but they're not, integrated, so should have their own trans config struct. Fix that, also necessitating the removal of LTR config, and while at it remove 0x2727 and 0x272D IDs that were only used for test chips. Fixes: c30a2a64788b ("wifi: iwlwifi: add a new PCI device ID for BZ device")ticket=none Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240729201718.95aed0620080.Ib9129512c95aa57acc9876bdff8b99dd41e1562c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/cfg/bz.c | 11 +++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 4 +--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c index cc71b513adf9..cebd3c91756f 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c @@ -152,6 +152,17 @@ const struct iwl_cfg_trans_params iwl_bz_trans_cfg = { .ltr_delay = IWL_CFG_TRANS_LTR_DELAY_2500US, }; +const struct iwl_cfg_trans_params iwl_gl_trans_cfg = { + .device_family = IWL_DEVICE_FAMILY_BZ, + .base_params = &iwl_bz_base_params, + .mq_rx_supported = true, + .rf_id = true, + .gen2 = true, + .umac_prph_offset = 0x300000, + .xtal_latency = 12000, + .low_latency_xtal = true, +}; + const char iwl_bz_name[] = "Intel(R) TBD Bz device"; const struct iwl_cfg iwl_cfg_bz = { diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index f45f645ca648..dd3913617bb0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -493,6 +493,7 @@ extern const struct iwl_cfg_trans_params iwl_so_long_latency_trans_cfg; extern const struct iwl_cfg_trans_params iwl_so_long_latency_imr_trans_cfg; extern const struct iwl_cfg_trans_params iwl_ma_trans_cfg; extern const struct iwl_cfg_trans_params iwl_bz_trans_cfg; +extern const struct iwl_cfg_trans_params iwl_gl_trans_cfg; extern const struct iwl_cfg_trans_params iwl_sc_trans_cfg; extern const char iwl9162_name[]; extern const char iwl9260_name[]; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 365e19314f2c..4a2de79f2e86 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -501,9 +501,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x7E40, PCI_ANY_ID, iwl_ma_trans_cfg)}, /* Bz devices */ - {IWL_PCI_DEVICE(0x2727, PCI_ANY_ID, iwl_bz_trans_cfg)}, - {IWL_PCI_DEVICE(0x272D, PCI_ANY_ID, iwl_bz_trans_cfg)}, - {IWL_PCI_DEVICE(0x272b, PCI_ANY_ID, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0x272b, PCI_ANY_ID, iwl_gl_trans_cfg)}, {IWL_PCI_DEVICE(0xA840, 0x0000, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0xA840, 0x0090, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0xA840, 0x0094, iwl_bz_trans_cfg)}, From 1661f1352b5567163b1a73fedf11839d04c4b8b6 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 29 Jul 2024 20:20:12 +0300 Subject: [PATCH 026/534] wifi: iwlwifi: mvm: increase the time between ranging measurements [ Upstream commit 3a7ee94559dfd640604d0265739e86dec73b64e8 ] The algo running in fw may take a little longer than 5 milliseconds, (e.g. measurement on 80MHz while associated). Increase the minimum time between measurements to 7 milliseconds. Fixes: 830aa3e7d1ca ("iwlwifi: mvm: add support for range request command version 13") Signed-off-by: Avraham Stern Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240729201718.d3f3c26e00d9.I09e951290e8a3d73f147b88166fd9a678d1d69ed@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/constants.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h index 243eccc68cb0..f7bec6f3d758 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h @@ -103,7 +103,7 @@ #define IWL_MVM_FTM_INITIATOR_SECURE_LTF false #define IWL_MVM_FTM_RESP_NDP_SUPPORT true #define IWL_MVM_FTM_RESP_LMR_FEEDBACK_SUPPORT true -#define IWL_MVM_FTM_NON_TB_MIN_TIME_BETWEEN_MSR 5 +#define IWL_MVM_FTM_NON_TB_MIN_TIME_BETWEEN_MSR 7 #define IWL_MVM_FTM_NON_TB_MAX_TIME_BETWEEN_MSR 1000 #define IWL_MVM_D3_DEBUG false #define IWL_MVM_USE_TWT true From 39dd1f1f48d39272260a88d7f927f0b435016e16 Mon Sep 17 00:00:00 2001 From: Kamlesh Gurudasani Date: Thu, 22 Aug 2024 02:32:52 +0530 Subject: [PATCH 027/534] padata: Honor the caller's alignment in case of chunk_size 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 24cc57d8faaa4060fd58adf810b858fcfb71a02f ] In the case where we are forcing the ps.chunk_size to be at least 1, we are ignoring the caller's alignment. Move the forcing of ps.chunk_size to be at least 1 before rounding it up to caller's alignment, so that caller's alignment is honored. While at it, use max() to force the ps.chunk_size to be at least 1 to improve readability. Fixes: 6d45e1c948a8 ("padata: Fix possible divide-by-0 panic in padata_mt_helper()") Signed-off-by: Kamlesh Gurudasani Acked-by:  Waiman Long Acked-by: Daniel Jordan Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- kernel/padata.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/padata.c b/kernel/padata.c index 29545dd6dd53..da788a58d19c 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -511,9 +511,12 @@ void __init padata_do_multithreaded(struct padata_mt_job *job) * thread function. Load balance large jobs between threads by * increasing the number of chunks, guarantee at least the minimum * chunk size from the caller, and honor the caller's alignment. + * Ensure chunk_size is at least 1 to prevent divide-by-0 + * panic in padata_mt_helper(). */ ps.chunk_size = job->size / (ps.nworks * load_balance_factor); ps.chunk_size = max(ps.chunk_size, job->min_chunk); + ps.chunk_size = max(ps.chunk_size, 1ul); ps.chunk_size = roundup(ps.chunk_size, job->align); /* From 6206a0edb29bcb57f89255538c66f25b653230dc Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 29 Aug 2024 17:03:30 +0800 Subject: [PATCH 028/534] drivers/perf: hisi_pcie: Record hardware counts correctly [ Upstream commit daecd3373a16a039ad241086e30a1ec46fc9d61f ] Currently we set the period and record it as the initial value of the counter without checking it's set to the hardware successfully or not. However the counter maybe unwritable if the target event is unsupported by the device. In such case we will pass user a wrong count: [start counts when setting the period] hwc->prev_count = 0x8000000000000000 device.counter_value = 0 // the counter is not set as the period [when user reads the counter] event->count = device.counter_value - hwc->prev_count = 0x8000000000000000 // wrong. should be 0. Fix this by record the hardware counter counts correctly when setting the period. Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240829090332.28756-2-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 430ca15373fe..570ae69c38ec 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -458,10 +458,24 @@ static void hisi_pcie_pmu_set_period(struct perf_event *event) struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + u64 orig_cnt, cnt; + + orig_cnt = hisi_pcie_pmu_read_counter(event); local64_set(&hwc->prev_count, HISI_PCIE_INIT_VAL); hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_CNT, idx, HISI_PCIE_INIT_VAL); hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EXT_CNT, idx, HISI_PCIE_INIT_VAL); + + /* + * The counter maybe unwritable if the target event is unsupported. + * Check this by comparing the counts after setting the period. If + * the counts stay unchanged after setting the period then update + * the hwc->prev_count correctly. Otherwise the final counts user + * get maybe totally wrong. + */ + cnt = hisi_pcie_pmu_read_counter(event); + if (orig_cnt == cnt) + local64_set(&hwc->prev_count, cnt); } static void hisi_pcie_pmu_enable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc) From 666a46a90f186dd1860d84ce4dd1ec1aab0d37d8 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 29 Aug 2024 17:03:31 +0800 Subject: [PATCH 029/534] drivers/perf: hisi_pcie: Fix TLP headers bandwidth counting [ Upstream commit 17bf68aeb3642221e3e770399b5a52f370747ac1 ] We make the initial value of event ctrl register as HISI_PCIE_INIT_SET and modify according to the user options. This will make TLP headers bandwidth only counting never take effect since HISI_PCIE_INIT_SET configures to count the TLP payloads bandwidth. Fix this by making the initial value of event ctrl register as 0. Fixes: 17d573984d4d ("drivers/perf: hisi: Add TLP filter support") Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240829090332.28756-3-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 570ae69c38ec..4a902da5c1d4 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -221,7 +221,7 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event) struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u64 port, trig_len, thr_len, len_mode; - u64 reg = HISI_PCIE_INIT_SET; + u64 reg = 0; /* Config HISI_PCIE_EVENT_CTRL according to event. */ reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event)); From 37c5024e464cec32da861512477b695f67e4fe6a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 29 Aug 2024 18:20:09 +0100 Subject: [PATCH 030/534] kselftest/arm64: Actually test SME vector length changes via sigreturn [ Upstream commit 6f0315330af7a57c1c00587fdfb69c7778bf1c50 ] The test case for SME vector length changes via sigreturn use a bit too much cut'n'paste and only actually changed the SVE vector length in the test itself. Andre's recent factoring out of the initialisation code caused this to be exposed and the test to start failing. Fix the test to actually cover the thing it's supposed to test. Fixes: 4963aeb35a9e ("kselftest/arm64: signal: Add SME signal handling tests") Signed-off-by: Mark Brown Reviewed-by: Andre Przywara Tested-by: Andre Przywara Link: https://lore.kernel.org/r/20240829-arm64-sme-signal-vl-change-test-v1-1-42d7534cb818@kernel.org Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- .../testcases/fake_sigreturn_sme_change_vl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c index cb8c051b5c8f..dfd6a2badf9f 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c @@ -35,30 +35,30 @@ static int fake_sigreturn_ssve_change_vl(struct tdescr *td, { size_t resv_sz, offset; struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); - struct sve_context *sve; + struct za_context *za; /* Get a signal context with a SME ZA frame in it */ if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); - head = get_header(head, SVE_MAGIC, resv_sz, &offset); + head = get_header(head, ZA_MAGIC, resv_sz, &offset); if (!head) { - fprintf(stderr, "No SVE context\n"); + fprintf(stderr, "No ZA context\n"); return 1; } - if (head->size != sizeof(struct sve_context)) { + if (head->size != sizeof(struct za_context)) { fprintf(stderr, "Register data present, aborting\n"); return 1; } - sve = (struct sve_context *)head; + za = (struct za_context *)head; /* No changes are supported; init left us at minimum VL so go to max */ fprintf(stderr, "Attempting to change VL from %d to %d\n", - sve->vl, vls[0]); - sve->vl = vls[0]; + za->vl, vls[0]); + za->vl = vls[0]; fake_sigreturn(&sf, sizeof(sf), 0); From fa3ef5ea3ff40d84b2e311fafb0d7f3f92a946cb Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 29 Aug 2024 20:48:23 +0800 Subject: [PATCH 031/534] can: j1939: use correct function name in comment [ Upstream commit dc2ddcd136fe9b6196a7dd01f75f824beb02d43f ] The function j1939_cancel_all_active_sessions() was renamed to j1939_cancel_active_session() but name in comment wasn't updated. Signed-off-by: Zhang Changzhong Acked-by: Oleksij Rempel Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://patch.msgid.link/1724935703-44621-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/j1939/transport.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 4be73de5033c..319f47df3330 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1179,10 +1179,10 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) break; case -ENETDOWN: /* In this case we should get a netdev_event(), all active - * sessions will be cleared by - * j1939_cancel_all_active_sessions(). So handle this as an - * error, but let j1939_cancel_all_active_sessions() do the - * cleanup including propagation of the error to user space. + * sessions will be cleared by j1939_cancel_active_session(). + * So handle this as an error, but let + * j1939_cancel_active_session() do the cleanup including + * propagation of the error to user space. */ break; case -EOVERFLOW: From 8ad28208be7bbe748e90442c45963ddbef0fd1e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Mon, 26 Aug 2024 12:16:44 +0200 Subject: [PATCH 032/534] ACPI: CPPC: Fix MASK_VAL() usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 60949b7b805424f21326b450ca4f1806c06d982e ] MASK_VAL() was added as a way to handle bit_offset and bit_width for registers located in system memory address space. However, while suited for reading, it does not work for writing and result in corrupted registers when writing values with bit_offset > 0. Moreover, when a register is collocated with another one at the same address but with a different mask, the current code results in the other registers being overwritten with 0s. The write procedure for SYSTEM_MEMORY registers should actually read the value, mask it, update it and write it with the updated value. Moreover, since registers can be located in the same word, we must take care of locking the access before doing it. We should potentially use a global lock since we don't know in if register addresses aren't shared with another _CPC package but better not encourage vendors to do so. Assume that registers can use the same word inside a _CPC package and thus, use a per _CPC package lock. Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Clément Léger Link: https://patch.msgid.link/20240826101648.95654-1-cleger@rivosinc.com [ rjw: Dropped redundant semicolon ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/cppc_acpi.c | 43 ++++++++++++++++++++++++++++++++++++---- include/acpi/cppc_acpi.h | 2 ++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index d6934ba7a315..28217a995f79 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -167,8 +167,11 @@ show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); #define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width) /* Shift and apply the mask for CPC reads/writes */ -#define MASK_VAL(reg, val) (((val) >> (reg)->bit_offset) & \ +#define MASK_VAL_READ(reg, val) (((val) >> (reg)->bit_offset) & \ GENMASK(((reg)->bit_width) - 1, 0)) +#define MASK_VAL_WRITE(reg, prev_val, val) \ + ((((val) & GENMASK(((reg)->bit_width) - 1, 0)) << (reg)->bit_offset) | \ + ((prev_val) & ~(GENMASK(((reg)->bit_width) - 1, 0) << (reg)->bit_offset))) \ static ssize_t show_feedback_ctrs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -852,6 +855,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) /* Store CPU Logical ID */ cpc_ptr->cpu_id = pr->id; + spin_lock_init(&cpc_ptr->rmw_lock); /* Parse PSD data for this CPU */ ret = acpi_get_psd(cpc_ptr, handle); @@ -1057,7 +1061,7 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) } if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - *val = MASK_VAL(reg, *val); + *val = MASK_VAL_READ(reg, *val); return 0; } @@ -1066,9 +1070,11 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) { int ret_val = 0; int size; + u64 prev_val; void __iomem *vaddr = NULL; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; + struct cpc_desc *cpc_desc; size = GET_BIT_WIDTH(reg); @@ -1101,8 +1107,34 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) return acpi_os_write_memory((acpi_physical_address)reg->address, val, size); - if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - val = MASK_VAL(reg, val); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + cpc_desc = per_cpu(cpc_desc_ptr, cpu); + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpu); + return -ENODEV; + } + + spin_lock(&cpc_desc->rmw_lock); + switch (size) { + case 8: + prev_val = readb_relaxed(vaddr); + break; + case 16: + prev_val = readw_relaxed(vaddr); + break; + case 32: + prev_val = readl_relaxed(vaddr); + break; + case 64: + prev_val = readq_relaxed(vaddr); + break; + default: + spin_unlock(&cpc_desc->rmw_lock); + return -EFAULT; + } + val = MASK_VAL_WRITE(reg, prev_val, val); + val |= prev_val; + } switch (size) { case 8: @@ -1129,6 +1161,9 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) break; } + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + spin_unlock(&cpc_desc->rmw_lock); + return ret_val; } diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index c0b69ffe7bdb..ec425d2834f8 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -64,6 +64,8 @@ struct cpc_desc { int cpu_id; int write_cmd_status; int write_cmd_id; + /* Lock used for RMW operations in cpc_write() */ + spinlock_t rmw_lock; struct cpc_register_resource cpc_regs[MAX_CPC_REG_ENT]; struct acpi_psd_package domain_info; struct kobject kobj; From 08b25d59ffb049ca6c3ed87fecbbef70c9a16f39 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:06:41 +0200 Subject: [PATCH 033/534] netfilter: nf_tables: elements with timeout below CONFIG_HZ never expire [ Upstream commit e0c47281723f301894c14e6f5cd5884fdfb813f9 ] Element timeout that is below CONFIG_HZ never expires because the timeout extension is not allocated given that nf_msecs_to_jiffies64() returns 0. Set timeout to the minimum value to honor timeout. Fixes: 8e1102d5a159 ("netfilter: nf_tables: support timeouts larger than 23 days") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index da5684e3fd08..b28acdaaf970 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4462,7 +4462,7 @@ int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) return -ERANGE; ms *= NSEC_PER_MSEC; - *result = nsecs_to_jiffies64(ms); + *result = nsecs_to_jiffies64(ms) ? : !!ms; return 0; } From 2a5e648a0cb682181b78f14fea78ec621bf4fc9e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:06:49 +0200 Subject: [PATCH 034/534] netfilter: nf_tables: reject element expiration with no timeout [ Upstream commit d2dc429ecb4e79ad164028d965c00f689e6f6d06 ] If element timeout is unset and set provides no default timeout, the element expiration is silently ignored, reject this instead to let user know this is unsupported. Also prepare for supporting timeout that never expire, where zero timeout and expiration must be also rejected. Fixes: 8e1102d5a159 ("netfilter: nf_tables: support timeouts larger than 23 days") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b28acdaaf970..8bb61fb62a2b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6698,6 +6698,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_EXPIRATION] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (timeout == 0) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION], &expiration); if (err) From 9431e5eddcc153bb55d0fd73fd3ac75969e7c99a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:06:58 +0200 Subject: [PATCH 035/534] netfilter: nf_tables: reject expiration higher than timeout [ Upstream commit c0f38a8c60174368aed1d0f9965d733195f15033 ] Report ERANGE to userspace if user specifies an expiration larger than the timeout. Fixes: 8e1102d5a159 ("netfilter: nf_tables: support timeouts larger than 23 days") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8bb61fb62a2b..842c9ac6e234 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6705,6 +6705,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, &expiration); if (err) return err; + + if (expiration > timeout) + return -ERANGE; } if (nla[NFTA_SET_ELEM_EXPR]) { From 814b8bc5cc0a2d9f0cbee68c9171b07aadaea03b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Sep 2024 01:07:06 +0200 Subject: [PATCH 036/534] netfilter: nf_tables: remove annotation to access set timeout while holding lock [ Upstream commit 15d8605c0cf4fc9cf4386cae658c68a0fd4bdb92 ] Mutex is held when adding an element, no need for READ_ONCE, remove it. Fixes: 123b99619cca ("netfilter: nf_tables: honor set timeout and garbage collection updates") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 842c9ac6e234..6b6c16c5fc9a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6691,7 +6691,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; } else if (set->flags & NFT_SET_TIMEOUT && !(flags & NFT_SET_ELEM_INTERVAL_END)) { - timeout = READ_ONCE(set->timeout); + timeout = set->timeout; } expiration = 0; @@ -6798,7 +6798,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key_end; - if (timeout != READ_ONCE(set->timeout)) { + if (timeout != set->timeout) { err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); if (err < 0) goto err_parse_key_end; From f5c4ec8d0e2f424472216d1d6c55d48b2a4a3d1b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 20 Oct 2023 18:51:26 +0100 Subject: [PATCH 037/534] perf/arm-cmn: Rework DTC counters (again) [ Upstream commit 7633ec2c262fab3e7c5bf3cd3876b5748f584a57 ] The bitmap-based scheme for tracking DTC counter usage turns out to be a complete dead-end for its imagined purpose, since by the time we have to keep track of a per-DTC counter index anyway, we already have enough information to make the bitmap itself redundant. Revert the remains of it back to almost the original scheme, but now expanded to track per-DTC indices, in preparation for making use of them in anger. Note that since cycle count events always use a dedicated counter on a single DTC, we reuse the field to encode their DTC index directly. Signed-off-by: Robin Murphy Reviewed-by: Ilkka Koskinen Link: https://lore.kernel.org/r/5f6ade76b47f033836d7a36c03555da896dfb4a3.1697824215.git.robin.murphy@arm.com Signed-off-by: Will Deacon Stable-dep-of: e79634b53e39 ("perf/arm-cmn: Refactor node ID handling. Again.") Signed-off-by: Sasha Levin --- drivers/perf/arm-cmn.c | 126 +++++++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 62 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 2c684e49a6fc..1c9e42aeb700 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -281,16 +281,13 @@ struct arm_cmn_node { u16 id, logid; enum cmn_node_type type; - int dtm; - union { - /* DN/HN-F/CXHA */ - struct { - u8 val : 4; - u8 count : 4; - } occupid[SEL_MAX]; - /* XP */ - u8 dtc; - }; + u8 dtm; + s8 dtc; + /* DN/HN-F/CXHA */ + struct { + u8 val : 4; + u8 count : 4; + } occupid[SEL_MAX]; union { u8 event[4]; __le32 event_sel; @@ -540,12 +537,12 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_puts(s, "\n |"); for (x = 0; x < cmn->mesh_x; x++) { - u8 dtc = cmn->xps[xp_base + x].dtc; + s8 dtc = cmn->xps[xp_base + x].dtc; - if (dtc & (dtc - 1)) + if (dtc < 0) seq_puts(s, " DTC ?? |"); else - seq_printf(s, " DTC %ld |", __ffs(dtc)); + seq_printf(s, " DTC %d |", dtc); } seq_puts(s, "\n |"); for (x = 0; x < cmn->mesh_x; x++) @@ -589,8 +586,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {} struct arm_cmn_hw_event { struct arm_cmn_node *dn; u64 dtm_idx[4]; - unsigned int dtc_idx; - u8 dtcs_used; + s8 dtc_idx[CMN_MAX_DTCS]; u8 num_dns; u8 dtm_offset; bool wide_sel; @@ -600,6 +596,10 @@ struct arm_cmn_hw_event { #define for_each_hw_dn(hw, dn, i) \ for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++) +/* @i is the DTC number, @idx is the counter index on that DTC */ +#define for_each_hw_dtc_idx(hw, i, idx) \ + for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0) + static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event) { BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target)); @@ -1429,12 +1429,11 @@ static void arm_cmn_init_counter(struct perf_event *event) { struct arm_cmn *cmn = to_cmn(event->pmu); struct arm_cmn_hw_event *hw = to_cmn_hw(event); - unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx); u64 count; - for (i = 0; hw->dtcs_used & (1U << i); i++) { - writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt); - cmn->dtc[i].counters[hw->dtc_idx] = event; + for_each_hw_dtc_idx(hw, i, idx) { + writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx)); + cmn->dtc[i].counters[idx] = event; } count = arm_cmn_read_dtm(cmn, hw, false); @@ -1447,11 +1446,9 @@ static void arm_cmn_event_read(struct perf_event *event) struct arm_cmn_hw_event *hw = to_cmn_hw(event); u64 delta, new, prev; unsigned long flags; - unsigned int i; - if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) { - i = __ffs(hw->dtcs_used); - delta = arm_cmn_read_cc(cmn->dtc + i); + if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) { + delta = arm_cmn_read_cc(cmn->dtc + hw->dtc_idx[0]); local64_add(delta, &event->count); return; } @@ -1461,8 +1458,8 @@ static void arm_cmn_event_read(struct perf_event *event) delta = new - prev; local_irq_save(flags); - for (i = 0; hw->dtcs_used & (1U << i); i++) { - new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx); + for_each_hw_dtc_idx(hw, i, idx) { + new = arm_cmn_read_counter(cmn->dtc + i, idx); delta += new << 16; } local_irq_restore(flags); @@ -1518,7 +1515,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags) int i; if (type == CMN_TYPE_DTC) { - i = __ffs(hw->dtcs_used); + i = hw->dtc_idx[0]; writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR); cmn->dtc[i].cc_active = true; } else if (type == CMN_TYPE_WP) { @@ -1549,7 +1546,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags) int i; if (type == CMN_TYPE_DTC) { - i = __ffs(hw->dtcs_used); + i = hw->dtc_idx[0]; cmn->dtc[i].cc_active = false; } else if (type == CMN_TYPE_WP) { int wp_idx = arm_cmn_wp_idx(event); @@ -1735,12 +1732,19 @@ static int arm_cmn_event_init(struct perf_event *event) hw->dn = arm_cmn_node(cmn, type); if (!hw->dn) return -EINVAL; + + memset(hw->dtc_idx, -1, sizeof(hw->dtc_idx)); for (dn = hw->dn; dn->type == type; dn++) { if (bynodeid && dn->id != nodeid) { hw->dn++; continue; } hw->num_dns++; + if (dn->dtc < 0) + memset(hw->dtc_idx, 0, cmn->num_dtcs); + else + hw->dtc_idx[dn->dtc] = 0; + if (bynodeid) break; } @@ -1752,12 +1756,6 @@ static int arm_cmn_event_init(struct perf_event *event) nodeid, nid.x, nid.y, nid.port, nid.dev, type); return -EINVAL; } - /* - * Keep assuming non-cycles events count in all DTC domains; turns out - * it's hard to make a worthwhile optimisation around this, short of - * going all-in with domain-local counter allocation as well. - */ - hw->dtcs_used = (1U << cmn->num_dtcs) - 1; return arm_cmn_validate_group(cmn, event); } @@ -1783,28 +1781,25 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, } memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx)); - for (i = 0; hw->dtcs_used & (1U << i); i++) - cmn->dtc[i].counters[hw->dtc_idx] = NULL; + for_each_hw_dtc_idx(hw, j, idx) + cmn->dtc[j].counters[idx] = NULL; } static int arm_cmn_event_add(struct perf_event *event, int flags) { struct arm_cmn *cmn = to_cmn(event->pmu); struct arm_cmn_hw_event *hw = to_cmn_hw(event); - struct arm_cmn_dtc *dtc = &cmn->dtc[0]; struct arm_cmn_node *dn; enum cmn_node_type type = CMN_EVENT_TYPE(event); - unsigned int i, dtc_idx, input_sel; + unsigned int input_sel, i = 0; if (type == CMN_TYPE_DTC) { - i = 0; while (cmn->dtc[i].cycles) if (++i == cmn->num_dtcs) return -ENOSPC; cmn->dtc[i].cycles = event; - hw->dtc_idx = CMN_DT_NUM_COUNTERS; - hw->dtcs_used = 1U << i; + hw->dtc_idx[0] = i; if (flags & PERF_EF_START) arm_cmn_event_start(event, 0); @@ -1812,17 +1807,22 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) } /* Grab a free global counter first... */ - dtc_idx = 0; - while (dtc->counters[dtc_idx]) - if (++dtc_idx == CMN_DT_NUM_COUNTERS) - return -ENOSPC; - - hw->dtc_idx = dtc_idx; + for_each_hw_dtc_idx(hw, j, idx) { + if (j > 0) { + idx = hw->dtc_idx[0]; + } else { + idx = 0; + while (cmn->dtc[j].counters[idx]) + if (++idx == CMN_DT_NUM_COUNTERS) + goto free_dtms; + } + hw->dtc_idx[j] = idx; + } /* ...then the local counters to feed it. */ for_each_hw_dn(hw, dn, i) { struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset; - unsigned int dtm_idx, shift; + unsigned int dtm_idx, shift, d = 0; u64 reg; dtm_idx = 0; @@ -1841,11 +1841,11 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) tmp = dtm->wp_event[wp_idx ^ 1]; if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) != - CMN_EVENT_WP_COMBINE(dtc->counters[tmp])) + CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp])) goto free_dtms; input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx; - dtm->wp_event[wp_idx] = dtc_idx; + dtm->wp_event[wp_idx] = hw->dtc_idx[d]; writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx)); } else { struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); @@ -1865,7 +1865,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) dtm->input_sel[dtm_idx] = input_sel; shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx); dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift); - dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift; + dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, hw->dtc_idx[d]) << shift; dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx); reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low; writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG); @@ -1893,7 +1893,7 @@ static void arm_cmn_event_del(struct perf_event *event, int flags) arm_cmn_event_stop(event, PERF_EF_UPDATE); if (type == CMN_TYPE_DTC) - cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL; + cmn->dtc[hw->dtc_idx[0]].cycles = NULL; else arm_cmn_event_clear(cmn, event, hw->num_dns); } @@ -2074,7 +2074,6 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) { struct arm_cmn_node *dn, *xp; int dtc_idx = 0; - u8 dtcs_present = (1 << cmn->num_dtcs) - 1; cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL); if (!cmn->dtc) @@ -2084,23 +2083,26 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP); + if (cmn->part == PART_CMN600 && cmn->num_dtcs > 1) { + /* We do at least know that a DTC's XP must be in that DTC's domain */ + dn = arm_cmn_node(cmn, CMN_TYPE_DTC); + for (int i = 0; i < cmn->num_dtcs; i++) + arm_cmn_node_to_xp(cmn, dn + i)->dtc = i; + } + for (dn = cmn->dns; dn->type; dn++) { - if (dn->type == CMN_TYPE_XP) { - dn->dtc &= dtcs_present; + if (dn->type == CMN_TYPE_XP) continue; - } xp = arm_cmn_node_to_xp(cmn, dn); + dn->dtc = xp->dtc; dn->dtm = xp->dtm; if (cmn->multi_dtm) dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2; if (dn->type == CMN_TYPE_DTC) { - int err; - /* We do at least know that a DTC's XP must be in that DTC's domain */ - if (xp->dtc == 0xf) - xp->dtc = 1 << dtc_idx; - err = arm_cmn_init_dtc(cmn, dn, dtc_idx++); + int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++); + if (err) return err; } @@ -2258,9 +2260,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) cmn->mesh_x = xp->logid; if (cmn->part == PART_CMN600) - xp->dtc = 0xf; + xp->dtc = -1; else - xp->dtc = 1 << arm_cmn_dtc_domain(cmn, xp_region); + xp->dtc = arm_cmn_dtc_domain(cmn, xp_region); xp->dtm = dtm - cmn->dtms; arm_cmn_init_dtm(dtm++, xp, 0); From a1b25661a04a5612eb8a77391fd4939238f5783c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 13 Dec 2023 16:24:07 +0000 Subject: [PATCH 038/534] perf/arm-cmn: Improve debugfs pretty-printing for large configs [ Upstream commit a1083ee717e9bde012268782e084d343314490a4 ] The debugfs pretty-printer was written for the CMN-600 assumptions of a maximum 8x8 mesh, but CMN-700 now allows coordinates and ID values up to 12 and 128 respectively, which can overflow the format strings, mess up the alignment of the table and hurt overall readability. This table does prove useful for double-checking that the driver is picking up the topology of new systems correctly and for verifying user expectations, so tweak the formatting to stay nice and readable with wider values. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/1d1517eadd1bac5992fab679c9dc531b381944da.1702484646.git.robin.murphy@arm.com Signed-off-by: Will Deacon Stable-dep-of: e79634b53e39 ("perf/arm-cmn: Refactor node ID handling. Again.") Signed-off-by: Sasha Levin --- drivers/perf/arm-cmn.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 1c9e42aeb700..3c3a3d3420e1 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -493,6 +493,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d) for (dn = cmn->dns; dn->type; dn++) { struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); + int pad = dn->logid < 10; if (dn->type == CMN_TYPE_XP) continue; @@ -503,7 +504,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d) if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d) continue; - seq_printf(s, " #%-2d |", dn->logid); + seq_printf(s, " %*c#%-*d |", pad + 1, ' ', 3 - pad, dn->logid); return; } seq_puts(s, " |"); @@ -516,7 +517,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_puts(s, " X"); for (x = 0; x < cmn->mesh_x; x++) - seq_printf(s, " %d ", x); + seq_printf(s, " %-2d ", x); seq_puts(s, "\nY P D+"); y = cmn->mesh_y; while (y--) { @@ -526,13 +527,13 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) for (x = 0; x < cmn->mesh_x; x++) seq_puts(s, "--------+"); - seq_printf(s, "\n%d |", y); + seq_printf(s, "\n%-2d |", y); for (x = 0; x < cmn->mesh_x; x++) { struct arm_cmn_node *xp = cmn->xps + xp_base + x; for (p = 0; p < CMN_MAX_PORTS; p++) port[p][x] = arm_cmn_device_connect_info(cmn, xp, p); - seq_printf(s, " XP #%-2d |", xp_base + x); + seq_printf(s, " XP #%-3d|", xp_base + x); } seq_puts(s, "\n |"); From a687d9d1feddee92f1c5880981b3c6ff488e3755 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 2 Sep 2024 18:51:57 +0100 Subject: [PATCH 039/534] perf/arm-cmn: Refactor node ID handling. Again. [ Upstream commit e79634b53e398966c49f803c49701bc74dc3ccf8 ] The scope of the "extra device ports" configuration is not made clear by the CMN documentation - so far we've assumed it applies globally, based on the sole example which suggests as much. However it transpires that this is incorrect, and the format does in fact vary based on each individual XP's port configuration. As a consequence, we're currenly liable to decode the port/device indices from a node ID incorrectly, thus program the wrong event source in the DTM leading to bogus event counts, and also show device topology on the wrong ports in debugfs. To put this right, rework node IDs yet again to carry around the additional data necessary to decode them properly per-XP. At this point the notion of fully decomposing an ID becomes more impractical than it's worth, so unabstracting the XY mesh coordinates (where 2/3 users were just debug anyway) ends up leaving things a bit simpler overall. Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features") Acked-by: Mark Rutland Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/5195f990152fc37adba5fbf5929a6b11063d9f09.1725296395.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/arm-cmn.c | 94 ++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 54 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 3c3a3d3420e1..311a85b6be2e 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -24,14 +24,6 @@ #define CMN_NI_NODE_ID GENMASK_ULL(31, 16) #define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32) -#define CMN_NODEID_DEVID(reg) ((reg) & 3) -#define CMN_NODEID_EXT_DEVID(reg) ((reg) & 1) -#define CMN_NODEID_PID(reg) (((reg) >> 2) & 1) -#define CMN_NODEID_EXT_PID(reg) (((reg) >> 1) & 3) -#define CMN_NODEID_1x1_PID(reg) (((reg) >> 2) & 7) -#define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits))) -#define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1)) - #define CMN_CHILD_INFO 0x0080 #define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0) #define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16) @@ -281,8 +273,11 @@ struct arm_cmn_node { u16 id, logid; enum cmn_node_type type; + /* XP properties really, but replicated to children for convenience */ u8 dtm; s8 dtc; + u8 portid_bits:4; + u8 deviceid_bits:4; /* DN/HN-F/CXHA */ struct { u8 val : 4; @@ -358,49 +353,33 @@ struct arm_cmn { static int arm_cmn_hp_state; struct arm_cmn_nodeid { - u8 x; - u8 y; u8 port; u8 dev; }; static int arm_cmn_xyidbits(const struct arm_cmn *cmn) { - return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2); + return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1)); } -static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id) +static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn) { struct arm_cmn_nodeid nid; - if (cmn->num_xps == 1) { - nid.x = 0; - nid.y = 0; - nid.port = CMN_NODEID_1x1_PID(id); - nid.dev = CMN_NODEID_DEVID(id); - } else { - int bits = arm_cmn_xyidbits(cmn); - - nid.x = CMN_NODEID_X(id, bits); - nid.y = CMN_NODEID_Y(id, bits); - if (cmn->ports_used & 0xc) { - nid.port = CMN_NODEID_EXT_PID(id); - nid.dev = CMN_NODEID_EXT_DEVID(id); - } else { - nid.port = CMN_NODEID_PID(id); - nid.dev = CMN_NODEID_DEVID(id); - } - } + nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1); + nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1); return nid; } static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn, const struct arm_cmn_node *dn) { - struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); - int xp_idx = cmn->mesh_x * nid.y + nid.x; + int id = dn->id >> (dn->portid_bits + dn->deviceid_bits); + int bits = arm_cmn_xyidbits(cmn); + int x = id >> bits; + int y = id & ((1U << bits) - 1); - return cmn->xps + xp_idx; + return cmn->xps + cmn->mesh_x * y + x; } static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, enum cmn_node_type type) @@ -486,13 +465,13 @@ static const char *arm_cmn_device_type(u8 type) } } -static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d) +static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d) { struct arm_cmn *cmn = s->private; struct arm_cmn_node *dn; + u16 id = xp->id | d | (p << xp->deviceid_bits); for (dn = cmn->dns; dn->type; dn++) { - struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); int pad = dn->logid < 10; if (dn->type == CMN_TYPE_XP) @@ -501,7 +480,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d) if (dn->type < CMN_TYPE_HNI) continue; - if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d) + if (dn->id != id) continue; seq_printf(s, " %*c#%-*d |", pad + 1, ' ', 3 - pad, dn->logid); @@ -522,6 +501,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) y = cmn->mesh_y; while (y--) { int xp_base = cmn->mesh_x * y; + struct arm_cmn_node *xp = cmn->xps + xp_base; u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION]; for (x = 0; x < cmn->mesh_x; x++) @@ -529,16 +509,14 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_printf(s, "\n%-2d |", y); for (x = 0; x < cmn->mesh_x; x++) { - struct arm_cmn_node *xp = cmn->xps + xp_base + x; - for (p = 0; p < CMN_MAX_PORTS; p++) - port[p][x] = arm_cmn_device_connect_info(cmn, xp, p); + port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p); seq_printf(s, " XP #%-3d|", xp_base + x); } seq_puts(s, "\n |"); for (x = 0; x < cmn->mesh_x; x++) { - s8 dtc = cmn->xps[xp_base + x].dtc; + s8 dtc = xp[x].dtc; if (dtc < 0) seq_puts(s, " DTC ?? |"); @@ -555,10 +533,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_puts(s, arm_cmn_device_type(port[p][x])); seq_puts(s, "\n 0|"); for (x = 0; x < cmn->mesh_x; x++) - arm_cmn_show_logid(s, x, y, p, 0); + arm_cmn_show_logid(s, xp + x, p, 0); seq_puts(s, "\n 1|"); for (x = 0; x < cmn->mesh_x; x++) - arm_cmn_show_logid(s, x, y, p, 1); + arm_cmn_show_logid(s, xp + x, p, 1); } seq_puts(s, "\n-----+"); } @@ -1751,10 +1729,7 @@ static int arm_cmn_event_init(struct perf_event *event) } if (!hw->num_dns) { - struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid); - - dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n", - nodeid, nid.x, nid.y, nid.port, nid.dev, type); + dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type); return -EINVAL; } @@ -1849,7 +1824,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) dtm->wp_event[wp_idx] = hw->dtc_idx[d]; writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx)); } else { - struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); + struct arm_cmn_nodeid nid = arm_cmn_nid(dn); if (cmn->multi_dtm) nid.port %= 2; @@ -2096,10 +2071,12 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) continue; xp = arm_cmn_node_to_xp(cmn, dn); + dn->portid_bits = xp->portid_bits; + dn->deviceid_bits = xp->deviceid_bits; dn->dtc = xp->dtc; dn->dtm = xp->dtm; if (cmn->multi_dtm) - dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2; + dn->dtm += arm_cmn_nid(dn).port / 2; if (dn->type == CMN_TYPE_DTC) { int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++); @@ -2269,18 +2246,27 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) arm_cmn_init_dtm(dtm++, xp, 0); /* * Keeping track of connected ports will let us filter out - * unnecessary XP events easily. We can also reliably infer the - * "extra device ports" configuration for the node ID format - * from this, since in that case we will see at least one XP - * with port 2 connected, for the HN-D. + * unnecessary XP events easily, and also infer the per-XP + * part of the node ID format. */ for (int p = 0; p < CMN_MAX_PORTS; p++) if (arm_cmn_device_connect_info(cmn, xp, p)) xp_ports |= BIT(p); - if (cmn->multi_dtm && (xp_ports & 0xc)) + if (cmn->num_xps == 1) { + xp->portid_bits = 3; + xp->deviceid_bits = 2; + } else if (xp_ports > 0x3) { + xp->portid_bits = 2; + xp->deviceid_bits = 1; + } else { + xp->portid_bits = 1; + xp->deviceid_bits = 2; + } + + if (cmn->multi_dtm && (xp_ports > 0x3)) arm_cmn_init_dtm(dtm++, xp, 1); - if (cmn->multi_dtm && (xp_ports & 0x30)) + if (cmn->multi_dtm && (xp_ports > 0xf)) arm_cmn_init_dtm(dtm++, xp, 2); cmn->ports_used |= xp_ports; From 5418a61e3207883016e6a9c8646c19cae06983de Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 2 Sep 2024 18:51:58 +0100 Subject: [PATCH 040/534] perf/arm-cmn: Fix CCLA register offset [ Upstream commit 88b63a82c84ed9bbcbdefb10cb6f75dd1dd04887 ] Apparently pmu_event_sel is offset by 8 for all CCLA nodes, not just the CCLA_RNI combination type. Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Acked-by: Mark Rutland Reviewed-by: Ilkka Koskinen Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/6e7bb06fef6046f83e7647aad0e5be544139763f.1725296395.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/arm-cmn.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 311a85b6be2e..3926586685cb 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -70,7 +70,8 @@ /* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */ #define CMN__PMU_OCCUP1_ID GENMASK_ULL(34, 32) -/* HN-Ps are weird... */ +/* Some types are designed to coexist with another device in the same node */ +#define CMN_CCLA_PMU_EVENT_SEL 0x008 #define CMN_HNP_PMU_EVENT_SEL 0x008 /* DTMs live in the PMU space of XP registers */ @@ -2321,10 +2322,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_CXHA: case CMN_TYPE_CCRA: case CMN_TYPE_CCHA: - case CMN_TYPE_CCLA: case CMN_TYPE_HNS: dn++; break; + case CMN_TYPE_CCLA: + dn->pmu_base += CMN_CCLA_PMU_EVENT_SEL; + dn++; + break; /* Nothing to see here */ case CMN_TYPE_MPAM_S: case CMN_TYPE_MPAM_NS: @@ -2342,7 +2346,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_HNP: case CMN_TYPE_CCLA_RNI: dn[1] = dn[0]; - dn[0].pmu_base += CMN_HNP_PMU_EVENT_SEL; + dn[0].pmu_base += CMN_CCLA_PMU_EVENT_SEL; dn[1].type = arm_cmn_subtype(dn->type); dn += 2; break; From c902e515b6b414dfa582b0854ea4f00ed13ce630 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 2 Sep 2024 18:51:59 +0100 Subject: [PATCH 041/534] perf/arm-cmn: Ensure dtm_idx is big enough [ Upstream commit 359414b33e00bae91e4eabf3e4ef8e76024c7673 ] While CMN_MAX_DIMENSION was bumped to 12 for CMN-650, that only supports up to a 10x10 mesh, so bumping dtm_idx to 256 bits at the time worked out OK in practice. However CMN-700 did finally support up to 144 XPs, and thus needs a worst-case 288 bits of dtm_idx for an aggregated XP event on a maxed-out config. Oops. Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/e771b358526a0d7fc06efee2c3a2fdc0c9f51d44.1725296395.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/arm-cmn.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 3926586685cb..7d5e8cb96e9b 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -35,6 +35,9 @@ #define CMN_MAX_XPS (CMN_MAX_DIMENSION * CMN_MAX_DIMENSION) #define CMN_MAX_DTMS (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4) +/* Currently XPs are the node type we can have most of; others top out at 128 */ +#define CMN_MAX_NODES_PER_EVENT CMN_MAX_XPS + /* The CFG node has various info besides the discovery tree */ #define CMN_CFGM_PERIPH_ID_01 0x0008 #define CMN_CFGM_PID0_PART_0 GENMASK_ULL(7, 0) @@ -565,7 +568,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {} struct arm_cmn_hw_event { struct arm_cmn_node *dn; - u64 dtm_idx[4]; + u64 dtm_idx[DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64)]; s8 dtc_idx[CMN_MAX_DTCS]; u8 num_dns; u8 dtm_offset; From 4d3608ae154b394f23bb55fce193585dcc317068 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Wed, 28 Aug 2024 08:19:15 -0500 Subject: [PATCH 042/534] cpufreq: ti-cpufreq: Introduce quirks to handle syscon fails appropriately [ Upstream commit abc00ffda43bd4ba85896713464c7510c39f8165 ] Commit b4bc9f9e27ed ("cpufreq: ti-cpufreq: add support for omap34xx and omap36xx") introduced special handling for OMAP3 class devices where syscon node may not be present. However, this also creates a bug where the syscon node is present, however the offset used to read is beyond the syscon defined range. Fix this by providing a quirk option that is populated when such special handling is required. This allows proper failure for all other platforms when the syscon node and efuse offsets are mismatched. Fixes: b4bc9f9e27ed ("cpufreq: ti-cpufreq: add support for omap34xx and omap36xx") Signed-off-by: Nishanth Menon Tested-by: Dhruva Gole Reviewed-by: Kevin Hilman Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/ti-cpufreq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index d88ee87b1cd6..cb5d1c8fefeb 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -61,6 +61,9 @@ struct ti_cpufreq_soc_data { unsigned long efuse_shift; unsigned long rev_offset; bool multi_regulator; +/* Backward compatibility hack: Might have missing syscon */ +#define TI_QUIRK_SYSCON_MAY_BE_MISSING 0x1 + u8 quirks; }; struct ti_cpufreq_data { @@ -182,6 +185,7 @@ static struct ti_cpufreq_soc_data omap34xx_soc_data = { .efuse_mask = BIT(3), .rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE, .multi_regulator = false, + .quirks = TI_QUIRK_SYSCON_MAY_BE_MISSING, }; /* @@ -209,6 +213,7 @@ static struct ti_cpufreq_soc_data omap36xx_soc_data = { .efuse_mask = BIT(9), .rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE, .multi_regulator = true, + .quirks = TI_QUIRK_SYSCON_MAY_BE_MISSING, }; /* @@ -223,6 +228,7 @@ static struct ti_cpufreq_soc_data am3517_soc_data = { .efuse_mask = 0, .rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE, .multi_regulator = false, + .quirks = TI_QUIRK_SYSCON_MAY_BE_MISSING, }; static struct ti_cpufreq_soc_data am625_soc_data = { @@ -250,7 +256,7 @@ static int ti_cpufreq_get_efuse(struct ti_cpufreq_data *opp_data, ret = regmap_read(opp_data->syscon, opp_data->soc_data->efuse_offset, &efuse); - if (ret == -EIO) { + if (opp_data->soc_data->quirks & TI_QUIRK_SYSCON_MAY_BE_MISSING && ret == -EIO) { /* not a syscon register! */ void __iomem *regs = ioremap(OMAP3_SYSCON_BASE + opp_data->soc_data->efuse_offset, 4); @@ -291,7 +297,7 @@ static int ti_cpufreq_get_rev(struct ti_cpufreq_data *opp_data, ret = regmap_read(opp_data->syscon, opp_data->soc_data->rev_offset, &revision); - if (ret == -EIO) { + if (opp_data->soc_data->quirks & TI_QUIRK_SYSCON_MAY_BE_MISSING && ret == -EIO) { /* not a syscon register! */ void __iomem *regs = ioremap(OMAP3_SYSCON_BASE + opp_data->soc_data->rev_offset, 4); From 818dd118f4a997f8b4fe9c010b22402d410a2424 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 13 Jul 2024 15:00:10 +0200 Subject: [PATCH 043/534] wifi: mt76: mt7915: fix oops on non-dbdc mt7986 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 862bf7cbd772c2bad570ef0c5b5556a1330656dd ] mt7915_band_config() sets band_idx = 1 on the main phy for mt7986 with MT7975_ONE_ADIE or MT7976_ONE_ADIE. Commit 0335c034e726 ("wifi: mt76: fix race condition related to checking tx queue fill status") introduced a dereference of the phys array indirectly indexed by band_idx via wcid->phy_idx in mt76_wcid_cleanup(). This caused the following Oops on affected mt7986 devices: Unable to handle kernel read from unreadable memory at virtual address 0000000000000024 Mem abort info: ESR = 0x0000000096000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005 CM = 0, WnR = 0 user pgtable: 4k pages, 39-bit VAs, pgdp=0000000042545000 [0000000000000024] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 Internal error: Oops: 0000000096000005 [#1] SMP Modules linked in: ... mt7915e mt76_connac_lib mt76 mac80211 cfg80211 ... CPU: 2 PID: 1631 Comm: hostapd Not tainted 5.15.150 #0 Hardware name: ZyXEL EX5700 (Telenor) (DT) pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : mt76_wcid_cleanup+0x84/0x22c [mt76] lr : mt76_wcid_cleanup+0x64/0x22c [mt76] sp : ffffffc00a803700 x29: ffffffc00a803700 x28: ffffff80008f7300 x27: ffffff80003f3c00 x26: ffffff80000a7880 x25: ffffffc008c26e00 x24: 0000000000000001 x23: ffffffc000a68114 x22: 0000000000000000 x21: ffffff8004172cc8 x20: ffffffc00a803748 x19: ffffff8004152020 x18: 0000000000000000 x17: 00000000000017c0 x16: ffffffc008ef5000 x15: 0000000000000be0 x14: ffffff8004172e28 x13: ffffff8004172e28 x12: 0000000000000000 x11: 0000000000000000 x10: ffffff8004172e30 x9 : ffffff8004172e28 x8 : 0000000000000000 x7 : ffffff8004156020 x6 : 0000000000000000 x5 : 0000000000000031 x4 : 0000000000000000 x3 : 0000000000000001 x2 : 0000000000000000 x1 : ffffff80008f7300 x0 : 0000000000000024 Call trace: mt76_wcid_cleanup+0x84/0x22c [mt76] __mt76_sta_remove+0x70/0xbc [mt76] mt76_sta_state+0x8c/0x1a4 [mt76] mt7915_eeprom_get_power_delta+0x11e4/0x23a0 [mt7915e] drv_sta_state+0x144/0x274 [mac80211] sta_info_move_state+0x1cc/0x2a4 [mac80211] sta_set_sinfo+0xaf8/0xc24 [mac80211] sta_info_destroy_addr_bss+0x4c/0x6c [mac80211] ieee80211_color_change_finish+0x1c08/0x1e70 [mac80211] cfg80211_check_station_change+0x1360/0x4710 [cfg80211] genl_family_rcv_msg_doit+0xb4/0x110 genl_rcv_msg+0xd0/0x1bc netlink_rcv_skb+0x58/0x120 genl_rcv+0x34/0x50 netlink_unicast+0x1f0/0x2ec netlink_sendmsg+0x198/0x3d0 ____sys_sendmsg+0x1b0/0x210 ___sys_sendmsg+0x80/0xf0 __sys_sendmsg+0x44/0xa0 __arm64_sys_sendmsg+0x20/0x30 invoke_syscall.constprop.0+0x4c/0xe0 do_el0_svc+0x40/0xd0 el0_svc+0x14/0x4c el0t_64_sync_handler+0x100/0x110 el0t_64_sync+0x15c/0x160 Code: d2800002 910092c0 52800023 f9800011 (885f7c01) ---[ end trace 7e42dd9a39ed2281 ]--- Fix by using mt76_dev_phy() which will map band_idx to the correct phy for all hardware combinations. Fixes: 0335c034e726 ("wifi: mt76: fix race condition related to checking tx queue fill status") Link: https://github.com/openwrt/openwrt/issues/14548 Signed-off-by: Bjørn Mork Link: https://patch.msgid.link/20240713130010.516037-1-bjorn@mork.no Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 85bffcf4f6fb..bf4541e76ba2 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1503,7 +1503,7 @@ EXPORT_SYMBOL_GPL(mt76_wcid_init); void mt76_wcid_cleanup(struct mt76_dev *dev, struct mt76_wcid *wcid) { - struct mt76_phy *phy = dev->phys[wcid->phy_idx]; + struct mt76_phy *phy = mt76_dev_phy(dev, wcid->phy_idx); struct ieee80211_hw *hw; struct sk_buff_head list; struct sk_buff *skb; From 50d87e3b70980abc090676b6b4703fcbd96221f9 Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Fri, 16 Aug 2024 17:46:25 +0800 Subject: [PATCH 044/534] wifi: mt76: mt7996: use hweight16 to get correct tx antenna [ Upstream commit f98c3de92bb05dac4a4969df8a4595ed380b4604 ] The chainmask is u16 so using hweight8 cannot get correct tx_ant. Without this patch, the tx_ant of band 2 would be -1 and lead to the following issue: BUG: KASAN: stack-out-of-bounds in mt7996_mcu_add_sta+0x12e0/0x16e0 [mt7996e] Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Signed-off-by: Peter Chiu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20240816094635.2391-1-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index b66f712e1b17..1c3eec84892c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -1412,7 +1412,7 @@ mt7996_mcu_sta_bfer_tlv(struct mt7996_dev *dev, struct sk_buff *skb, { struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; struct mt7996_phy *phy = mvif->phy; - int tx_ant = hweight8(phy->mt76->chainmask) - 1; + int tx_ant = hweight16(phy->mt76->chainmask) - 1; struct sta_rec_bf *bf; struct tlv *tlv; const u8 matrix[4][4] = { From 7146e5aeff6d6874b423bb97955281f3b5dee638 Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Fri, 16 Aug 2024 17:46:26 +0800 Subject: [PATCH 045/534] wifi: mt76: mt7996: fix traffic delay when switching back to working channel [ Upstream commit 376200f095d0c3a7096199b336204698d7086279 ] During scanning, UNI_CHANNEL_RX_PATH tag is necessary for the firmware to properly stop and resume MAC TX queue. Without this tag, HW needs more time to resume traffic when switching back to working channel. Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Signed-off-by: Peter Chiu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20240816094635.2391-2-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 7fea9f0d409b..85f6a9f4f188 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -287,6 +287,10 @@ int mt7996_set_channel(struct mt7996_phy *phy) if (ret) goto out; + ret = mt7996_mcu_set_chan_info(phy, UNI_CHANNEL_RX_PATH); + if (ret) + goto out; + ret = mt7996_dfs_init_radar_detector(phy); mt7996_mac_cca_stats_reset(phy); From 29516e5db9c64919b821ccc0b358cbb91f0fef77 Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Fri, 16 Aug 2024 17:46:27 +0800 Subject: [PATCH 046/534] wifi: mt76: mt7996: fix wmm set of station interface to 3 [ Upstream commit 9265397caacf5c0c2d10c40b2958a474664ebd9e ] According to connac3 HW design, the WMM index of AP and STA interface should be 0 and 3, respectively. Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Signed-off-by: Peter Chiu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20240816094635.2391-3-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 85f6a9f4f188..0e69f0a50861 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -190,7 +190,7 @@ static int mt7996_add_interface(struct ieee80211_hw *hw, mvif->mt76.omac_idx = idx; mvif->phy = phy; mvif->mt76.band_idx = band_idx; - mvif->mt76.wmm_idx = vif->type != NL80211_IFTYPE_AP; + mvif->mt76.wmm_idx = vif->type == NL80211_IFTYPE_AP ? 0 : 3; ret = mt7996_mcu_add_dev_info(phy, vif, true); if (ret) From c07082fa242164c311c66fb1c50b6fd5c4233e06 Mon Sep 17 00:00:00 2001 From: Howard Hsu Date: Fri, 16 Aug 2024 17:46:29 +0800 Subject: [PATCH 047/534] wifi: mt76: mt7996: fix HE and EHT beamforming capabilities [ Upstream commit e1f4847fdbdf5d44ae60e035c131920e5ab88598 ] Fix HE and EHT beamforming capabilities for different bands and interface types. Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Fixes: 348533eb968d ("wifi: mt76: mt7996: add EHT capability init") Signed-off-by: Howard Hsu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20240816094635.2391-5-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- .../net/wireless/mediatek/mt76/mt7996/init.c | 65 ++++++++++++------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 2016ed9197fe..aee531cab46f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -561,8 +561,6 @@ mt7996_set_stream_he_txbf_caps(struct mt7996_phy *phy, return; elem->phy_cap_info[3] |= IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER; - if (vif == NL80211_IFTYPE_AP) - elem->phy_cap_info[4] |= IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER; c = FIELD_PREP(IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK, sts - 1) | @@ -570,6 +568,11 @@ mt7996_set_stream_he_txbf_caps(struct mt7996_phy *phy, sts - 1); elem->phy_cap_info[5] |= c; + if (vif != NL80211_IFTYPE_AP) + return; + + elem->phy_cap_info[4] |= IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER; + c = IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB | IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB; elem->phy_cap_info[6] |= c; @@ -729,7 +732,6 @@ mt7996_init_eht_caps(struct mt7996_phy *phy, enum nl80211_band band, IEEE80211_EHT_MAC_CAP0_OM_CONTROL; eht_cap_elem->phy_cap_info[0] = - IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ | IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI | IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER | IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE; @@ -743,30 +745,36 @@ mt7996_init_eht_caps(struct mt7996_phy *phy, enum nl80211_band band, u8_encode_bits(u8_get_bits(val, GENMASK(2, 1)), IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_80MHZ_MASK) | u8_encode_bits(val, - IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK) | - u8_encode_bits(val, - IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK); + IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK); eht_cap_elem->phy_cap_info[2] = u8_encode_bits(sts - 1, IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_80MHZ_MASK) | - u8_encode_bits(sts - 1, IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_160MHZ_MASK) | - u8_encode_bits(sts - 1, IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK); + u8_encode_bits(sts - 1, IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_160MHZ_MASK); + + if (band == NL80211_BAND_6GHZ) { + eht_cap_elem->phy_cap_info[0] |= + IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; + + eht_cap_elem->phy_cap_info[1] |= + u8_encode_bits(val, + IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK); + + eht_cap_elem->phy_cap_info[2] |= + u8_encode_bits(sts - 1, + IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK); + } eht_cap_elem->phy_cap_info[3] = IEEE80211_EHT_PHY_CAP3_NG_16_SU_FEEDBACK | IEEE80211_EHT_PHY_CAP3_NG_16_MU_FEEDBACK | IEEE80211_EHT_PHY_CAP3_CODEBOOK_4_2_SU_FDBK | - IEEE80211_EHT_PHY_CAP3_CODEBOOK_7_5_MU_FDBK | - IEEE80211_EHT_PHY_CAP3_TRIG_SU_BF_FDBK | - IEEE80211_EHT_PHY_CAP3_TRIG_MU_BF_PART_BW_FDBK | - IEEE80211_EHT_PHY_CAP3_TRIG_CQI_FDBK; + IEEE80211_EHT_PHY_CAP3_CODEBOOK_7_5_MU_FDBK; eht_cap_elem->phy_cap_info[4] = u8_encode_bits(min_t(int, sts - 1, 2), IEEE80211_EHT_PHY_CAP4_MAX_NC_MASK); eht_cap_elem->phy_cap_info[5] = - IEEE80211_EHT_PHY_CAP5_NON_TRIG_CQI_FEEDBACK | u8_encode_bits(IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_16US, IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_MASK) | u8_encode_bits(u8_get_bits(0x11, GENMASK(1, 0)), @@ -780,14 +788,6 @@ mt7996_init_eht_caps(struct mt7996_phy *phy, enum nl80211_band band, IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK) | u8_encode_bits(val, IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK); - eht_cap_elem->phy_cap_info[7] = - IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ | - IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ | - IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ | - IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | - IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | - IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ; - val = u8_encode_bits(nss, IEEE80211_EHT_MCS_NSS_RX) | u8_encode_bits(nss, IEEE80211_EHT_MCS_NSS_TX); #define SET_EHT_MAX_NSS(_bw, _val) do { \ @@ -798,8 +798,29 @@ mt7996_init_eht_caps(struct mt7996_phy *phy, enum nl80211_band band, SET_EHT_MAX_NSS(80, val); SET_EHT_MAX_NSS(160, val); - SET_EHT_MAX_NSS(320, val); + if (band == NL80211_BAND_6GHZ) + SET_EHT_MAX_NSS(320, val); #undef SET_EHT_MAX_NSS + + if (iftype != NL80211_IFTYPE_AP) + return; + + eht_cap_elem->phy_cap_info[3] |= + IEEE80211_EHT_PHY_CAP3_TRIG_SU_BF_FDBK | + IEEE80211_EHT_PHY_CAP3_TRIG_MU_BF_PART_BW_FDBK; + + eht_cap_elem->phy_cap_info[7] = + IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ | + IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ | + IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | + IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ; + + if (band != NL80211_BAND_6GHZ) + return; + + eht_cap_elem->phy_cap_info[7] |= + IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ | + IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ; } static void From 6f68e1e9ade6775179e75c994178f90727015393 Mon Sep 17 00:00:00 2001 From: Howard Hsu Date: Fri, 16 Aug 2024 17:46:31 +0800 Subject: [PATCH 048/534] wifi: mt76: mt7996: fix EHT beamforming capability check [ Upstream commit 9ca65757f0a5b393a7737d37f377d5daf91716af ] If a VIF acts as a beamformer, it should check peer's beamformee capability, and vice versa. Fixes: ba01944adee9 ("wifi: mt76: mt7996: add EHT beamforming support") Signed-off-by: Howard Hsu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20240816094635.2391-7-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 1c3eec84892c..325bbe3415c2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -1188,10 +1188,10 @@ mt7996_is_ebf_supported(struct mt7996_phy *phy, struct ieee80211_vif *vif, if (bfee) return vif->bss_conf.eht_su_beamformee && - EHT_PHY(CAP0_SU_BEAMFORMEE, pe->phy_cap_info[0]); + EHT_PHY(CAP0_SU_BEAMFORMER, pe->phy_cap_info[0]); else return vif->bss_conf.eht_su_beamformer && - EHT_PHY(CAP0_SU_BEAMFORMER, pe->phy_cap_info[0]); + EHT_PHY(CAP0_SU_BEAMFORMEE, pe->phy_cap_info[0]); } if (sta->deflink.he_cap.has_he) { From fb2d057539eda67ec7cfc369bf587e6518a9b99d Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 5 Sep 2024 16:08:54 +0800 Subject: [PATCH 049/534] x86/sgx: Fix deadlock in SGX NUMA node search [ Upstream commit 9c936844010466535bd46ea4ce4656ef17653644 ] When the current node doesn't have an EPC section configured by firmware and all other EPC sections are used up, CPU can get stuck inside the while loop that looks for an available EPC page from remote nodes indefinitely, leading to a soft lockup. Note how nid_of_current will never be equal to nid in that while loop because nid_of_current is not set in sgx_numa_mask. Also worth mentioning is that it's perfectly fine for the firmware not to setup an EPC section on a node. While setting up an EPC section on each node can enhance performance, it is not a requirement for functionality. Rework the loop to start and end on *a* node that has SGX memory. This avoids the deadlock looking for the current SGX-lacking node to show up in the loop when it never will. Fixes: 901ddbb9ecf5 ("x86/sgx: Add a basic NUMA allocation scheme to sgx_alloc_epc_page()") Reported-by: "Molina Sabido, Gerardo" Signed-off-by: Aaron Lu Signed-off-by: Dave Hansen Reviewed-by: Kai Huang Reviewed-by: Jarkko Sakkinen Acked-by: Dave Hansen Tested-by: Zhimin Luo Link: https://lore.kernel.org/all/20240905080855.1699814-2-aaron.lu%40intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/sgx/main.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 166692f2d501..c7f8c3200e8d 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -474,24 +474,25 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void) { struct sgx_epc_page *page; int nid_of_current = numa_node_id(); - int nid = nid_of_current; + int nid_start, nid; - if (node_isset(nid_of_current, sgx_numa_mask)) { - page = __sgx_alloc_epc_page_from_node(nid_of_current); - if (page) - return page; - } - - /* Fall back to the non-local NUMA nodes: */ - while (true) { - nid = next_node_in(nid, sgx_numa_mask); - if (nid == nid_of_current) - break; + /* + * Try local node first. If it doesn't have an EPC section, + * fall back to the non-local NUMA nodes. + */ + if (node_isset(nid_of_current, sgx_numa_mask)) + nid_start = nid_of_current; + else + nid_start = next_node_in(nid_of_current, sgx_numa_mask); + nid = nid_start; + do { page = __sgx_alloc_epc_page_from_node(nid); if (page) return page; - } + + nid = next_node_in(nid, sgx_numa_mask); + } while (nid != nid_start); return ERR_PTR(-ENOMEM); } From 4589bb97e42f2cfc5cbf0fe09d9e5afad905ec62 Mon Sep 17 00:00:00 2001 From: "John B. Wyatt IV" Date: Wed, 4 Sep 2024 22:19:08 -0400 Subject: [PATCH 050/534] pm:cpupower: Add missing powercap_set_enabled() stub function [ Upstream commit 4b80294fb53845dc5c98cca0c989da09150f2ca9 ] There was a symbol listed in the powercap.h file that was not implemented. Implement it with a stub return of 0. Programs like SWIG require that functions that are defined in the headers be implemented. Fixes: c2294c1496b7 ("cpupower: Introduce powercap intel-rapl library and powercap-info command") Suggested-by: Shuah Khan Signed-off-by: John B. Wyatt IV Signed-off-by: John B. Wyatt IV Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/power/cpupower/lib/powercap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c index a7a59c6bacda..94a0c69e55ef 100644 --- a/tools/power/cpupower/lib/powercap.c +++ b/tools/power/cpupower/lib/powercap.c @@ -77,6 +77,14 @@ int powercap_get_enabled(int *mode) return sysfs_get_enabled(path, mode); } +/* + * TODO: implement function. Returns dummy 0 for now. + */ +int powercap_set_enabled(int mode) +{ + return 0; +} + /* * Hardcoded, because rapl is the only powercap implementation - * this needs to get more generic if more powercap implementations From 7803e8cdaa847a0ca65ff13e5db8c19d3b0e1fb9 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 31 Aug 2024 19:48:30 +0800 Subject: [PATCH 051/534] crypto: hisilicon/hpre - mask cluster timeout error [ Upstream commit 145013f723947c83b1a5f76a0cf6e7237d59e973 ] The timeout threshold of the hpre cluster is 16ms. When the CPU and device share virtual address, page fault processing time may exceed the threshold. In the current test, there is a high probability that the cluster times out. However, the cluster is waiting for the completion of memory access, which is not an error, the device does not need to be reset. If an error occurs in the cluster, qm also reports the error. Therefore, the cluster timeout error of hpre can be masked. Fixes: d90fab0deb8e ("crypto: hisilicon/qm - get error type from hardware registers") Signed-off-by: Weili Qian Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/hisilicon/hpre/hpre_main.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index b97ce0ee7140..e0cc429130c2 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -13,9 +13,7 @@ #include #include "hpre.h" -#define HPRE_QM_ABNML_INT_MASK 0x100004 #define HPRE_CTRL_CNT_CLR_CE_BIT BIT(0) -#define HPRE_COMM_CNT_CLR_CE 0x0 #define HPRE_CTRL_CNT_CLR_CE 0x301000 #define HPRE_FSM_MAX_CNT 0x301008 #define HPRE_VFG_AXQOS 0x30100c @@ -42,7 +40,6 @@ #define HPRE_HAC_INT_SET 0x301500 #define HPRE_RNG_TIMEOUT_NUM 0x301A34 #define HPRE_CORE_INT_ENABLE 0 -#define HPRE_CORE_INT_DISABLE GENMASK(21, 0) #define HPRE_RDCHN_INI_ST 0x301a00 #define HPRE_CLSTR_BASE 0x302000 #define HPRE_CORE_EN_OFFSET 0x04 @@ -66,7 +63,6 @@ #define HPRE_CLSTR_ADDR_INTRVL 0x1000 #define HPRE_CLUSTER_INQURY 0x100 #define HPRE_CLSTR_ADDR_INQRY_RSLT 0x104 -#define HPRE_TIMEOUT_ABNML_BIT 6 #define HPRE_PASID_EN_BIT 9 #define HPRE_REG_RD_INTVRL_US 10 #define HPRE_REG_RD_TMOUT_US 1000 @@ -202,9 +198,9 @@ static const struct hisi_qm_cap_info hpre_basic_info[] = { {HPRE_QM_RESET_MASK_CAP, 0x3128, 0, GENMASK(31, 0), 0x0, 0xC37, 0x6C37}, {HPRE_QM_OOO_SHUTDOWN_MASK_CAP, 0x3128, 0, GENMASK(31, 0), 0x0, 0x4, 0x6C37}, {HPRE_QM_CE_MASK_CAP, 0x312C, 0, GENMASK(31, 0), 0x0, 0x8, 0x8}, - {HPRE_NFE_MASK_CAP, 0x3130, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0x1FFFFFE}, - {HPRE_RESET_MASK_CAP, 0x3134, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0xBFFFFE}, - {HPRE_OOO_SHUTDOWN_MASK_CAP, 0x3134, 0, GENMASK(31, 0), 0x0, 0x22, 0xBFFFFE}, + {HPRE_NFE_MASK_CAP, 0x3130, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0x1FFFC3E}, + {HPRE_RESET_MASK_CAP, 0x3134, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0xBFFC3E}, + {HPRE_OOO_SHUTDOWN_MASK_CAP, 0x3134, 0, GENMASK(31, 0), 0x0, 0x22, 0xBFFC3E}, {HPRE_CE_MASK_CAP, 0x3138, 0, GENMASK(31, 0), 0x0, 0x1, 0x1}, {HPRE_CLUSTER_NUM_CAP, 0x313c, 20, GENMASK(3, 0), 0x0, 0x4, 0x1}, {HPRE_CORE_TYPE_NUM_CAP, 0x313c, 16, GENMASK(3, 0), 0x0, 0x2, 0x2}, @@ -653,11 +649,6 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE); writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG); - /* HPRE need more time, we close this interrupt */ - val = readl_relaxed(qm->io_base + HPRE_QM_ABNML_INT_MASK); - val |= BIT(HPRE_TIMEOUT_ABNML_BIT); - writel_relaxed(val, qm->io_base + HPRE_QM_ABNML_INT_MASK); - if (qm->ver >= QM_HW_V3) writel(HPRE_RSA_ENB | HPRE_ECC_ENB, qm->io_base + HPRE_TYPES_ENB); @@ -666,9 +657,7 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) writel(HPRE_QM_VFG_AX_MASK, qm->io_base + HPRE_VFG_AXCACHE); writel(0x0, qm->io_base + HPRE_BD_ENDIAN); - writel(0x0, qm->io_base + HPRE_INT_MASK); writel(0x0, qm->io_base + HPRE_POISON_BYPASS); - writel(0x0, qm->io_base + HPRE_COMM_CNT_CLR_CE); writel(0x0, qm->io_base + HPRE_ECC_BYPASS); writel(HPRE_BD_USR_MASK, qm->io_base + HPRE_BD_ARUSR_CFG); @@ -758,7 +747,7 @@ static void hpre_hw_error_disable(struct hisi_qm *qm) static void hpre_hw_error_enable(struct hisi_qm *qm) { - u32 ce, nfe; + u32 ce, nfe, err_en; ce = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_CE_MASK_CAP, qm->cap_ver); nfe = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_NFE_MASK_CAP, qm->cap_ver); @@ -775,7 +764,8 @@ static void hpre_hw_error_enable(struct hisi_qm *qm) hpre_master_ooo_ctrl(qm, true); /* enable hpre hw error interrupts */ - writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK); + err_en = ce | nfe | HPRE_HAC_RAS_FE_ENABLE; + writel(~err_en, qm->io_base + HPRE_INT_MASK); } static inline struct hisi_qm *hpre_file_to_qm(struct hpre_debugfs_file *file) From 8b21a9b1d8f002822be3347db36e4be25c275d05 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 31 Aug 2024 19:48:29 +0800 Subject: [PATCH 052/534] crypto: hisilicon/qm - reset device before enabling it [ Upstream commit 5d2d1ee0874c26b8010ddf7f57e2f246e848af38 ] Before the device is enabled again, the device may still store the previously processed data. If an error occurs in the previous task, the device may fail to be enabled again. Therefore, before enabling device, reset the device to restore the initial state. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu Stable-dep-of: b04f06fc0243 ("crypto: hisilicon/qm - inject error before stopping queue") Signed-off-by: Sasha Levin --- drivers/crypto/hisilicon/hpre/hpre_main.c | 32 +++--- drivers/crypto/hisilicon/qm.c | 114 +++++++++++++++------- drivers/crypto/hisilicon/sec2/sec_main.c | 16 ++- drivers/crypto/hisilicon/zip/zip_main.c | 23 +++-- 4 files changed, 121 insertions(+), 64 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index e0cc429130c2..3463f5ee83c0 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -353,6 +353,8 @@ static struct dfx_diff_registers hpre_diff_regs[] = { }, }; +static const struct hisi_qm_err_ini hpre_err_ini; + bool hpre_check_alg_support(struct hisi_qm *qm, u32 alg) { u32 cap_val; @@ -1151,6 +1153,7 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; qm->qm_list = &hpre_devices; + qm->err_ini = &hpre_err_ini; if (pf_q_num_flag) set_bit(QM_MODULE_PARAM, &qm->misc_ctl); } @@ -1340,8 +1343,6 @@ static int hpre_pf_probe_init(struct hpre *hpre) hpre_open_sva_prefetch(qm); - qm->err_ini = &hpre_err_ini; - qm->err_ini->err_info_init(qm); hisi_qm_dev_err_init(qm); ret = hpre_show_last_regs_init(qm); if (ret) @@ -1370,6 +1371,18 @@ static int hpre_probe_init(struct hpre *hpre) return 0; } +static void hpre_probe_uninit(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + hpre_cnt_regs_clear(qm); + qm->debug.curr_qm_qp_num = 0; + hpre_show_last_regs_uninit(qm); + hpre_close_sva_prefetch(qm); + hisi_qm_dev_err_uninit(qm); +} + static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct hisi_qm *qm; @@ -1395,7 +1408,7 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = hisi_qm_start(qm); if (ret) - goto err_with_err_init; + goto err_with_probe_init; ret = hpre_debugfs_init(qm); if (ret) @@ -1432,9 +1445,8 @@ err_with_qm_start: hpre_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); -err_with_err_init: - hpre_show_last_regs_uninit(qm); - hisi_qm_dev_err_uninit(qm); +err_with_probe_init: + hpre_probe_uninit(qm); err_with_qm_init: hisi_qm_uninit(qm); @@ -1455,13 +1467,7 @@ static void hpre_remove(struct pci_dev *pdev) hpre_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); - if (qm->fun_type == QM_HW_PF) { - hpre_cnt_regs_clear(qm); - qm->debug.curr_qm_qp_num = 0; - hpre_show_last_regs_uninit(qm); - hisi_qm_dev_err_uninit(qm); - } - + hpre_probe_uninit(qm); hisi_qm_uninit(qm); } diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 562df5c77c63..2a6ea3815cfb 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -455,6 +455,7 @@ static struct qm_typical_qos_table shaper_cbs_s[] = { }; static void qm_irqs_unregister(struct hisi_qm *qm); +static int qm_reset_device(struct hisi_qm *qm); static bool qm_avail_state(struct hisi_qm *qm, enum qm_state new) { @@ -4199,6 +4200,22 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm) return 0; } +static int qm_master_ooo_check(struct hisi_qm *qm) +{ + u32 val; + int ret; + + /* Check the ooo register of the device before resetting the device. */ + writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN, qm->io_base + ACC_MASTER_GLOBAL_CTRL); + ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN, + val, (val == ACC_MASTER_TRANS_RETURN_RW), + POLL_PERIOD, POLL_TIMEOUT); + if (ret) + pci_warn(qm->pdev, "Bus lock! Please reset system.\n"); + + return ret; +} + static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm) { u32 nfe_enb = 0; @@ -4221,11 +4238,10 @@ static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm) } } -static int qm_soft_reset(struct hisi_qm *qm) +static int qm_soft_reset_prepare(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; int ret; - u32 val; /* Ensure all doorbells and mailboxes received by QM */ ret = qm_check_req_recv(qm); @@ -4247,29 +4263,23 @@ static int qm_soft_reset(struct hisi_qm *qm) } qm_dev_ecc_mbit_handle(qm); - - /* OOO register set and check */ - writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN, - qm->io_base + ACC_MASTER_GLOBAL_CTRL); - - /* If bus lock, reset chip */ - ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN, - val, - (val == ACC_MASTER_TRANS_RETURN_RW), - POLL_PERIOD, POLL_TIMEOUT); - if (ret) { - pci_emerg(pdev, "Bus lock! Please reset system.\n"); + ret = qm_master_ooo_check(qm); + if (ret) return ret; - } if (qm->err_ini->close_sva_prefetch) qm->err_ini->close_sva_prefetch(qm); ret = qm_set_pf_mse(qm, false); - if (ret) { + if (ret) pci_err(pdev, "Fails to disable pf MSE bit.\n"); - return ret; - } + + return ret; +} + +static int qm_reset_device(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; /* The reset related sub-control registers are not in PCI BAR */ if (ACPI_HANDLE(&pdev->dev)) { @@ -4288,12 +4298,23 @@ static int qm_soft_reset(struct hisi_qm *qm) pci_err(pdev, "Reset step %llu failed!\n", value); return -EIO; } - } else { - pci_err(pdev, "No reset method!\n"); - return -EINVAL; + + return 0; } - return 0; + pci_err(pdev, "No reset method!\n"); + return -EINVAL; +} + +static int qm_soft_reset(struct hisi_qm *qm) +{ + int ret; + + ret = qm_soft_reset_prepare(qm); + if (ret) + return ret; + + return qm_reset_device(qm); } static int qm_vf_reset_done(struct hisi_qm *qm) @@ -5261,6 +5282,35 @@ err_request_mem_regions: return ret; } +static int qm_clear_device(struct hisi_qm *qm) +{ + acpi_handle handle = ACPI_HANDLE(&qm->pdev->dev); + int ret; + + if (qm->fun_type == QM_HW_VF) + return 0; + + /* Device does not support reset, return */ + if (!qm->err_ini->err_info_init) + return 0; + qm->err_ini->err_info_init(qm); + + if (!handle) + return 0; + + /* No reset method, return */ + if (!acpi_has_method(handle, qm->err_info.acpi_rst)) + return 0; + + ret = qm_master_ooo_check(qm); + if (ret) { + writel(0x0, qm->io_base + ACC_MASTER_GLOBAL_CTRL); + return ret; + } + + return qm_reset_device(qm); +} + static int hisi_qm_pci_init(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -5290,8 +5340,14 @@ static int hisi_qm_pci_init(struct hisi_qm *qm) goto err_get_pci_res; } + ret = qm_clear_device(qm); + if (ret) + goto err_free_vectors; + return 0; +err_free_vectors: + pci_free_irq_vectors(pdev); err_get_pci_res: qm_put_pci_res(qm); err_disable_pcidev: @@ -5557,7 +5613,6 @@ static int qm_prepare_for_suspend(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; int ret; - u32 val; ret = qm->ops->set_msi(qm, false); if (ret) { @@ -5565,18 +5620,9 @@ static int qm_prepare_for_suspend(struct hisi_qm *qm) return ret; } - /* shutdown OOO register */ - writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN, - qm->io_base + ACC_MASTER_GLOBAL_CTRL); - - ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN, - val, - (val == ACC_MASTER_TRANS_RETURN_RW), - POLL_PERIOD, POLL_TIMEOUT); - if (ret) { - pci_emerg(pdev, "Bus lock! Please reset system.\n"); + ret = qm_master_ooo_check(qm); + if (ret) return ret; - } ret = qm_set_pf_mse(qm, false); if (ret) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index bf02a6b2eed4..cf7b6a37e7df 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -1061,9 +1061,6 @@ static int sec_pf_probe_init(struct sec_dev *sec) struct hisi_qm *qm = &sec->qm; int ret; - qm->err_ini = &sec_err_ini; - qm->err_ini->err_info_init(qm); - ret = sec_set_user_domain_and_cache(qm); if (ret) return ret; @@ -1118,6 +1115,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; qm->qm_list = &sec_devices; + qm->err_ini = &sec_err_ini; if (pf_q_num_flag) set_bit(QM_MODULE_PARAM, &qm->misc_ctl); } else if (qm->fun_type == QM_HW_VF && qm->ver == QM_HW_V1) { @@ -1182,6 +1180,12 @@ static int sec_probe_init(struct sec_dev *sec) static void sec_probe_uninit(struct hisi_qm *qm) { + if (qm->fun_type == QM_HW_VF) + return; + + sec_debug_regs_clear(qm); + sec_show_last_regs_uninit(qm); + sec_close_sva_prefetch(qm); hisi_qm_dev_err_uninit(qm); } @@ -1274,7 +1278,6 @@ err_qm_stop: sec_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); err_probe_uninit: - sec_show_last_regs_uninit(qm); sec_probe_uninit(qm); err_qm_uninit: sec_qm_uninit(qm); @@ -1296,11 +1299,6 @@ static void sec_remove(struct pci_dev *pdev) sec_debugfs_exit(qm); (void)hisi_qm_stop(qm, QM_NORMAL); - - if (qm->fun_type == QM_HW_PF) - sec_debug_regs_clear(qm); - sec_show_last_regs_uninit(qm); - sec_probe_uninit(qm); sec_qm_uninit(qm); diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index cd7ecb2180bf..9d47b3675da7 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -1150,8 +1150,6 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) hisi_zip->ctrl = ctrl; ctrl->hisi_zip = hisi_zip; - qm->err_ini = &hisi_zip_err_ini; - qm->err_ini->err_info_init(qm); ret = hisi_zip_set_user_domain_and_cache(qm); if (ret) @@ -1212,6 +1210,7 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; qm->qm_list = &zip_devices; + qm->err_ini = &hisi_zip_err_ini; if (pf_q_num_flag) set_bit(QM_MODULE_PARAM, &qm->misc_ctl); } else if (qm->fun_type == QM_HW_VF && qm->ver == QM_HW_V1) { @@ -1278,6 +1277,16 @@ static int hisi_zip_probe_init(struct hisi_zip *hisi_zip) return 0; } +static void hisi_zip_probe_uninit(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + hisi_zip_show_last_regs_uninit(qm); + hisi_zip_close_sva_prefetch(qm); + hisi_qm_dev_err_uninit(qm); +} + static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct hisi_zip *hisi_zip; @@ -1304,7 +1313,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = hisi_qm_start(qm); if (ret) - goto err_dev_err_uninit; + goto err_probe_uninit; ret = hisi_zip_debugfs_init(qm); if (ret) @@ -1341,9 +1350,8 @@ err_qm_stop: hisi_zip_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); -err_dev_err_uninit: - hisi_zip_show_last_regs_uninit(qm); - hisi_qm_dev_err_uninit(qm); +err_probe_uninit: + hisi_zip_probe_uninit(qm); err_qm_uninit: hisi_zip_qm_uninit(qm); @@ -1364,8 +1372,7 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); - hisi_zip_show_last_regs_uninit(qm); - hisi_qm_dev_err_uninit(qm); + hisi_zip_probe_uninit(qm); hisi_zip_qm_uninit(qm); } From aa3e0db35a60002fb34ef0e4ad203aa59fd00203 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 31 Aug 2024 19:48:31 +0800 Subject: [PATCH 053/534] crypto: hisilicon/qm - inject error before stopping queue [ Upstream commit b04f06fc0243600665b3b50253869533b7938468 ] The master ooo cannot be completely closed when the accelerator core reports memory error. Therefore, the driver needs to inject the qm error to close the master ooo. Currently, the qm error is injected after stopping queue, memory may be released immediately after stopping queue, causing the device to access the released memory. Therefore, error is injected to close master ooo before stopping queue to ensure that the device does not access the released memory. Fixes: 6c6dd5802c2d ("crypto: hisilicon/qm - add controller reset interface") Signed-off-by: Weili Qian Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/hisilicon/qm.c | 47 ++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 2a6ea3815cfb..1b00edbbfe26 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4106,6 +4106,28 @@ static int qm_set_vf_mse(struct hisi_qm *qm, bool set) return -ETIMEDOUT; } +static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm) +{ + u32 nfe_enb = 0; + + /* Kunpeng930 hardware automatically close master ooo when NFE occurs */ + if (qm->ver >= QM_HW_V3) + return; + + if (!qm->err_status.is_dev_ecc_mbit && + qm->err_status.is_qm_ecc_mbit && + qm->err_ini->close_axi_master_ooo) { + qm->err_ini->close_axi_master_ooo(qm); + } else if (qm->err_status.is_dev_ecc_mbit && + !qm->err_status.is_qm_ecc_mbit && + !qm->err_ini->close_axi_master_ooo) { + nfe_enb = readl(qm->io_base + QM_RAS_NFE_ENABLE); + writel(nfe_enb & QM_RAS_NFE_MBIT_DISABLE, + qm->io_base + QM_RAS_NFE_ENABLE); + writel(QM_ECC_MBIT, qm->io_base + QM_ABNORMAL_INT_SET); + } +} + static int qm_vf_reset_prepare(struct hisi_qm *qm, enum qm_stop_reason stop_reason) { @@ -4170,6 +4192,8 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm) return ret; } + qm_dev_ecc_mbit_handle(qm); + /* PF obtains the information of VF by querying the register. */ qm_cmd_uninit(qm); @@ -4216,28 +4240,6 @@ static int qm_master_ooo_check(struct hisi_qm *qm) return ret; } -static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm) -{ - u32 nfe_enb = 0; - - /* Kunpeng930 hardware automatically close master ooo when NFE occurs */ - if (qm->ver >= QM_HW_V3) - return; - - if (!qm->err_status.is_dev_ecc_mbit && - qm->err_status.is_qm_ecc_mbit && - qm->err_ini->close_axi_master_ooo) { - qm->err_ini->close_axi_master_ooo(qm); - } else if (qm->err_status.is_dev_ecc_mbit && - !qm->err_status.is_qm_ecc_mbit && - !qm->err_ini->close_axi_master_ooo) { - nfe_enb = readl(qm->io_base + QM_RAS_NFE_ENABLE); - writel(nfe_enb & QM_RAS_NFE_MBIT_DISABLE, - qm->io_base + QM_RAS_NFE_ENABLE); - writel(QM_ECC_MBIT, qm->io_base + QM_ABNORMAL_INT_SET); - } -} - static int qm_soft_reset_prepare(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -4262,7 +4264,6 @@ static int qm_soft_reset_prepare(struct hisi_qm *qm) return ret; } - qm_dev_ecc_mbit_handle(qm); ret = qm_master_ooo_check(qm); if (ret) return ret; From 0940196c3d62cf2016c90011b288f4928d521d52 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 27 Aug 2024 11:29:48 +0200 Subject: [PATCH 054/534] wifi: mt76: mt7603: fix mixed declarations and code [ Upstream commit 9b8d932053b8b45d650360b36f701cf0f9b6470e ] Move the qid variable declaration further up Fixes: b473c0e47f04 ("wifi: mt76: mt7603: fix tx queue of loopback packets") Link: https://patch.msgid.link/20240827093011.18621-1-nbd@nbd.name Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7603/dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c index b3a61b0ddd03..525444953df6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c @@ -29,7 +29,7 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) struct ieee80211_sta *sta; struct mt7603_sta *msta; struct mt76_wcid *wcid; - u8 tid = 0, hwq = 0; + u8 qid, tid = 0, hwq = 0; void *priv; int idx; u32 val; @@ -57,7 +57,7 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) if (ieee80211_is_data_qos(hdr->frame_control)) { tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TAG1D_MASK; - u8 qid = tid_to_ac[tid]; + qid = tid_to_ac[tid]; hwq = wmm_queue_map[qid]; skb_set_queue_mapping(skb, qid); } else if (ieee80211_is_data(hdr->frame_control)) { From 9f05824b35a40c4789d1c50b5e07762e1a25b265 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 5 Sep 2024 18:04:00 +0300 Subject: [PATCH 055/534] wifi: cfg80211: fix UBSAN noise in cfg80211_wext_siwscan() [ Upstream commit a26a5107bc52922cf5f67361e307ad66547b51c7 ] Looking at https://syzkaller.appspot.com/bug?extid=1a3986bbd3169c307819 and running reproducer with CONFIG_UBSAN_BOUNDS, I've noticed the following: [ T4985] UBSAN: array-index-out-of-bounds in net/wireless/scan.c:3479:25 [ T4985] index 164 is out of range for type 'struct ieee80211_channel *[]' <...skipped...> [ T4985] Call Trace: [ T4985] [ T4985] dump_stack_lvl+0x1c2/0x2a0 [ T4985] ? __pfx_dump_stack_lvl+0x10/0x10 [ T4985] ? __pfx__printk+0x10/0x10 [ T4985] __ubsan_handle_out_of_bounds+0x127/0x150 [ T4985] cfg80211_wext_siwscan+0x11a4/0x1260 <...the rest is not too useful...> Even if we do 'creq->n_channels = n_channels' before 'creq->ssids = (void *)&creq->channels[n_channels]', UBSAN treats the latter as off-by-one error. Fix this by using pointer arithmetic rather than an expression with explicit array indexing and use convenient 'struct_size()' to simplify the math here and in 'kzalloc()' above. Fixes: 5ba63533bbf6 ("cfg80211: fix alignment problem in scan request") Signed-off-by: Dmitry Antipov Reviewed-by: Kees Cook Link: https://patch.msgid.link/20240905150400.126386-1-dmantipov@yandex.ru [fix coding style for multi-line calculation] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/scan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4d88e797ae49..4fc6279750ea 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3209,8 +3209,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, n_channels = ieee80211_get_num_supported_channels(wiphy); } - creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) + - n_channels * sizeof(void *), + creq = kzalloc(struct_size(creq, channels, n_channels) + + sizeof(struct cfg80211_ssid), GFP_ATOMIC); if (!creq) return -ENOMEM; @@ -3218,7 +3218,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq->wiphy = wiphy; creq->wdev = dev->ieee80211_ptr; /* SSIDs come after channels */ - creq->ssids = (void *)&creq->channels[n_channels]; + creq->ssids = (void *)creq + struct_size(creq, channels, n_channels); creq->n_channels = n_channels; creq->n_ssids = 1; creq->scan_start = jiffies; From 15c1d606fa7558aaa0a29544b2981c13f79afa1f Mon Sep 17 00:00:00 2001 From: Howard Hsu Date: Tue, 27 Aug 2024 11:30:08 +0200 Subject: [PATCH 056/534] wifi: mt76: mt7915: fix rx filter setting for bfee functionality [ Upstream commit 6ac80fce713e875a316a58975b830720a3e27721 ] Fix rx filter setting to prevent dropping NDPA frames. Without this change, bfee functionality may behave abnormally. Fixes: e57b7901469f ("mt76: add mac80211 driver for MT7915 PCIe-based chipsets") Signed-off-by: Howard Hsu Link: https://patch.msgid.link/20240827093011.18621-21-nbd@nbd.name Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7915/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index 260fe00d7dc6..27655dcb7914 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -557,8 +557,7 @@ static void mt7915_configure_filter(struct ieee80211_hw *hw, MT76_FILTER(CONTROL, MT_WF_RFCR_DROP_CTS | MT_WF_RFCR_DROP_RTS | - MT_WF_RFCR_DROP_CTL_RSV | - MT_WF_RFCR_DROP_NDPA); + MT_WF_RFCR_DROP_CTL_RSV); *total_flags = flags; mt76_wr(dev, MT_WF_RFCR(band), phy->rxfilter); From 2d9f3e56b9d588e0da5494a9dee37a4025ea7c69 Mon Sep 17 00:00:00 2001 From: Benjamin Lin Date: Fri, 26 Jan 2024 17:09:16 +0800 Subject: [PATCH 057/534] wifi: mt76: mt7996: ensure 4-byte alignment for beacon commands [ Upstream commit 5d197d37809b220616a0fb00856b9eeeafe1f69e ] If TLV includes beacon content, its length might not be 4-byte aligned. Make sure the length is aligned before sending beacon commands to FW. Signed-off-by: Benjamin Lin Signed-off-by: Shayne Chen Signed-off-by: Felix Fietkau Stable-dep-of: 9e461f4a2329 ("wifi: mt76: mt7996: fix uninitialized TLV data") Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 14 +++++--------- drivers/net/wireless/mediatek/mt76/mt7996/mcu.h | 4 ++-- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 325bbe3415c2..6d9a92cafe48 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -520,13 +520,10 @@ void mt7996_mcu_rx_event(struct mt7996_dev *dev, struct sk_buff *skb) static struct tlv * mt7996_mcu_add_uni_tlv(struct sk_buff *skb, u16 tag, u16 len) { - struct tlv *ptlv, tlv = { - .tag = cpu_to_le16(tag), - .len = cpu_to_le16(len), - }; + struct tlv *ptlv = skb_put(skb, len); - ptlv = skb_put(skb, len); - memcpy(ptlv, &tlv, sizeof(tlv)); + ptlv->tag = cpu_to_le16(tag); + ptlv->len = cpu_to_le16(len); return ptlv; } @@ -2072,7 +2069,7 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); info->hw_queue |= FIELD_PREP(MT_TX_HW_QUEUE_PHY, phy->mt76->band_idx); - len = sizeof(*bcn) + MT_TXD_SIZE + skb->len; + len = ALIGN(sizeof(*bcn) + MT_TXD_SIZE + skb->len, 4); tlv = mt7996_mcu_add_uni_tlv(rskb, UNI_BSS_INFO_BCN_CONTENT, len); bcn = (struct bss_bcn_content_tlv *)tlv; bcn->enable = en; @@ -2141,8 +2138,7 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev, info->band = band; info->hw_queue |= FIELD_PREP(MT_TX_HW_QUEUE_PHY, phy->mt76->band_idx); - len = sizeof(*discov) + MT_TXD_SIZE + skb->len; - + len = ALIGN(sizeof(*discov) + MT_TXD_SIZE + skb->len, 4); tlv = mt7996_mcu_add_uni_tlv(rskb, UNI_BSS_INFO_OFFLOAD, len); discov = (struct bss_inband_discovery_tlv *)tlv; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h index dc8d0a30c707..58504b80eae8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h @@ -587,10 +587,10 @@ enum { sizeof(struct sta_rec_hdr_trans) + \ sizeof(struct tlv)) -#define MT7996_MAX_BEACON_SIZE 1342 +#define MT7996_MAX_BEACON_SIZE 1338 #define MT7996_BEACON_UPDATE_SIZE (sizeof(struct bss_req_hdr) + \ sizeof(struct bss_bcn_content_tlv) + \ - MT_TXD_SIZE + \ + 4 + MT_TXD_SIZE + \ sizeof(struct bss_bcn_cntdwn_tlv) + \ sizeof(struct bss_bcn_mbss_tlv)) #define MT7996_MAX_BSS_OFFLOAD_SIZE (MT7996_MAX_BEACON_SIZE + \ From 2780657f7f5383f62e5e02a9ae6806dcdd26e47f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 27 Aug 2024 11:30:10 +0200 Subject: [PATCH 058/534] wifi: mt76: mt7996: fix uninitialized TLV data [ Upstream commit 9e461f4a2329109571f4b10f9bcad28d07e6ebb3 ] Use skb_put_zero instead of skb_put Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Link: https://patch.msgid.link/20240827093011.18621-23-nbd@nbd.name Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 6d9a92cafe48..b50743f7e030 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -520,7 +520,7 @@ void mt7996_mcu_rx_event(struct mt7996_dev *dev, struct sk_buff *skb) static struct tlv * mt7996_mcu_add_uni_tlv(struct sk_buff *skb, u16 tag, u16 len) { - struct tlv *ptlv = skb_put(skb, len); + struct tlv *ptlv = skb_put_zero(skb, len); ptlv->tag = cpu_to_le16(tag); ptlv->len = cpu_to_le16(len); From cacdc118984118dac9593eb6a7bec2654f8b903a Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 9 Sep 2024 12:08:06 +0300 Subject: [PATCH 059/534] wifi: cfg80211: fix two more possible UBSAN-detected off-by-one errors [ Upstream commit 15ea13b1b1fbf6364d4cd568e65e4c8479632999 ] Although not reproduced in practice, these two cases may be considered by UBSAN as off-by-one errors. So fix them in the same way as in commit a26a5107bc52 ("wifi: cfg80211: fix UBSAN noise in cfg80211_wext_siwscan()"). Fixes: 807f8a8c3004 ("cfg80211/nl80211: add support for scheduled scans") Fixes: 5ba63533bbf6 ("cfg80211: fix alignment problem in scan request") Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20240909090806.1091956-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 3 ++- net/wireless/sme.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2b2dc46dc701..4ce23762b1c9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9679,7 +9679,8 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, return ERR_PTR(-ENOMEM); if (n_ssids) - request->ssids = (void *)&request->channels[n_channels]; + request->ssids = (void *)request + + struct_size(request, channels, n_channels); request->n_ssids = n_ssids; if (ie_len) { if (n_ssids) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 72d78dbc55ff..591cda99d72f 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -115,7 +115,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) n_channels = i; } request->n_channels = n_channels; - request->ssids = (void *)&request->channels[n_channels]; + request->ssids = (void *)request + + struct_size(request, channels, n_channels); request->n_ssids = 1; memcpy(request->ssids[0].ssid, wdev->conn->params.ssid, From 058c9026ad79dc98572442fd4c7e9a36aba6f596 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 6 Sep 2024 15:31:51 +0300 Subject: [PATCH 060/534] wifi: mac80211: use two-phase skb reclamation in ieee80211_do_stop() [ Upstream commit 9d301de12da6e1bb069a9835c38359b8e8135121 ] Since '__dev_queue_xmit()' should be called with interrupts enabled, the following backtrace: ieee80211_do_stop() ... spin_lock_irqsave(&local->queue_stop_reason_lock, flags) ... ieee80211_free_txskb() ieee80211_report_used_skb() ieee80211_report_ack_skb() cfg80211_mgmt_tx_status_ext() nl80211_frame_tx_status() genlmsg_multicast_netns() genlmsg_multicast_netns_filtered() nlmsg_multicast_filtered() netlink_broadcast_filtered() do_one_broadcast() netlink_broadcast_deliver() __netlink_sendskb() netlink_deliver_tap() __netlink_deliver_tap_skb() dev_queue_xmit() __dev_queue_xmit() ; with IRQS disabled ... spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags) issues the warning (as reported by syzbot reproducer): WARNING: CPU: 2 PID: 5128 at kernel/softirq.c:362 __local_bh_enable_ip+0xc3/0x120 Fix this by implementing a two-phase skb reclamation in 'ieee80211_do_stop()', where actual work is performed outside of a section with interrupts disabled. Fixes: 5061b0c2b906 ("mac80211: cooperate more with network namespaces") Reported-by: syzbot+1a3986bbd3169c307819@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1a3986bbd3169c307819 Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20240906123151.351647-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/iface.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a7c39e895b1e..fae701248f05 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -466,6 +466,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do { struct ieee80211_local *local = sdata->local; unsigned long flags; + struct sk_buff_head freeq; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; int i, flushed; @@ -652,18 +653,32 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do skb_queue_purge(&sdata->status_queue); } + /* + * Since ieee80211_free_txskb() may issue __dev_queue_xmit() + * which should be called with interrupts enabled, reclamation + * is done in two phases: + */ + __skb_queue_head_init(&freeq); + + /* unlink from local queues... */ spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_walk_safe(&local->pending[i], skb, tmp) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (info->control.vif == &sdata->vif) { __skb_unlink(skb, &local->pending[i]); - ieee80211_free_txskb(&local->hw, skb); + __skb_queue_tail(&freeq, skb); } } } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + /* ... and perform actual reclamation with interrupts enabled. */ + skb_queue_walk_safe(&freeq, skb, tmp) { + __skb_unlink(skb, &freeq); + ieee80211_free_txskb(&local->hw, skb); + } + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ieee80211_txq_remove_vlan(local, sdata); From 84398204c5df5aaf89453056cf0647cda9664d2b Mon Sep 17 00:00:00 2001 From: Jiawei Ye Date: Thu, 29 Aug 2024 08:17:09 +0000 Subject: [PATCH 061/534] wifi: wilc1000: fix potential RCU dereference issue in wilc_parse_join_bss_param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6d7c6ae1efb1ff68bc01d79d94fdf0388f86cdd8 ] In the `wilc_parse_join_bss_param` function, the TSF field of the `ies` structure is accessed after the RCU read-side critical section is unlocked. According to RCU usage rules, this is illegal. Reusing this pointer can lead to unpredictable behavior, including accessing memory that has been updated or causing use-after-free issues. This possible bug was identified using a static analysis tool developed by myself, specifically designed to detect RCU-related issues. To address this, the TSF value is now stored in a local variable `ies_tsf` before the RCU lock is released. The `param->tsf_lo` field is then assigned using this local variable, ensuring that the TSF value is safely accessed. Fixes: 205c50306acf ("wifi: wilc1000: fix RCU usage in connect path") Signed-off-by: Jiawei Ye Reviewed-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://patch.msgid.link/tencent_466225AA599BA49627FB26F707EE17BC5407@qq.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/hif.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index e4bb3ea6e226..25e881ee727c 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -381,6 +381,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, struct wilc_join_bss_param *param; u8 rates_len = 0; int ies_len; + u64 ies_tsf; int ret; param = kzalloc(sizeof(*param), GFP_KERNEL); @@ -396,6 +397,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, return NULL; } ies_len = ies->len; + ies_tsf = ies->tsf; rcu_read_unlock(); param->beacon_period = cpu_to_le16(bss->beacon_interval); @@ -451,7 +453,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, (u8 *)&noa_attr, sizeof(noa_attr)); if (ret > 0) { - param->tsf_lo = cpu_to_le32(ies->tsf); + param->tsf_lo = cpu_to_le32(ies_tsf); param->noa_enabled = 1; param->idx = noa_attr.index; if (noa_attr.oppps_ctwindow & IEEE80211_P2P_OPPPS_ENABLE_BIT) { From ea8d90a5b04aa7fd6f6dea89d07a359ed54b2b9b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 30 Aug 2024 17:29:27 -0400 Subject: [PATCH 062/534] Bluetooth: hci_core: Fix sending MGMT_EV_CONNECT_FAILED [ Upstream commit d47da6bd4cfa982fe903f33423b9e2ec541e9496 ] If HCI_CONN_MGMT_CONNECTED has been set then the event shall be HCI_CONN_MGMT_DISCONNECTED. Fixes: b644ba336997 ("Bluetooth: Update device_connected and device_found events to latest API") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 4 ++-- net/bluetooth/hci_conn.c | 6 ++---- net/bluetooth/mgmt.c | 13 +++++++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 29f1549ee111..0f50c0cefcb7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2224,8 +2224,8 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, bool mgmt_connected); void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); -void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, - u8 addr_type, u8 status); +void mgmt_connect_failed(struct hci_dev *hdev, struct hci_conn *conn, + u8 status); void mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure); void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d8a01eb016ad..ed95a87ef9ab 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -107,8 +107,7 @@ void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) * where a timeout + cancel does indicate an actual failure. */ if (status && status != HCI_ERROR_UNKNOWN_CONN_ID) - mgmt_connect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, status); + mgmt_connect_failed(hdev, conn, status); /* The connection attempt was doing scan for new RPA, and is * in scan phase. If params are not associated with any other @@ -1233,8 +1232,7 @@ void hci_conn_failed(struct hci_conn *conn, u8 status) hci_le_conn_failed(conn, status); break; case ACL_LINK: - mgmt_connect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, status); + mgmt_connect_failed(hdev, conn, status); break; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4ae9029b5785..149aff29e564 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9724,13 +9724,18 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, mgmt_pending_remove(cmd); } -void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, - u8 addr_type, u8 status) +void mgmt_connect_failed(struct hci_dev *hdev, struct hci_conn *conn, u8 status) { struct mgmt_ev_connect_failed ev; - bacpy(&ev.addr.bdaddr, bdaddr); - ev.addr.type = link_to_bdaddr(link_type, addr_type); + if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) { + mgmt_device_disconnected(hdev, &conn->dst, conn->type, + conn->dst_type, status, true); + return; + } + + bacpy(&ev.addr.bdaddr, &conn->dst); + ev.addr.type = link_to_bdaddr(conn->type, conn->dst_type); ev.status = mgmt_status(status); mgmt_event(MGMT_EV_CONNECT_FAILED, hdev, &ev, sizeof(ev), NULL); From 7eebbdde4b947b41ee421478ebef04794a6111c5 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 16 Aug 2023 12:05:00 -0700 Subject: [PATCH 063/534] Bluetooth: hci_sync: Ignore errors from HCI_OP_REMOTE_NAME_REQ_CANCEL [ Upstream commit cfbfeee61582e638770a1a10deef866c9adb38f5 ] This ignores errors from HCI_OP_REMOTE_NAME_REQ_CANCEL since it shouldn't interfere with the stopping of discovery and in certain conditions it seems to be failing. Link: https://github.com/bluez/bluez/issues/575 Fixes: d0b137062b2d ("Bluetooth: hci_sync: Rework init stages") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_sync.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index af7817a7c585..75515a1d2923 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5391,7 +5391,10 @@ int hci_stop_discovery_sync(struct hci_dev *hdev) if (!e) return 0; - return hci_remote_name_cancel_sync(hdev, &e->data.bdaddr); + /* Ignore cancel errors since it should interfere with stopping + * of the discovery. + */ + hci_remote_name_cancel_sync(hdev, &e->data.bdaddr); } return 0; From 80bd490ac0a3b662a489e17d8eedeb1e905a3d40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Sep 2024 15:44:49 +0000 Subject: [PATCH 064/534] sock_map: Add a cond_resched() in sock_hash_free() [ Upstream commit b1339be951ad31947ae19bc25cb08769bf255100 ] Several syzbot soft lockup reports all have in common sock_hash_free() If a map with a large number of buckets is destroyed, we need to yield the cpu when needed. Fixes: 75e68e5bf2c7 ("bpf, sockhash: Synchronize delete from bucket list on map free") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20240906154449.3742932-1-edumazet@google.com Signed-off-by: Sasha Levin --- net/core/sock_map.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index a37143d181f9..2afac40bb83c 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1171,6 +1171,7 @@ static void sock_hash_free(struct bpf_map *map) sock_put(elem->sk); sock_hash_free_elem(htab, elem); } + cond_resched(); } /* wait for psock readers accessing its map link */ From c3d941cc734e0c8dc486c062926d5249070af5e4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 4 Sep 2024 18:22:37 -0700 Subject: [PATCH 065/534] can: bcm: Clear bo->bcm_proc_read after remove_proc_entry(). [ Upstream commit 94b0818fa63555a65f6ba107080659ea6bcca63e ] syzbot reported a warning in bcm_release(). [0] The blamed change fixed another warning that is triggered when connect() is issued again for a socket whose connect()ed device has been unregistered. However, if the socket is just close()d without the 2nd connect(), the remaining bo->bcm_proc_read triggers unnecessary remove_proc_entry() in bcm_release(). Let's clear bo->bcm_proc_read after remove_proc_entry() in bcm_notify(). [0] name '4986' WARNING: CPU: 0 PID: 5234 at fs/proc/generic.c:711 remove_proc_entry+0x2e7/0x5d0 fs/proc/generic.c:711 Modules linked in: CPU: 0 UID: 0 PID: 5234 Comm: syz-executor606 Not tainted 6.11.0-rc5-syzkaller-00178-g5517ae241919 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 RIP: 0010:remove_proc_entry+0x2e7/0x5d0 fs/proc/generic.c:711 Code: ff eb 05 e8 cb 1e 5e ff 48 8b 5c 24 10 48 c7 c7 e0 f7 aa 8e e8 2a 38 8e 09 90 48 c7 c7 60 3a 1b 8c 48 89 de e8 da 42 20 ff 90 <0f> 0b 90 90 48 8b 44 24 18 48 c7 44 24 40 0e 36 e0 45 49 c7 04 07 RSP: 0018:ffffc9000345fa20 EFLAGS: 00010246 RAX: 2a2d0aee2eb64600 RBX: ffff888032f1f548 RCX: ffff888029431e00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffc9000345fb08 R08: ffffffff8155b2f2 R09: 1ffff1101710519a R10: dffffc0000000000 R11: ffffed101710519b R12: ffff888011d38640 R13: 0000000000000004 R14: 0000000000000000 R15: dffffc0000000000 FS: 0000000000000000(0000) GS:ffff8880b8800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fcfb52722f0 CR3: 000000000e734000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bcm_release+0x250/0x880 net/can/bcm.c:1578 __sock_release net/socket.c:659 [inline] sock_close+0xbc/0x240 net/socket.c:1421 __fput+0x24a/0x8a0 fs/file_table.c:422 task_work_run+0x24f/0x310 kernel/task_work.c:228 exit_task_work include/linux/task_work.h:40 [inline] do_exit+0xa2f/0x27f0 kernel/exit.c:882 do_group_exit+0x207/0x2c0 kernel/exit.c:1031 __do_sys_exit_group kernel/exit.c:1042 [inline] __se_sys_exit_group kernel/exit.c:1040 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1040 x64_sys_call+0x2634/0x2640 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fcfb51ee969 Code: Unable to access opcode bytes at 0x7fcfb51ee93f. RSP: 002b:00007ffce0109ca8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fcfb51ee969 RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000001 RBP: 00007fcfb526f3b0 R08: ffffffffffffffb8 R09: 0000555500000000 R10: 0000555500000000 R11: 0000000000000246 R12: 00007fcfb526f3b0 R13: 0000000000000000 R14: 00007fcfb5271ee0 R15: 00007fcfb51bf160 Fixes: 76fe372ccb81 ("can: bcm: Remove proc entry when dev is unregistered.") Reported-by: syzbot+0532ac7a06fb1a03187e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0532ac7a06fb1a03187e Tested-by: syzbot+0532ac7a06fb1a03187e@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20240905012237.79683-1-kuniyu@amazon.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/bcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 00208ee13e57..a1f5db0fd5d4 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1429,8 +1429,10 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg, /* remove device reference, if this is our bound device */ if (bo->bound && bo->ifindex == dev->ifindex) { #if IS_ENABLED(CONFIG_PROC_FS) - if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) + if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) { remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir); + bo->bcm_proc_read = NULL; + } #endif bo->bound = 0; bo->ifindex = 0; From 7fb4f5605c3e77fa36513e0572d4f4690f562d4a Mon Sep 17 00:00:00 2001 From: Jake Hamby Date: Fri, 6 Sep 2024 23:19:51 +0000 Subject: [PATCH 066/534] can: m_can: enable NAPI before enabling interrupts [ Upstream commit 801ad2f87b0c6d0c34a75a4efd6bfd3a2d9f9298 ] If an interrupt (RX-complete or error flag) is set when bringing up the CAN device, e.g. due to CAN bus traffic before initializing the device, when m_can_start() is called and interrupts are enabled, m_can_isr() is called immediately, which disables all CAN interrupts and calls napi_schedule(). Because napi_enable() isn't called until later in m_can_open(), the call to napi_schedule() never schedules the m_can_poll() callback and the device is left with interrupts disabled and can't receive any CAN packets until rebooted. This can be verified by running "cansend" from another device before setting the bitrate and calling "ip link set up can0" on the test device. Adding debug lines to m_can_isr() shows it's called with flags (IR_EP | IR_EW | IR_CRCE), which calls m_can_disable_all_interrupts() and napi_schedule(), and then m_can_poll() is never called. Move the call to napi_enable() above the call to m_can_start() to enable any initial interrupt flags to be handled by m_can_poll() so that interrupts are reenabled. Add a call to napi_disable() in the error handling section of m_can_open(), to handle the case where later functions return errors. Also, in m_can_close(), move the call to napi_disable() below the call to m_can_stop() to ensure all interrupts are handled when bringing down the device. This race condition is much less likely to occur. Tested on a Microchip SAMA7G54 MPU. The fix should be applicable to any SoC with a Bosch M_CAN controller. Signed-off-by: Jake Hamby Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Link: https://patch.msgid.link/20240910-can-m_can-fix-ifup-v3-1-6c1720ba45ce@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/m_can/m_can.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index fb77fd74de27..66221fdf785f 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1599,9 +1599,6 @@ static int m_can_close(struct net_device *dev) netif_stop_queue(dev); - if (!cdev->is_peripheral) - napi_disable(&cdev->napi); - m_can_stop(dev); m_can_clk_stop(cdev); free_irq(dev->irq, dev); @@ -1611,6 +1608,8 @@ static int m_can_close(struct net_device *dev) destroy_workqueue(cdev->tx_wq); cdev->tx_wq = NULL; can_rx_offload_disable(&cdev->offload); + } else { + napi_disable(&cdev->napi); } close_candev(dev); @@ -1842,6 +1841,8 @@ static int m_can_open(struct net_device *dev) if (cdev->is_peripheral) can_rx_offload_enable(&cdev->offload); + else + napi_enable(&cdev->napi); /* register interrupt handler */ if (cdev->is_peripheral) { @@ -1873,9 +1874,6 @@ static int m_can_open(struct net_device *dev) if (err) goto exit_start_fail; - if (!cdev->is_peripheral) - napi_enable(&cdev->napi); - netif_start_queue(dev); return 0; @@ -1889,6 +1887,8 @@ exit_irq_fail: out_wq_fail: if (cdev->is_peripheral) can_rx_offload_disable(&cdev->offload); + else + napi_disable(&cdev->napi); close_candev(dev); exit_disable_clks: m_can_clk_stop(cdev); From d7572187bce65ec13e98e5da101a651d4cbdc2a5 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 9 Sep 2024 15:07:41 +0200 Subject: [PATCH 067/534] can: m_can: m_can_close(): stop clocks after device has been shut down [ Upstream commit 2c09b50efcad985cf920ca88baa9aa52b1999dcc ] After calling m_can_stop() an interrupt may be pending or NAPI might still be executed. This means the driver might still touch registers of the IP core after the clocks have been disabled. This is not good practice and might lead to aborts depending on the SoC integration. To avoid these potential problems, make m_can_close() symmetric to m_can_open(), i.e. stop the clocks at the end, right before shutting down the transceiver. Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Link: https://patch.msgid.link/20240910-can-m_can-fix-ifup-v3-2-6c1720ba45ce@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 66221fdf785f..97666a759595 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1600,7 +1600,6 @@ static int m_can_close(struct net_device *dev) netif_stop_queue(dev); m_can_stop(dev); - m_can_clk_stop(cdev); free_irq(dev->irq, dev); if (cdev->is_peripheral) { @@ -1614,6 +1613,7 @@ static int m_can_close(struct net_device *dev) close_candev(dev); + m_can_clk_stop(cdev); phy_power_off(cdev->transceiver); return 0; From a4a70cba57aa7ff33ad9056708ac426c46256b2a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 9 Sep 2024 16:51:52 -0400 Subject: [PATCH 068/534] Bluetooth: btusb: Fix not handling ZPL/short-transfer [ Upstream commit 7b05933340f4490ef5b09e84d644d12484b05fdf ] Requesting transfers of the exact same size of wMaxPacketSize may result in ZPL/short-transfer since the USB stack cannot handle it as we are limiting the buffer size to be the same as wMaxPacketSize. Also, in terms of throughput this change has the same effect to interrupt endpoint as 290ba200815f "Bluetooth: Improve USB driver throughput by increasing the frame size" had for the bulk endpoint, so users of the advertisement bearer (e.g. BT Mesh) may benefit from this change. Fixes: 5e23b923da03 ("[Bluetooth] Add generic driver for Bluetooth USB devices") Signed-off-by: Luiz Augusto von Dentz Tested-by: Kiran K Signed-off-by: Sasha Levin --- drivers/bluetooth/btusb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c495fceda20a..0a58106207b0 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1352,7 +1352,10 @@ static int btusb_submit_intr_urb(struct hci_dev *hdev, gfp_t mem_flags) if (!urb) return -ENOMEM; - size = le16_to_cpu(data->intr_ep->wMaxPacketSize); + /* Use maximum HCI Event size so the USB stack handles + * ZPL/short-transfer automatically. + */ + size = HCI_MAX_EVENT_SIZE; buf = kmalloc(size, mem_flags); if (!buf) { From 61761f08e361609fd611d326b21408a058138a65 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 11 Sep 2024 11:20:58 +0200 Subject: [PATCH 069/534] bareudp: Pull inner IP header in bareudp_udp_encap_recv(). [ Upstream commit 45fa29c85117170b0508790f878b13ec6593c888 ] Bareudp reads the inner IP header to get the ECN value. Therefore, it needs to ensure that it's part of the skb's linear data. This is similar to the vxlan and geneve fixes for that same problem: * commit f7789419137b ("vxlan: Pull inner IP header in vxlan_rcv().") * commit 1ca1ba465e55 ("geneve: make sure to pull inner header in geneve_rx()") Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Guillaume Nault Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/5205940067c40218a70fbb888080466b2fc288db.1726046181.git.gnault@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/bareudp.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 277493e41b07..d0759d8bf730 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -67,6 +67,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) __be16 proto; void *oiph; int err; + int nh; bareudp = rcu_dereference_sk_user_data(sk); if (!bareudp) @@ -144,10 +145,25 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } skb_dst_set(skb, &tun_dst->dst); skb->dev = bareudp->dev; - oiph = skb_network_header(skb); - skb_reset_network_header(skb); skb_reset_mac_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(bareudp->dev, rx_length_errors); + DEV_STATS_INC(bareudp->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (!ipv6_mod_enabled() || family == AF_INET) err = IP_ECN_decapsulate(oiph, skb); else From 905e83c61bdcce04a448125e989e36d2e4e74a92 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 11 Sep 2024 11:21:05 +0200 Subject: [PATCH 070/534] bareudp: Pull inner IP header on xmit. [ Upstream commit c471236b2359e6b27388475dd04fff0a5e2bf922 ] Both bareudp_xmit_skb() and bareudp6_xmit_skb() read their skb's inner IP header to get its ECN value (with ip_tunnel_ecn_encap()). Therefore we need to ensure that the inner IP header is part of the skb's linear data. Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Guillaume Nault Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/267328222f0a11519c6de04c640a4f87a38ea9ed.1726046181.git.gnault@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/bareudp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index d0759d8bf730..54767154de26 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -319,6 +319,9 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be32 saddr; int err; + if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) + return -EINVAL; + if (!sock) return -ESHUTDOWN; @@ -382,6 +385,9 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; + if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) + return -EINVAL; + if (!sock) return -ESHUTDOWN; From 1e8fc4ffa9554dfaf24a90158c2ad523dc4392bb Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 11 Sep 2024 17:44:44 +0800 Subject: [PATCH 071/534] net: enetc: Use IRQF_NO_AUTOEN flag in request_irq() [ Upstream commit 799a9225997799f7b1b579bc50a93b78b4fb2a01 ] disable_irq() after request_irq() still has a time gap in which interrupts can come. request_irq() with IRQF_NO_AUTOEN flag will disable IRQ auto-enable when request IRQ. Fixes: bbb96dc7fa1a ("enetc: Factor out the traffic start/stop procedures") Signed-off-by: Jinjie Ruan Link: https://patch.msgid.link/20240911094445.1922476-3-ruanjinjie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/enetc/enetc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 0f5a4ec505dd..18e3a9cd4fc0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2305,12 +2305,11 @@ static int enetc_setup_irqs(struct enetc_ndev_priv *priv) snprintf(v->name, sizeof(v->name), "%s-rxtx%d", priv->ndev->name, i); - err = request_irq(irq, enetc_msix, 0, v->name, v); + err = request_irq(irq, enetc_msix, IRQF_NO_AUTOEN, v->name, v); if (err) { dev_err(priv->dev, "request_irq() failed!\n"); goto irq_err; } - disable_irq(irq); v->tbier_base = hw->reg + ENETC_BDR(TX, 0, ENETC_TBIER); v->rbier = hw->reg + ENETC_BDR(RX, i, ENETC_RBIER); From 50d062b6cc90c45a0de54e9bd9903c82777d66bf Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 11 Sep 2024 15:51:11 +0200 Subject: [PATCH 072/534] r8169: disable ALDPS per default for RTL8125 [ Upstream commit b9c7ac4fe22c608acf6153a3329df2b6b6cd416c ] En-Wei reported that traffic breaks if cable is unplugged for more than 3s and then re-plugged. This was supposed to be fixed by 621735f59064 ("r8169: fix rare issue with broken rx after link-down on RTL8125"). But apparently this didn't fix the issue for everybody. The 3s threshold rang a bell, as this is the delay after which ALDPS kicks in. And indeed disabling ALDPS fixes the issue for this user. Maybe this fixes the issue in general. In a follow-up step we could remove the first fix attempt and see whether anybody complains. Fixes: f1bce4ad2f1c ("r8169: add support for RTL8125") Tested-by: En-Wei WU Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/778b9d86-05c4-4856-be59-cde4487b9e52@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_phy_config.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index b50f16786c24..6ab89f478285 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -1060,6 +1060,7 @@ static void rtl8125a_2_hw_phy_config(struct rtl8169_private *tp, phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000); rtl8168g_enable_gphy_10m(phydev); + rtl8168g_disable_aldps(phydev); rtl8125a_config_eee_phy(phydev); } @@ -1099,6 +1100,7 @@ static void rtl8125b_hw_phy_config(struct rtl8169_private *tp, phy_modify_paged(phydev, 0xbf8, 0x12, 0xe000, 0xa000); rtl8125_legacy_force_mode(phydev); + rtl8168g_disable_aldps(phydev); rtl8125b_config_eee_phy(phydev); } From a46add42bd068cef9ac12cf902dae03e610d245c Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 11 Sep 2024 19:45:57 +0200 Subject: [PATCH 073/534] net: ipv6: rpl_iptunnel: Fix memory leak in rpl_input [ Upstream commit 2c84b0aa28b9e73e8c4b4ce038269469434ae372 ] Free the skb before returning from rpl_input when skb_cow_head() fails. Use a "drop" label and goto instructions. Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel") Signed-off-by: Justin Iurman Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240911174557.11536-1-justin.iurman@uliege.be Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/rpl_iptunnel.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 2c83b7586422..db3c19a42e1c 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -263,10 +263,8 @@ static int rpl_input(struct sk_buff *skb) rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); err = rpl_do_srh(skb, rlwt); - if (unlikely(err)) { - kfree_skb(skb); - return err; - } + if (unlikely(err)) + goto drop; local_bh_disable(); dst = dst_cache_get(&rlwt->cache); @@ -286,9 +284,13 @@ static int rpl_input(struct sk_buff *skb) err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) - return err; + goto drop; return dst_input(skb); + +drop: + kfree_skb(skb); + return err; } static int nla_put_rpl_srh(struct sk_buff *skb, int attrtype, From 2b5e904deabb724c38011b14a4c3407c910a2187 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 12 Sep 2024 19:01:20 +0800 Subject: [PATCH 074/534] net: tipc: avoid possible garbage value [ Upstream commit 99655a304e450baaae6b396cb942b9e47659d644 ] Clang static checker (scan-build) warning: net/tipc/bcast.c:305:4: The expression is an uninitialized value. The computed value will also be garbage [core.uninitialized.Assign] 305 | (*cong_link_cnt)++; | ^~~~~~~~~~~~~~~~~~ tipc_rcast_xmit() will increase cong_link_cnt's value, but cong_link_cnt is uninitialized. Although it won't really cause a problem, it's better to fix it. Fixes: dca4a17d24ee ("tipc: fix potential hanging after b/rcast changing") Signed-off-by: Su Hui Reviewed-by: Justin Stitt Link: https://patch.msgid.link/20240912110119.2025503-1-suhui@nfschina.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tipc/bcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 593846d25214..114fef65f92e 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -320,8 +320,8 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, { struct tipc_msg *hdr, *_hdr; struct sk_buff_head tmpq; + u16 cong_link_cnt = 0; struct sk_buff *_skb; - u16 cong_link_cnt; int rc = 0; /* Is a cluster supporting with new capabilities ? */ From 0ceb2f2b5c813f932d6e60d3feec5e7e713da783 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Sep 2024 08:31:47 +0000 Subject: [PATCH 075/534] ipv6: avoid possible NULL deref in rt6_uncached_list_flush_dev() [ Upstream commit 04ccecfa959d3b9ae7348780d8e379c6486176ac ] Blamed commit accidentally removed a check for rt->rt6i_idev being NULL, as spotted by syzbot: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 UID: 0 PID: 10998 Comm: syz-executor Not tainted 6.11.0-rc6-syzkaller-00208-g625403177711 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 RIP: 0010:rt6_uncached_list_flush_dev net/ipv6/route.c:177 [inline] RIP: 0010:rt6_disable_ip+0x33e/0x7e0 net/ipv6/route.c:4914 Code: 41 80 3c 04 00 74 0a e8 90 d0 9b f7 48 8b 7c 24 08 48 8b 07 48 89 44 24 10 4c 89 f0 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 4c 89 f7 e8 64 d0 9b f7 48 8b 44 24 18 49 39 06 RSP: 0018:ffffc900047374e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff1100fdf8f33 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88807efc78c0 RBP: ffffc900047375d0 R08: 0000000000000003 R09: fffff520008e6e8c R10: dffffc0000000000 R11: fffff520008e6e8c R12: 1ffff1100fdf8f18 R13: ffff88807efc7998 R14: 0000000000000000 R15: ffff88807efc7930 FS: 0000000000000000(0000) GS:ffff8880b8900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002a80 CR3: 0000000022f62000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: addrconf_ifdown+0x15d/0x1bd0 net/ipv6/addrconf.c:3856 addrconf_notify+0x3cb/0x1020 notifier_call_chain+0x19f/0x3e0 kernel/notifier.c:93 call_netdevice_notifiers_extack net/core/dev.c:2032 [inline] call_netdevice_notifiers net/core/dev.c:2046 [inline] unregister_netdevice_many_notify+0xd81/0x1c40 net/core/dev.c:11352 unregister_netdevice_many net/core/dev.c:11414 [inline] unregister_netdevice_queue+0x303/0x370 net/core/dev.c:11289 unregister_netdevice include/linux/netdevice.h:3129 [inline] __tun_detach+0x6b9/0x1600 drivers/net/tun.c:685 tun_detach drivers/net/tun.c:701 [inline] tun_chr_close+0x108/0x1b0 drivers/net/tun.c:3510 __fput+0x24a/0x8a0 fs/file_table.c:422 task_work_run+0x24f/0x310 kernel/task_work.c:228 exit_task_work include/linux/task_work.h:40 [inline] do_exit+0xa2f/0x27f0 kernel/exit.c:882 do_group_exit+0x207/0x2c0 kernel/exit.c:1031 __do_sys_exit_group kernel/exit.c:1042 [inline] __se_sys_exit_group kernel/exit.c:1040 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1040 x64_sys_call+0x2634/0x2640 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f1acc77def9 Code: Unable to access opcode bytes at 0x7f1acc77decf. RSP: 002b:00007ffeb26fa738 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1acc77def9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000043 RBP: 00007f1acc7dd508 R08: 00007ffeb26f84d7 R09: 0000000000000003 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000000003 R14: 00000000ffffffff R15: 00007ffeb26fa8e0 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:rt6_uncached_list_flush_dev net/ipv6/route.c:177 [inline] RIP: 0010:rt6_disable_ip+0x33e/0x7e0 net/ipv6/route.c:4914 Code: 41 80 3c 04 00 74 0a e8 90 d0 9b f7 48 8b 7c 24 08 48 8b 07 48 89 44 24 10 4c 89 f0 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 4c 89 f7 e8 64 d0 9b f7 48 8b 44 24 18 49 39 06 RSP: 0018:ffffc900047374e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff1100fdf8f33 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88807efc78c0 RBP: ffffc900047375d0 R08: 0000000000000003 R09: fffff520008e6e8c R10: dffffc0000000000 R11: fffff520008e6e8c R12: 1ffff1100fdf8f18 R13: ffff88807efc7998 R14: 0000000000000000 R15: ffff88807efc7930 FS: 0000000000000000(0000) GS:ffff8880b8900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002a80 CR3: 0000000022f62000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: e332bc67cf5e ("ipv6: Don't call with rt6_uncached_list_flush_dev") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20240913083147.3095442-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0299886dbeb9..a9104c4c1c02 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -175,7 +175,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) struct net_device *rt_dev = rt->dst.dev; bool handled = false; - if (rt_idev->dev == dev) { + if (rt_idev && rt_idev->dev == dev) { rt->rt6i_idev = in6_dev_get(blackhole_netdev); in6_dev_put(rt_idev); handled = true; From 75a5e5909b1fb97413c46022dc192730e550711b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 12 Aug 2024 09:36:24 +0800 Subject: [PATCH 076/534] ublk: move zone report data out of request pdu [ Upstream commit 9327b51c9a9c864f5177127e09851da9d78b4943 ] ublk zoned takes 16 bytes in each request pdu just for handling REPORT_ZONE operation, this way does waste memory since request pdu is allocated statically. Store the transient zone report data into one global xarray, and remove it after the report zone request is completed. This way is reasonable since report zone is run in slow code path. Fixes: 29802d7ca33b ("ublk: enable zoned storage support") Cc: Damien Le Moal Cc: Andreas Hindborg Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240812013624.587587-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/ublk_drv.c | 62 +++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index bf7f68e90953..9cafbce1faf3 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -68,9 +68,6 @@ struct ublk_rq_data { struct llist_node node; struct kref ref; - __u64 sector; - __u32 operation; - __u32 nr_zones; }; struct ublk_uring_cmd_pdu { @@ -215,6 +212,33 @@ static inline bool ublk_queue_is_zoned(struct ublk_queue *ubq) #ifdef CONFIG_BLK_DEV_ZONED +struct ublk_zoned_report_desc { + __u64 sector; + __u32 operation; + __u32 nr_zones; +}; + +static DEFINE_XARRAY(ublk_zoned_report_descs); + +static int ublk_zoned_insert_report_desc(const struct request *req, + struct ublk_zoned_report_desc *desc) +{ + return xa_insert(&ublk_zoned_report_descs, (unsigned long)req, + desc, GFP_KERNEL); +} + +static struct ublk_zoned_report_desc *ublk_zoned_erase_report_desc( + const struct request *req) +{ + return xa_erase(&ublk_zoned_report_descs, (unsigned long)req); +} + +static struct ublk_zoned_report_desc *ublk_zoned_get_report_desc( + const struct request *req) +{ + return xa_load(&ublk_zoned_report_descs, (unsigned long)req); +} + static int ublk_get_nr_zones(const struct ublk_device *ub) { const struct ublk_param_basic *p = &ub->params.basic; @@ -321,7 +345,7 @@ static int ublk_report_zones(struct gendisk *disk, sector_t sector, unsigned int zones_in_request = min_t(unsigned int, remaining_zones, max_zones_per_request); struct request *req; - struct ublk_rq_data *pdu; + struct ublk_zoned_report_desc desc; blk_status_t status; memset(buffer, 0, buffer_length); @@ -332,20 +356,23 @@ static int ublk_report_zones(struct gendisk *disk, sector_t sector, goto out; } - pdu = blk_mq_rq_to_pdu(req); - pdu->operation = UBLK_IO_OP_REPORT_ZONES; - pdu->sector = sector; - pdu->nr_zones = zones_in_request; + desc.operation = UBLK_IO_OP_REPORT_ZONES; + desc.sector = sector; + desc.nr_zones = zones_in_request; + ret = ublk_zoned_insert_report_desc(req, &desc); + if (ret) + goto free_req; ret = blk_rq_map_kern(disk->queue, req, buffer, buffer_length, GFP_KERNEL); - if (ret) { - blk_mq_free_request(req); - goto out; - } + if (ret) + goto erase_desc; status = blk_execute_rq(req, 0); ret = blk_status_to_errno(status); +erase_desc: + ublk_zoned_erase_report_desc(req); +free_req: blk_mq_free_request(req); if (ret) goto out; @@ -379,7 +406,7 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, { struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); struct ublk_io *io = &ubq->ios[req->tag]; - struct ublk_rq_data *pdu = blk_mq_rq_to_pdu(req); + struct ublk_zoned_report_desc *desc; u32 ublk_op; switch (req_op(req)) { @@ -402,12 +429,15 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, ublk_op = UBLK_IO_OP_ZONE_RESET_ALL; break; case REQ_OP_DRV_IN: - ublk_op = pdu->operation; + desc = ublk_zoned_get_report_desc(req); + if (!desc) + return BLK_STS_IOERR; + ublk_op = desc->operation; switch (ublk_op) { case UBLK_IO_OP_REPORT_ZONES: iod->op_flags = ublk_op | ublk_req_build_flags(req); - iod->nr_zones = pdu->nr_zones; - iod->start_sector = pdu->sector; + iod->nr_zones = desc->nr_zones; + iod->start_sector = desc->sector; return BLK_STS_OK; default: return BLK_STS_IOERR; From 6e73b946a379a1dfbb62626af93843bdfb53753d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 30 Aug 2024 11:41:45 +0800 Subject: [PATCH 077/534] nbd: fix race between timeout and normal completion [ Upstream commit c9ea57c91f03bcad415e1a20113bdb2077bcf990 ] If request timetout is handled by nbd_requeue_cmd(), normal completion has to be stopped for avoiding to complete this requeued request, other use-after-free can be triggered. Fix the race by clearing NBD_CMD_INFLIGHT in nbd_requeue_cmd(), meantime make sure that cmd->lock is grabbed for clearing the flag and the requeue. Cc: Josef Bacik Cc: Yu Kuai Fixes: 2895f1831e91 ("nbd: don't clear 'NBD_CMD_INFLIGHT' flag if request is not completed") Signed-off-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20240830034145.1827742-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1089dc646b80..96b349148e57 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -181,6 +181,17 @@ static void nbd_requeue_cmd(struct nbd_cmd *cmd) { struct request *req = blk_mq_rq_from_pdu(cmd); + lockdep_assert_held(&cmd->lock); + + /* + * Clear INFLIGHT flag so that this cmd won't be completed in + * normal completion path + * + * INFLIGHT flag will be set when the cmd is queued to nbd next + * time. + */ + __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); + if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags)) blk_mq_requeue_request(req, true); } @@ -461,8 +472,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req) nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); } - mutex_unlock(&cmd->lock); nbd_requeue_cmd(cmd); + mutex_unlock(&cmd->lock); nbd_config_put(nbd); return BLK_EH_DONE; } From 7faed2896d78e48ec96229e73b30b0af6c00a9aa Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 2 Sep 2024 21:03:26 +0800 Subject: [PATCH 078/534] block, bfq: fix possible UAF for bfqq->bic with merge chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 18ad4df091dd5d067d2faa8fce1180b79f7041a7 ] 1) initial state, three tasks: Process 1 Process 2 Process 3 (BIC1) (BIC2) (BIC3) | Λ | Λ | Λ | | | | | | V | V | V | bfqq1 bfqq2 bfqq3 process ref: 1 1 1 2) bfqq1 merged to bfqq2: Process 1 Process 2 Process 3 (BIC1) (BIC2) (BIC3) | | | Λ \--------------\| | | V V | bfqq1--------->bfqq2 bfqq3 process ref: 0 2 1 3) bfqq2 merged to bfqq3: Process 1 Process 2 Process 3 (BIC1) (BIC2) (BIC3) here -> Λ | | \--------------\ \-------------\| V V bfqq1--------->bfqq2---------->bfqq3 process ref: 0 1 3 In this case, IO from Process 1 will get bfqq2 from BIC1 first, and then get bfqq3 through merge chain, and finially handle IO by bfqq3. Howerver, current code will think bfqq2 is owned by BIC1, like initial state, and set bfqq2->bic to BIC1. bfq_insert_request -> by Process 1 bfqq = bfq_init_rq(rq) bfqq = bfq_get_bfqq_handle_split bfqq = bic_to_bfqq -> get bfqq2 from BIC1 bfqq->ref++ rq->elv.priv[0] = bic rq->elv.priv[1] = bfqq if (bfqq_process_refs(bfqq) == 1) bfqq->bic = bic -> record BIC1 to bfqq2 __bfq_insert_request new_bfqq = bfq_setup_cooperator -> get bfqq3 from bfqq2->new_bfqq bfqq_request_freed(bfqq) new_bfqq->ref++ rq->elv.priv[1] = new_bfqq -> handle IO by bfqq3 Fix the problem by checking bfqq is from merge chain fist. And this might fix a following problem reported by our syzkaller(unreproducible): ================================================================== BUG: KASAN: slab-use-after-free in bfq_do_early_stable_merge block/bfq-iosched.c:5692 [inline] BUG: KASAN: slab-use-after-free in bfq_do_or_sched_stable_merge block/bfq-iosched.c:5805 [inline] BUG: KASAN: slab-use-after-free in bfq_get_queue+0x25b0/0x2610 block/bfq-iosched.c:5889 Write of size 1 at addr ffff888123839eb8 by task kworker/0:1H/18595 CPU: 0 PID: 18595 Comm: kworker/0:1H Tainted: G L 6.6.0-07439-gba2303cacfda #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Workqueue: kblockd blk_mq_requeue_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x91/0xf0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0x10d/0x610 mm/kasan/report.c:475 kasan_report+0x8e/0xc0 mm/kasan/report.c:588 bfq_do_early_stable_merge block/bfq-iosched.c:5692 [inline] bfq_do_or_sched_stable_merge block/bfq-iosched.c:5805 [inline] bfq_get_queue+0x25b0/0x2610 block/bfq-iosched.c:5889 bfq_get_bfqq_handle_split+0x169/0x5d0 block/bfq-iosched.c:6757 bfq_init_rq block/bfq-iosched.c:6876 [inline] bfq_insert_request block/bfq-iosched.c:6254 [inline] bfq_insert_requests+0x1112/0x5cf0 block/bfq-iosched.c:6304 blk_mq_insert_request+0x290/0x8d0 block/blk-mq.c:2593 blk_mq_requeue_work+0x6bc/0xa70 block/blk-mq.c:1502 process_one_work kernel/workqueue.c:2627 [inline] process_scheduled_works+0x432/0x13f0 kernel/workqueue.c:2700 worker_thread+0x6f2/0x1160 kernel/workqueue.c:2781 kthread+0x33c/0x440 kernel/kthread.c:388 ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:305 Allocated by task 20776: kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 __kasan_slab_alloc+0x87/0x90 mm/kasan/common.c:328 kasan_slab_alloc include/linux/kasan.h:188 [inline] slab_post_alloc_hook mm/slab.h:763 [inline] slab_alloc_node mm/slub.c:3458 [inline] kmem_cache_alloc_node+0x1a4/0x6f0 mm/slub.c:3503 ioc_create_icq block/blk-ioc.c:370 [inline] ioc_find_get_icq+0x180/0xaa0 block/blk-ioc.c:436 bfq_prepare_request+0x39/0xf0 block/bfq-iosched.c:6812 blk_mq_rq_ctx_init.isra.7+0x6ac/0xa00 block/blk-mq.c:403 __blk_mq_alloc_requests+0xcc0/0x1070 block/blk-mq.c:517 blk_mq_get_new_requests block/blk-mq.c:2940 [inline] blk_mq_submit_bio+0x624/0x27c0 block/blk-mq.c:3042 __submit_bio+0x331/0x6f0 block/blk-core.c:624 __submit_bio_noacct_mq block/blk-core.c:703 [inline] submit_bio_noacct_nocheck+0x816/0xb40 block/blk-core.c:732 submit_bio_noacct+0x7a6/0x1b50 block/blk-core.c:826 xlog_write_iclog+0x7d5/0xa00 fs/xfs/xfs_log.c:1958 xlog_state_release_iclog+0x3b8/0x720 fs/xfs/xfs_log.c:619 xlog_cil_push_work+0x19c5/0x2270 fs/xfs/xfs_log_cil.c:1330 process_one_work kernel/workqueue.c:2627 [inline] process_scheduled_works+0x432/0x13f0 kernel/workqueue.c:2700 worker_thread+0x6f2/0x1160 kernel/workqueue.c:2781 kthread+0x33c/0x440 kernel/kthread.c:388 ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:305 Freed by task 946: kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x50 mm/kasan/generic.c:522 ____kasan_slab_free mm/kasan/common.c:236 [inline] __kasan_slab_free+0x12c/0x1c0 mm/kasan/common.c:244 kasan_slab_free include/linux/kasan.h:164 [inline] slab_free_hook mm/slub.c:1815 [inline] slab_free_freelist_hook mm/slub.c:1841 [inline] slab_free mm/slub.c:3786 [inline] kmem_cache_free+0x118/0x6f0 mm/slub.c:3808 rcu_do_batch+0x35c/0xe30 kernel/rcu/tree.c:2189 rcu_core+0x819/0xd90 kernel/rcu/tree.c:2462 __do_softirq+0x1b0/0x7a2 kernel/softirq.c:553 Last potentially related work creation: kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 __kasan_record_aux_stack+0xaf/0xc0 mm/kasan/generic.c:492 __call_rcu_common kernel/rcu/tree.c:2712 [inline] call_rcu+0xce/0x1020 kernel/rcu/tree.c:2826 ioc_destroy_icq+0x54c/0x830 block/blk-ioc.c:105 ioc_release_fn+0xf0/0x360 block/blk-ioc.c:124 process_one_work kernel/workqueue.c:2627 [inline] process_scheduled_works+0x432/0x13f0 kernel/workqueue.c:2700 worker_thread+0x6f2/0x1160 kernel/workqueue.c:2781 kthread+0x33c/0x440 kernel/kthread.c:388 ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:305 Second to last potentially related work creation: kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 __kasan_record_aux_stack+0xaf/0xc0 mm/kasan/generic.c:492 __call_rcu_common kernel/rcu/tree.c:2712 [inline] call_rcu+0xce/0x1020 kernel/rcu/tree.c:2826 ioc_destroy_icq+0x54c/0x830 block/blk-ioc.c:105 ioc_release_fn+0xf0/0x360 block/blk-ioc.c:124 process_one_work kernel/workqueue.c:2627 [inline] process_scheduled_works+0x432/0x13f0 kernel/workqueue.c:2700 worker_thread+0x6f2/0x1160 kernel/workqueue.c:2781 kthread+0x33c/0x440 kernel/kthread.c:388 ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:305 The buggy address belongs to the object at ffff888123839d68 which belongs to the cache bfq_io_cq of size 1360 The buggy address is located 336 bytes inside of freed 1360-byte region [ffff888123839d68, ffff88812383a2b8) The buggy address belongs to the physical page: page:ffffea00048e0e00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88812383f588 pfn:0x123838 head:ffffea00048e0e00 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0x17ffffc0000a40(workingset|slab|head|node=0|zone=2|lastcpupid=0x1fffff) page_type: 0xffffffff() raw: 0017ffffc0000a40 ffff88810588c200 ffffea00048ffa10 ffff888105889488 raw: ffff88812383f588 0000000000150006 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888123839d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888123839e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888123839e80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888123839f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888123839f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 36eca8948323 ("block, bfq: add Early Queue Merge (EQM)") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20240902130329.3787024-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 3cce6de464a7..7d13420a16d6 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6932,7 +6932,8 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) * addition, if the queue has also just been split, we have to * resume its state. */ - if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) { + if (likely(bfqq != &bfqd->oom_bfqq) && !bfqq->new_bfqq && + bfqq_process_refs(bfqq) == 1) { bfqq->bic = bic; if (split) { /* From e50c9a352676943780c59c418d08aca7d6486e96 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 2 Sep 2024 21:03:27 +0800 Subject: [PATCH 079/534] block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0e456dba86c7f9a19792204a044835f1ca2c8dbb ] Consider the following merge chain: Process 1 Process 2 Process 3 Process 4 (BIC1) (BIC2) (BIC3) (BIC4) Λ | | | \--------------\ \-------------\ \-------------\| V V V bfqq1--------->bfqq2---------->bfqq3----------->bfqq4 IO from Process 1 will get bfqf2 from BIC1 first, then bfq_setup_cooperator() will found bfqq2 already merged to bfqq3 and then handle this IO from bfqq3. However, the merge chain can be much deeper and bfqq3 can be merged to other bfqq as well. Fix this problem by iterating to the last bfqq in bfq_setup_cooperator(). Fixes: 36eca8948323 ("block, bfq: add Early Queue Merge (EQM)") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20240902130329.3787024-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 7d13420a16d6..bdb7ceb68089 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2911,8 +2911,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_iocq_bfqq_data *bfqq_data = &bic->bfqq_data[a_idx]; /* if a merge has already been setup, then proceed with that first */ - if (bfqq->new_bfqq) - return bfqq->new_bfqq; + new_bfqq = bfqq->new_bfqq; + if (new_bfqq) { + while (new_bfqq->new_bfqq) + new_bfqq = new_bfqq->new_bfqq; + return new_bfqq; + } /* * Check delayed stable merge for rotational or non-queueing From 19f3bec2ac4be329b9bd12b18a989b867618d2d8 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 2 Sep 2024 21:03:28 +0800 Subject: [PATCH 080/534] block, bfq: don't break merge chain in bfq_split_bfqq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 42c306ed723321af4003b2a41bb73728cab54f85 ] Consider the following scenario: Process 1 Process 2 Process 3 Process 4 (BIC1) (BIC2) (BIC3) (BIC4) Λ | | | \-------------\ \-------------\ \--------------\| V V V bfqq1--------->bfqq2---------->bfqq3----------->bfqq4 ref 0 1 2 4 If Process 1 issue a new IO and bfqq2 is found, and then bfq_init_rq() decide to spilt bfqq2 by bfq_split_bfqq(). Howerver, procress reference of bfqq2 is 1 and bfq_split_bfqq() just clear the coop flag, which will break the merge chain. Expected result: caller will allocate a new bfqq for BIC1 Process 1 Process 2 Process 3 Process 4 (BIC1) (BIC2) (BIC3) (BIC4) | | | \-------------\ \--------------\| V V bfqq1--------->bfqq2---------->bfqq3----------->bfqq4 ref 0 0 1 3 Since the condition is only used for the last bfqq4 when the previous bfqq2 and bfqq3 are already splited. Fix the problem by checking if bfqq is the last one in the merge chain as well. Fixes: 36eca8948323 ("block, bfq: add Early Queue Merge (EQM)") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20240902130329.3787024-4-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index bdb7ceb68089..2c09c298d894 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6725,7 +6725,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); - if (bfqq_process_refs(bfqq) == 1) { + if (bfqq_process_refs(bfqq) == 1 && !bfqq->new_bfqq) { bfqq->pid = current->pid; bfq_clear_bfqq_coop(bfqq); bfq_clear_bfqq_split_coop(bfqq); From 81b048b9484bf8b3c0ad6e901a6b79fb941173b0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Jul 2024 15:35:29 +0100 Subject: [PATCH 081/534] cachefiles: Fix non-taking of sb_writers around set/removexattr [ Upstream commit 80887f31672970abae3aaa9cf62ac72a124e7c89 ] Unlike other vfs_xxxx() calls, vfs_setxattr() and vfs_removexattr() don't take the sb_writers lock, so the caller should do it for them. Fix cachefiles to do this. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") Signed-off-by: David Howells cc: Christian Brauner cc: Gao Xiang cc: netfs@lists.linux.dev cc: linux-erofs@lists.ozlabs.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-3-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/cachefiles/xattr.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 4dd8a993c60a..7c6f260a3be5 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -64,9 +64,15 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object) memcpy(buf->data, fscache_get_aux(object->cookie), len); ret = cachefiles_inject_write_error(); - if (ret == 0) - ret = vfs_setxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, - buf, sizeof(struct cachefiles_xattr) + len, 0); + if (ret == 0) { + ret = mnt_want_write_file(file); + if (ret == 0) { + ret = vfs_setxattr(&nop_mnt_idmap, dentry, + cachefiles_xattr_cache, buf, + sizeof(struct cachefiles_xattr) + len, 0); + mnt_drop_write_file(file); + } + } if (ret < 0) { trace_cachefiles_vfs_error(object, file_inode(file), ret, cachefiles_trace_setxattr_error); @@ -151,8 +157,14 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, int ret; ret = cachefiles_inject_remove_error(); - if (ret == 0) - ret = vfs_removexattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache); + if (ret == 0) { + ret = mnt_want_write(cache->mnt); + if (ret == 0) { + ret = vfs_removexattr(&nop_mnt_idmap, dentry, + cachefiles_xattr_cache); + mnt_drop_write(cache->mnt); + } + } if (ret < 0) { trace_cachefiles_vfs_error(object, d_inode(dentry), ret, cachefiles_trace_remxattr_error); @@ -208,9 +220,15 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) memcpy(buf->data, p, volume->vcookie->coherency_len); ret = cachefiles_inject_write_error(); - if (ret == 0) - ret = vfs_setxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, - buf, len, 0); + if (ret == 0) { + ret = mnt_want_write(volume->cache->mnt); + if (ret == 0) { + ret = vfs_setxattr(&nop_mnt_idmap, dentry, + cachefiles_xattr_cache, + buf, len, 0); + mnt_drop_write(volume->cache->mnt); + } + } if (ret < 0) { trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret, cachefiles_trace_setxattr_error); From 0c9b52bfee0ebf0bfbeb666c9c60e6e2d34aa1e6 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 9 Sep 2024 11:19:11 +0800 Subject: [PATCH 082/534] erofs: fix incorrect symlink detection in fast symlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9ed50b8231e37b1ae863f5dec8153b98d9f389b4 ] Fast symlink can be used if the on-disk symlink data is stored in the same block as the on-disk inode, so we don’t need to trigger another I/O for symlink data. However, currently fs correction could be reported _incorrectly_ if inode xattrs are too large. In fact, these should be valid images although they cannot be handled as fast symlinks. Many thanks to Colin for reporting this! Reported-by: Colin Walters Reported-by: https://honggfuzz.dev/ Link: https://lore.kernel.org/r/bb2dd430-7de0-47da-ae5b-82ab2dd4d945@app.fastmail.com Fixes: 431339ba9042 ("staging: erofs: add inode operations") [ Note that it's a runtime misbehavior instead of a security issue. ] Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240909031911.1174718-1-hsiangkao@linux.alibaba.com Signed-off-by: Sasha Levin --- fs/erofs/inode.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index edc8ec7581b8..9e40bee3682f 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -205,12 +205,14 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr, unsigned int m_pofs) { struct erofs_inode *vi = EROFS_I(inode); - unsigned int bsz = i_blocksize(inode); + loff_t off; char *lnk; - /* if it cannot be handled with fast symlink scheme */ - if (vi->datalayout != EROFS_INODE_FLAT_INLINE || - inode->i_size >= bsz || inode->i_size < 0) { + m_pofs += vi->xattr_isize; + /* check if it cannot be handled with fast symlink scheme */ + if (vi->datalayout != EROFS_INODE_FLAT_INLINE || inode->i_size < 0 || + check_add_overflow(m_pofs, inode->i_size, &off) || + off > i_blocksize(inode)) { inode->i_op = &erofs_symlink_iops; return 0; } @@ -219,16 +221,6 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr, if (!lnk) return -ENOMEM; - m_pofs += vi->xattr_isize; - /* inline symlink data shouldn't cross block boundary */ - if (m_pofs + inode->i_size > bsz) { - kfree(lnk); - erofs_err(inode->i_sb, - "inline data cross block boundary @ nid %llu", - vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } memcpy(lnk, kaddr + m_pofs, inode->i_size); lnk[inode->i_size] = '\0'; From 0780451f03bf518bc032a7c584de8f92e2d39d7f Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 9 Sep 2024 21:41:48 +0800 Subject: [PATCH 083/534] block, bfq: fix uaf for accessing waker_bfqq after splitting [ Upstream commit 1ba0403ac6447f2d63914fb760c44a3b19c44eaf ] After commit 42c306ed7233 ("block, bfq: don't break merge chain in bfq_split_bfqq()"), if the current procress is the last holder of bfqq, the bfqq can be freed after bfq_split_bfqq(). Hence recored the bfqq and then access bfqq->waker_bfqq may trigger UAF. What's more, the waker_bfqq may in the merge chain of bfqq, hence just recored waker_bfqq is still not safe. Fix the problem by adding a helper bfq_waker_bfqq() to check if bfqq->waker_bfqq is in the merge chain, and current procress is the only holder. Fixes: 42c306ed7233 ("block, bfq: don't break merge chain in bfq_split_bfqq()") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20240909134154.954924-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 2c09c298d894..055ba1f98009 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6823,6 +6823,31 @@ static void bfq_prepare_request(struct request *rq) rq->elv.priv[0] = rq->elv.priv[1] = NULL; } +static struct bfq_queue *bfq_waker_bfqq(struct bfq_queue *bfqq) +{ + struct bfq_queue *new_bfqq = bfqq->new_bfqq; + struct bfq_queue *waker_bfqq = bfqq->waker_bfqq; + + if (!waker_bfqq) + return NULL; + + while (new_bfqq) { + if (new_bfqq == waker_bfqq) { + /* + * If waker_bfqq is in the merge chain, and current + * is the only procress. + */ + if (bfqq_process_refs(waker_bfqq) == 1) + return NULL; + break; + } + + new_bfqq = new_bfqq->new_bfqq; + } + + return waker_bfqq; +} + /* * If needed, init rq, allocate bfq data structures associated with * rq, and increment reference counters in the destination bfq_queue @@ -6884,7 +6909,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) /* If the queue was seeky for too long, break it apart. */ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) && !bic->bfqq_data[a_idx].stably_merged) { - struct bfq_queue *old_bfqq = bfqq; + struct bfq_queue *waker_bfqq = bfq_waker_bfqq(bfqq); /* Update bic before losing reference to bfqq */ if (bfq_bfqq_in_large_burst(bfqq)) @@ -6904,7 +6929,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) bfqq_already_existing = true; if (!bfqq_already_existing) { - bfqq->waker_bfqq = old_bfqq->waker_bfqq; + bfqq->waker_bfqq = waker_bfqq; bfqq->tentative_waker_bfqq = NULL; /* @@ -6914,7 +6939,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) * woken_list of the waker. See * bfq_check_waker for details. */ - if (bfqq->waker_bfqq) + if (waker_bfqq) hlist_add_head(&bfqq->woken_list_node, &bfqq->waker_bfqq->woken_list); } From c3eba0a4e940fb4a2142c06f45a161b4a39aee48 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 9 Sep 2024 21:41:49 +0800 Subject: [PATCH 084/534] block, bfq: fix procress reference leakage for bfqq in merge chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 73aeab373557fa6ee4ae0b742c6211ccd9859280 ] Original state: Process 1 Process 2 Process 3 Process 4 (BIC1) (BIC2) (BIC3) (BIC4) Λ | | | \--------------\ \-------------\ \-------------\| V V V bfqq1--------->bfqq2---------->bfqq3----------->bfqq4 ref 0 1 2 4 After commit 0e456dba86c7 ("block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator()"), if P1 issues a new IO: Without the patch: Process 1 Process 2 Process 3 Process 4 (BIC1) (BIC2) (BIC3) (BIC4) Λ | | | \------------------------------\ \-------------\| V V bfqq1--------->bfqq2---------->bfqq3----------->bfqq4 ref 0 0 2 4 bfqq3 will be used to handle IO from P1, this is not expected, IO should be redirected to bfqq4; With the patch: ------------------------------------------- | | Process 1 Process 2 Process 3 | Process 4 (BIC1) (BIC2) (BIC3) | (BIC4) | | | | \-------------\ \-------------\| V V bfqq1--------->bfqq2---------->bfqq3----------->bfqq4 ref 0 0 2 4 IO is redirected to bfqq4, however, procress reference of bfqq3 is still 2, while there is only P2 using it. Fix the problem by calling bfq_merge_bfqqs() for each bfqq in the merge chain. Also change bfqq_merge_bfqqs() to return new_bfqq to simplify code. Fixes: 0e456dba86c7 ("block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator()") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20240909134154.954924-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 055ba1f98009..7e0dcded5713 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3129,10 +3129,12 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_put_queue(bfqq); } -static void -bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, - struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd, + struct bfq_io_cq *bic, + struct bfq_queue *bfqq) { + struct bfq_queue *new_bfqq = bfqq->new_bfqq; + bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", (unsigned long)new_bfqq->pid); /* Save weight raising and idle window of the merged queues */ @@ -3226,6 +3228,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, bfq_reassign_last_bfqq(bfqq, new_bfqq); bfq_release_process_ref(bfqd, bfqq); + + return new_bfqq; } static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, @@ -3261,14 +3265,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, * fulfilled, i.e., bic can be redirected to new_bfqq * and bfqq can be put. */ - bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq, - new_bfqq); - /* - * If we get here, bio will be queued into new_queue, - * so use new_bfqq to decide whether bio and rq can be - * merged. - */ - bfqq = new_bfqq; + while (bfqq != new_bfqq) + bfqq = bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq); /* * Change also bqfd->bio_bfqq, as @@ -5703,9 +5701,7 @@ bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq, * state before killing it. */ bfqq->bic = bic; - bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); - - return new_bfqq; + return bfq_merge_bfqqs(bfqd, bic, bfqq); } /* @@ -6160,6 +6156,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) bool waiting, idle_timer_disabled = false; if (new_bfqq) { + struct bfq_queue *old_bfqq = bfqq; /* * Release the request's reference to the old bfqq * and make sure one is taken to the shared queue. @@ -6176,18 +6173,18 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) * new_bfqq. */ if (bic_to_bfqq(RQ_BIC(rq), true, - bfq_actuator_index(bfqd, rq->bio)) == bfqq) - bfq_merge_bfqqs(bfqd, RQ_BIC(rq), - bfqq, new_bfqq); + bfq_actuator_index(bfqd, rq->bio)) == bfqq) { + while (bfqq != new_bfqq) + bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq); + } - bfq_clear_bfqq_just_created(bfqq); + bfq_clear_bfqq_just_created(old_bfqq); /* * rq is about to be enqueued into new_bfqq, * release rq reference on bfqq */ - bfq_put_queue(bfqq); + bfq_put_queue(old_bfqq); rq->elv.priv[1] = new_bfqq; - bfqq = new_bfqq; } bfq_update_io_thinktime(bfqd, bfqq); From 7b3a35584db4c9690a8852356a69bea4a37e7a08 Mon Sep 17 00:00:00 2001 From: Felix Moessbauer Date: Tue, 10 Sep 2024 19:11:56 +0200 Subject: [PATCH 085/534] io_uring/io-wq: do not allow pinning outside of cpuset [ Upstream commit 0997aa5497c714edbb349ca366d28bd550ba3408 ] The io worker threads are userland threads that just never exit to the userland. By that, they are also assigned to a cgroup (the group of the creating task). When changing the affinity of the io_wq thread via syscall, we must only allow cpumasks within the limits defined by the cpuset controller of the cgroup (if enabled). Fixes: da64d6db3bd3 ("io_uring: One wqe per wq") Signed-off-by: Felix Moessbauer Link: https://lore.kernel.org/r/20240910171157.166423-2-felix.moessbauer@siemens.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/io-wq.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 98c9cfb98306..2df8d9873849 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1324,17 +1325,29 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node) int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask) { + cpumask_var_t allowed_mask; + int ret = 0; + if (!tctx || !tctx->io_wq) return -EINVAL; + if (!alloc_cpumask_var(&allowed_mask, GFP_KERNEL)) + return -ENOMEM; + rcu_read_lock(); - if (mask) - cpumask_copy(tctx->io_wq->cpu_mask, mask); - else - cpumask_copy(tctx->io_wq->cpu_mask, cpu_possible_mask); + cpuset_cpus_allowed(tctx->io_wq->task, allowed_mask); + if (mask) { + if (cpumask_subset(mask, allowed_mask)) + cpumask_copy(tctx->io_wq->cpu_mask, mask); + else + ret = -EINVAL; + } else { + cpumask_copy(tctx->io_wq->cpu_mask, allowed_mask); + } rcu_read_unlock(); - return 0; + free_cpumask_var(allowed_mask); + return ret; } /* From 5740c0fa9367d351972b334919ef10eae8385d3f Mon Sep 17 00:00:00 2001 From: Felix Moessbauer Date: Tue, 10 Sep 2024 19:11:57 +0200 Subject: [PATCH 086/534] io_uring/io-wq: inherit cpuset of cgroup in io worker [ Upstream commit 84eacf177faa605853c58e5b1c0d9544b88c16fd ] The io worker threads are userland threads that just never exit to the userland. By that, they are also assigned to a cgroup (the group of the creating task). When creating a new io worker, this worker should inherit the cpuset of the cgroup. Fixes: da64d6db3bd3 ("io_uring: One wqe per wq") Signed-off-by: Felix Moessbauer Link: https://lore.kernel.org/r/20240910171157.166423-3-felix.moessbauer@siemens.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/io-wq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 2df8d9873849..a1e31723c9ed 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1170,7 +1170,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) if (!alloc_cpumask_var(&wq->cpu_mask, GFP_KERNEL)) goto err; - cpumask_copy(wq->cpu_mask, cpu_possible_mask); + cpuset_cpus_allowed(data->task, wq->cpu_mask); wq->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; wq->acct[IO_WQ_ACCT_UNBOUND].max_workers = task_rlimit(current, RLIMIT_NPROC); From 0d7ddfc892844d1c6cf7ce72022fa4fcbc489196 Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Fri, 12 Jan 2024 00:15:18 +0100 Subject: [PATCH 087/534] block: print symbolic error name instead of error code [ Upstream commit 25c1772a0493463408489b1fae65cf77fe46cac1 ] Utilize the %pe print specifier to get the symbolic error name as a string (i.e "-ENOMEM") in the log message instead of the error code to increase its readablility. This change was suggested in https://lore.kernel.org/all/92972476-0b1f-4d0a-9951-af3fc8bc6e65@suswa.mountain/ Signed-off-by: Christian Heusel Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20240111231521.1596838-1-christian@heusel.eu Signed-off-by: Jens Axboe Stable-dep-of: 26e197b7f924 ("block: fix potential invalid pointer dereference in blk_add_partition") Signed-off-by: Sasha Levin --- block/partitions/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index 962e4b57d64a..6b2ef9977617 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -575,8 +575,8 @@ static bool blk_add_partition(struct gendisk *disk, part = add_partition(disk, p, from, size, state->parts[p].flags, &state->parts[p].info); if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) { - printk(KERN_ERR " %s: p%d could not be added: %ld\n", - disk->disk_name, p, -PTR_ERR(part)); + printk(KERN_ERR " %s: p%d could not be added: %pe\n", + disk->disk_name, p, part); return true; } From 80f5bfbb80ea1615290dbc24f49d3d8c86db58fe Mon Sep 17 00:00:00 2001 From: Riyan Dhiman Date: Wed, 11 Sep 2024 18:59:54 +0530 Subject: [PATCH 088/534] block: fix potential invalid pointer dereference in blk_add_partition [ Upstream commit 26e197b7f9240a4ac301dd0ad520c0c697c2ea7d ] The blk_add_partition() function initially used a single if-condition (IS_ERR(part)) to check for errors when adding a partition. This was modified to handle the specific case of -ENXIO separately, allowing the function to proceed without logging the error in this case. However, this change unintentionally left a path where md_autodetect_dev() could be called without confirming that part is a valid pointer. This commit separates the error handling logic by splitting the initial if-condition, improving code readability and handling specific error scenarios explicitly. The function now distinguishes the general error case from -ENXIO without altering the existing behavior of md_autodetect_dev() calls. Fixes: b72053072c0b (block: allow partitions on host aware zone devices) Signed-off-by: Riyan Dhiman Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240911132954.5874-1-riyandhiman14@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/partitions/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index 6b2ef9977617..fc0ab5d8ab70 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -574,9 +574,11 @@ static bool blk_add_partition(struct gendisk *disk, part = add_partition(disk, p, from, size, state->parts[p].flags, &state->parts[p].info); - if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) { - printk(KERN_ERR " %s: p%d could not be added: %pe\n", - disk->disk_name, p, part); + if (IS_ERR(part)) { + if (PTR_ERR(part) != -ENXIO) { + printk(KERN_ERR " %s: p%d could not be added: %pe\n", + disk->disk_name, p, part); + } return true; } From 1b08f7b5f56d1ac8150bc084e9999b3abf5431fa Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 24 Jul 2024 16:40:47 +0800 Subject: [PATCH 089/534] spi: ppc4xx: handle irq_of_parse_and_map() errors [ Upstream commit 0f245463b01ea254ae90e1d0389e90b0e7d8dc75 ] Zero and negative number is not a valid IRQ for in-kernel code and the irq_of_parse_and_map() function returns zero on error. So this check for valid IRQs should only accept values > 0. Fixes: 44dab88e7cc9 ("spi: add spi_ppc4xx driver") Signed-off-by: Ma Ke Link: https://patch.msgid.link/20240724084047.1506084-1-make24@iscas.ac.cn Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-ppc4xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index e982d3189fdc..0d698580194e 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -411,6 +411,9 @@ static int spi_ppc4xx_of_probe(struct platform_device *op) /* Request IRQ */ hw->irqnum = irq_of_parse_and_map(np, 0); + if (hw->irqnum <= 0) + goto free_host; + ret = request_irq(hw->irqnum, spi_ppc4xx_int, 0, "spi_ppc4xx_of", (void *)hw); if (ret) { From 8d81cd1a048ae4c93d81e74a45359c4052032df0 Mon Sep 17 00:00:00 2001 From: David Virag Date: Sat, 13 Jul 2024 19:58:32 +0200 Subject: [PATCH 090/534] arm64: dts: exynos: exynos7885-jackpotlte: Correct RAM amount to 4GB [ Upstream commit d281814b8f7a710a75258da883fb0dfe1329c031 ] All known jackpotlte variants have 4GB of RAM, let's use it all. RAM was set to 3GB from a mistake in the vendor provided DTS file. Fixes: 06874015327b ("arm64: dts: exynos: Add initial device tree support for Exynos7885 SoC") Signed-off-by: David Virag Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20240713180607.147942-3-virag.david003@gmail.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/exynos/exynos7885-jackpotlte.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/exynos/exynos7885-jackpotlte.dts b/arch/arm64/boot/dts/exynos/exynos7885-jackpotlte.dts index 47a389d9ff7d..9d74fa6bfed9 100644 --- a/arch/arm64/boot/dts/exynos/exynos7885-jackpotlte.dts +++ b/arch/arm64/boot/dts/exynos/exynos7885-jackpotlte.dts @@ -32,7 +32,7 @@ device_type = "memory"; reg = <0x0 0x80000000 0x3da00000>, <0x0 0xc0000000 0x40000000>, - <0x8 0x80000000 0x40000000>; + <0x8 0x80000000 0x80000000>; }; gpio-keys { From bd7fa63736c75150d3d94e71d6968695e1fd251a Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 25 Jul 2024 09:22:43 +0200 Subject: [PATCH 091/534] arm64: dts: mediatek: mt8186: Fix supported-hw mask for GPU OPPs [ Upstream commit 2317d018b835842df0501d8f9e9efa843068a101 ] The speedbin eFuse reads a value 'x' from 0 to 7 and, in order to make that compatible with opp-supported-hw, it gets post processed as BIT(x). Change all of the 0x30 supported-hw to 0x20 to avoid getting duplicate OPPs for speedbin 4, and also change all of the 0x8 to 0xcf because speedbins different from 4 and 5 do support 900MHz, 950MHz, 1000MHz with the higher voltage of 850mV, 900mV, 950mV respectively. Fixes: f38ea593ad0d ("arm64: dts: mediatek: mt8186: Wire up GPU voltage/frequency scaling") Link: https://lore.kernel.org/r/20240725072243.173104-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8186.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index 84ec6c1aa12b..2c184f9e0fc3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -731,7 +731,7 @@ opp-900000000-3 { opp-hz = /bits/ 64 <900000000>; opp-microvolt = <850000>; - opp-supported-hw = <0x8>; + opp-supported-hw = <0xcf>; }; opp-900000000-4 { @@ -743,13 +743,13 @@ opp-900000000-5 { opp-hz = /bits/ 64 <900000000>; opp-microvolt = <825000>; - opp-supported-hw = <0x30>; + opp-supported-hw = <0x20>; }; opp-950000000-3 { opp-hz = /bits/ 64 <950000000>; opp-microvolt = <900000>; - opp-supported-hw = <0x8>; + opp-supported-hw = <0xcf>; }; opp-950000000-4 { @@ -761,13 +761,13 @@ opp-950000000-5 { opp-hz = /bits/ 64 <950000000>; opp-microvolt = <850000>; - opp-supported-hw = <0x30>; + opp-supported-hw = <0x20>; }; opp-1000000000-3 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <950000>; - opp-supported-hw = <0x8>; + opp-supported-hw = <0xcf>; }; opp-1000000000-4 { @@ -779,7 +779,7 @@ opp-1000000000-5 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <875000>; - opp-supported-hw = <0x30>; + opp-supported-hw = <0x20>; }; }; From 6699567b0bbb378600a4dc0a1f929439a4e84a2c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 12 Aug 2024 18:33:32 +0100 Subject: [PATCH 092/534] firmware: arm_scmi: Fix double free in OPTEE transport [ Upstream commit e98dba934b2fc587eafb83f47ad64d9053b18ae0 ] Channels can be shared between protocols, avoid freeing the same channel descriptors twice when unloading the stack. Fixes: 5f90f189a052 ("firmware: arm_scmi: Add optee transport") Signed-off-by: Cristian Marussi Tested-by: Peng Fan #i.MX95 19x19 EVK Reviewed-by: Peng Fan Tested-by: Florian Fainelli Message-Id: <20240812173340.3912830-2-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/optee.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/firmware/arm_scmi/optee.c b/drivers/firmware/arm_scmi/optee.c index e123de6e8c67..aa02392265d3 100644 --- a/drivers/firmware/arm_scmi/optee.c +++ b/drivers/firmware/arm_scmi/optee.c @@ -467,6 +467,13 @@ static int scmi_optee_chan_free(int id, void *p, void *data) struct scmi_chan_info *cinfo = p; struct scmi_optee_channel *channel = cinfo->transport_info; + /* + * Different protocols might share the same chan info, so a previous + * call might have already freed the structure. + */ + if (!channel) + return 0; + mutex_lock(&scmi_optee_private->mu); list_del(&channel->link); mutex_unlock(&scmi_optee_private->mu); From 3bf127bc26959385e03ca788bbea4cba15d0bfed Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 14 Aug 2024 17:45:12 +0300 Subject: [PATCH 093/534] spi: ppc4xx: Avoid returning 0 when failed to parse and map IRQ [ Upstream commit 7781f1d120fec8624fc654eda900fc8748262082 ] 0 is incorrect error code when failed to parse and map IRQ. Replace OF specific old API for IRQ retrieval with a generic one to fix this issue. Fixes: 0f245463b01e ("spi: ppc4xx: handle irq_of_parse_and_map() errors") Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240814144525.2648450-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-ppc4xx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 0d698580194e..1381563941fe 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -410,9 +409,10 @@ static int spi_ppc4xx_of_probe(struct platform_device *op) } /* Request IRQ */ - hw->irqnum = irq_of_parse_and_map(np, 0); - if (hw->irqnum <= 0) + ret = platform_get_irq(op, 0); + if (ret < 0) goto free_host; + hw->irqnum = ret; ret = request_irq(hw->irqnum, spi_ppc4xx_int, 0, "spi_ppc4xx_of", (void *)hw); From 1ccd886abf45aeb6bca5c45a0ed19cd31dceb070 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 22 Aug 2024 15:20:45 +0800 Subject: [PATCH 094/534] regulator: Return actual error in of_regulator_bulk_get_all() [ Upstream commit 395a41a1d3e377462f3eea8a205ee72be8885ff6 ] If regulator_get() in of_regulator_bulk_get_all() returns an error, that error gets overridden and -EINVAL is always passed out. This masks probe deferral requests and likely won't work properly in all cases. Fix this by letting of_regulator_bulk_get_all() return the original error code. Fixes: 27b9ecc7a9ba ("regulator: Add of_regulator_bulk_get_all") Signed-off-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240822072047.3097740-3-wenst@chromium.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/of_regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 1b65e5e4e40f..59e71fd0db43 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -768,7 +768,7 @@ restart: name[i] = '\0'; tmp = regulator_get(dev, name); if (IS_ERR(tmp)) { - ret = -EINVAL; + ret = PTR_ERR(tmp); goto error; } (*consumers)[n].consumer = tmp; From 7d0be3622399cd4af2b0a1ecfef2d7d94e57e8ac Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 30 Jul 2024 13:24:34 +0100 Subject: [PATCH 095/534] arm64: dts: renesas: r9a07g043u: Correct GICD and GICR sizes [ Upstream commit ab39547f739236e7f16b8b0a51fdca95cc9cadd3 ] The RZ/G2UL SoC is equipped with the GIC-600. The GICD is 64KiB + 64KiB for the MBI alias (in total 128KiB), and the GICR is 128KiB per CPU. Despite the RZ/G2UL SoC being single-core, it has two instances of GICR. Fixes: cf40c9689e510 ("arm64: dts: renesas: Add initial DTSI for RZ/G2UL SoC") Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/20240730122436.350013-3-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r9a07g043u.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi index 4b72de43b71c..71d51febabc1 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi @@ -145,8 +145,8 @@ #interrupt-cells = <3>; #address-cells = <0>; interrupt-controller; - reg = <0x0 0x11900000 0 0x40000>, - <0x0 0x11940000 0 0x60000>; + reg = <0x0 0x11900000 0 0x20000>, + <0x0 0x11940000 0 0x40000>; interrupts = ; }; }; From c2bae2675ca65fdf707a669524ce4edee14538fc Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 30 Jul 2024 13:24:35 +0100 Subject: [PATCH 096/534] arm64: dts: renesas: r9a07g054: Correct GICD and GICR sizes [ Upstream commit 45afa9eacb59b258d2e53c7f63430ea1e8344803 ] The RZ/V2L SoC is equipped with the GIC-600. The GICD is 64KiB + 64KiB for the MBI alias (in total 128KiB), and the GICR is 128KiB per CPU. Fixes: 7c2b8198f4f32 ("arm64: dts: renesas: Add initial DTSI for RZ/V2L SoC") Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/20240730122436.350013-4-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r9a07g054.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi index 3f01b096cfb7..d61f7894e55c 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi @@ -1004,8 +1004,8 @@ #interrupt-cells = <3>; #address-cells = <0>; interrupt-controller; - reg = <0x0 0x11900000 0 0x40000>, - <0x0 0x11940000 0 0x60000>; + reg = <0x0 0x11900000 0 0x20000>, + <0x0 0x11940000 0 0x40000>; interrupts = ; }; From 514265b1f154ab706b02be0813ef97d3a33ac50f Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 30 Jul 2024 13:24:36 +0100 Subject: [PATCH 097/534] arm64: dts: renesas: r9a07g044: Correct GICD and GICR sizes [ Upstream commit 833948fb2b63155847ab691a54800f801555429b ] The RZ/G2L(C) SoC is equipped with the GIC-600. The GICD is 64KiB + 64KiB for the MBI alias (in total 128KiB), and the GICR is 128KiB per CPU. Fixes: 68a45525297b2 ("arm64: dts: renesas: Add initial DTSI for RZ/G2{L,LC} SoC's") Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/20240730122436.350013-5-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r9a07g044.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi index a877738c3048..edc942c84639 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi @@ -997,8 +997,8 @@ #interrupt-cells = <3>; #address-cells = <0>; interrupt-controller; - reg = <0x0 0x11900000 0 0x40000>, - <0x0 0x11940000 0 0x60000>; + reg = <0x0 0x11900000 0 0x20000>, + <0x0 0x11940000 0 0x40000>; interrupts = ; }; From c742692fad4afaa77b44d925c125487e0e907dec Mon Sep 17 00:00:00 2001 From: Alexander Dahl Date: Wed, 21 Aug 2024 07:51:36 +0200 Subject: [PATCH 098/534] ARM: dts: microchip: sam9x60: Fix rtc/rtt clocks [ Upstream commit d355c895fa4ddd8bec15569eee540baeed7df8c5 ] The RTC and RTT peripherals use the timing domain slow clock (TD_SLCK), sourced from the 32.768 kHz crystal oscillator or slow rc oscillator. The previously used Monitoring domain slow clock (MD_SLCK) is sourced from an internal RC oscillator which is most probably not precise enough for real time clock purposes. Fixes: 1e5f532c2737 ("ARM: dts: at91: sam9x60: add device tree for soc and board") Fixes: 5f6b33f46346 ("ARM: dts: sam9x60: add rtt") Signed-off-by: Alexander Dahl Link: https://lore.kernel.org/r/20240821055136.6858-1-ada@thorsis.com [claudiu.beznea: removed () around the last commit description paragraph, removed " in front of "timing domain slow clock", described that TD_SLCK can also be sourced from slow rc oscillator] Signed-off-by: Claudiu Beznea Signed-off-by: Sasha Levin --- arch/arm/boot/dts/microchip/sam9x60.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/microchip/sam9x60.dtsi b/arch/arm/boot/dts/microchip/sam9x60.dtsi index 73d570a17269..1705c96f4221 100644 --- a/arch/arm/boot/dts/microchip/sam9x60.dtsi +++ b/arch/arm/boot/dts/microchip/sam9x60.dtsi @@ -1312,7 +1312,7 @@ compatible = "microchip,sam9x60-rtt", "atmel,at91sam9260-rtt"; reg = <0xfffffe20 0x20>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; - clocks = <&clk32k 0>; + clocks = <&clk32k 1>; }; pit: timer@fffffe40 { @@ -1338,7 +1338,7 @@ compatible = "microchip,sam9x60-rtc", "atmel,at91sam9x5-rtc"; reg = <0xfffffea8 0x100>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; - clocks = <&clk32k 0>; + clocks = <&clk32k 1>; }; watchdog: watchdog@ffffff80 { From 6d91b3f570ab0c23d0c8bc37290a330ffc4c0236 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Tue, 27 Aug 2024 21:18:16 +0000 Subject: [PATCH 099/534] arm64: dts: rockchip: Correct vendor prefix for Hardkernel ODROID-M1 [ Upstream commit 735065e774dcfc62e38df01a535862138b6c92ed ] The vendor prefix for Hardkernel ODROID-M1 is incorrectly listed as rockchip. Use the proper hardkernel vendor prefix for this board, while at it also drop the redundant soc prefix. Fixes: fd3583267703 ("arm64: dts: rockchip: Add Hardkernel ODROID-M1 board") Reviewed-by: Aurelien Jarno Signed-off-by: Jonas Karlman Link: https://lore.kernel.org/r/20240827211825.1419820-3-jonas@kwiboo.se Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts index a337f547caf5..6a02db4f073f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts @@ -13,7 +13,7 @@ / { model = "Hardkernel ODROID-M1"; - compatible = "rockchip,rk3568-odroid-m1", "rockchip,rk3568"; + compatible = "hardkernel,odroid-m1", "rockchip,rk3568"; aliases { ethernet0 = &gmac0; From ff8444011fe5790bae9309f278d660b4c3ddc029 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Thu, 1 Aug 2024 13:12:31 -0500 Subject: [PATCH 100/534] arm64: dts: ti: k3-j721e-sk: Fix reversed C6x carveout locations [ Upstream commit 9f3814a7c06b7c7296cf8c1622078ad71820454b ] The DMA carveout for the C6x core 0 is at 0xa6000000 and core 1 is at 0xa7000000. These are reversed in DT. While both C6x can access either region, so this is not normally a problem, but if we start restricting the memory each core can access (such as with firewalls) the cores accessing the regions for the wrong core will not work. Fix this here. Fixes: f46d16cf5b43 ("arm64: dts: ti: k3-j721e-sk: Add DDR carveout memory nodes") Signed-off-by: Andrew Davis Link: https://lore.kernel.org/r/20240801181232.55027-1-afd@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/ti/k3-j721e-sk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts index 42fe8eee9ec8..ccacb65683b5 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts +++ b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts @@ -119,7 +119,7 @@ no-map; }; - c66_1_dma_memory_region: c66-dma-memory@a6000000 { + c66_0_dma_memory_region: c66-dma-memory@a6000000 { compatible = "shared-dma-pool"; reg = <0x00 0xa6000000 0x00 0x100000>; no-map; @@ -131,7 +131,7 @@ no-map; }; - c66_0_dma_memory_region: c66-dma-memory@a7000000 { + c66_1_dma_memory_region: c66-dma-memory@a7000000 { compatible = "shared-dma-pool"; reg = <0x00 0xa7000000 0x00 0x100000>; no-map; From 7c84cb5a399098b9567dfbc2ff04b323f8dbbff4 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Thu, 1 Aug 2024 13:12:32 -0500 Subject: [PATCH 101/534] arm64: dts: ti: k3-j721e-beagleboneai64: Fix reversed C6x carveout locations [ Upstream commit 1a314099b7559690fe23cdf3300dfff6e830ecb1 ] The DMA carveout for the C6x core 0 is at 0xa6000000 and core 1 is at 0xa7000000. These are reversed in DT. While both C6x can access either region, so this is not normally a problem, but if we start restricting the memory each core can access (such as with firewalls) the cores accessing the regions for the wrong core will not work. Fix this here. Fixes: fae14a1cb8dd ("arm64: dts: ti: Add k3-j721e-beagleboneai64") Signed-off-by: Andrew Davis Link: https://lore.kernel.org/r/20240801181232.55027-2-afd@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts b/arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts index 2f954729f353..7897323376a5 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts +++ b/arch/arm64/boot/dts/ti/k3-j721e-beagleboneai64.dts @@ -123,7 +123,7 @@ no-map; }; - c66_1_dma_memory_region: c66-dma-memory@a6000000 { + c66_0_dma_memory_region: c66-dma-memory@a6000000 { compatible = "shared-dma-pool"; reg = <0x00 0xa6000000 0x00 0x100000>; no-map; @@ -135,7 +135,7 @@ no-map; }; - c66_0_dma_memory_region: c66-dma-memory@a7000000 { + c66_1_dma_memory_region: c66-dma-memory@a7000000 { compatible = "shared-dma-pool"; reg = <0x00 0xa7000000 0x00 0x100000>; no-map; From e91e803da1e58f046b574d89865038f90332c1d6 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 26 Aug 2024 20:49:02 +0800 Subject: [PATCH 102/534] spi: bcmbca-hsspi: Fix missing pm_runtime_disable() [ Upstream commit 4439a2e92cb89028b22c5d7ba1b99e75d7d889f6 ] The pm_runtime_disable() is missing in remove function, use devm_pm_runtime_enable() to fix it. So the pm_runtime_disable() in the probe error path can also be removed. Fixes: a38a2233f23b ("spi: bcmbca-hsspi: Add driver for newer HSSPI controller") Signed-off-by: Jinjie Ruan Reviewed-by: William Zhang Link: https://patch.msgid.link/20240826124903.3429235-2-ruanjinjie@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcmbca-hsspi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-bcmbca-hsspi.c b/drivers/spi/spi-bcmbca-hsspi.c index 9f64afd8164e..4965bc86d7f5 100644 --- a/drivers/spi/spi-bcmbca-hsspi.c +++ b/drivers/spi/spi-bcmbca-hsspi.c @@ -546,12 +546,14 @@ static int bcmbca_hsspi_probe(struct platform_device *pdev) goto out_put_host; } - pm_runtime_enable(&pdev->dev); + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) + goto out_put_host; ret = sysfs_create_group(&pdev->dev.kobj, &bcmbca_hsspi_group); if (ret) { dev_err(&pdev->dev, "couldn't register sysfs group\n"); - goto out_pm_disable; + goto out_put_host; } /* register and we are done */ @@ -565,8 +567,6 @@ static int bcmbca_hsspi_probe(struct platform_device *pdev) out_sysgroup_disable: sysfs_remove_group(&pdev->dev.kobj, &bcmbca_hsspi_group); -out_pm_disable: - pm_runtime_disable(&pdev->dev); out_put_host: spi_controller_put(host); out_disable_pll_clk: From 58bd96e5ec45def25456064dd482b7e0d15f7988 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 26 Aug 2024 19:53:20 +0300 Subject: [PATCH 103/534] ARM: dts: microchip: sama7g5: Fix RTT clock [ Upstream commit 867bf1923200e6ad82bad0289f43bf20b4ac7ff9 ] According to datasheet, Chapter 34. Clock Generator, section 34.2, Embedded characteristics, source clock for RTT is the TD_SLCK, registered with ID 1 by the slow clock controller driver. Fix RTT clock. Fixes: 7540629e2fc7 ("ARM: dts: at91: add sama7g5 SoC DT and sama7g5-ek") Link: https://lore.kernel.org/r/20240826165320.3068359-1-claudiu.beznea@tuxon.dev Signed-off-by: Claudiu Beznea Signed-off-by: Sasha Levin --- arch/arm/boot/dts/microchip/sama7g5.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/microchip/sama7g5.dtsi b/arch/arm/boot/dts/microchip/sama7g5.dtsi index 269e0a3ca269..7a95464bb78d 100644 --- a/arch/arm/boot/dts/microchip/sama7g5.dtsi +++ b/arch/arm/boot/dts/microchip/sama7g5.dtsi @@ -272,7 +272,7 @@ compatible = "microchip,sama7g5-rtt", "microchip,sam9x60-rtt", "atmel,at91sam9260-rtt"; reg = <0xe001d020 0x30>; interrupts = ; - clocks = <&clk32k 0>; + clocks = <&clk32k 1>; }; clk32k: clock-controller@e001d050 { From 01f986dc6411d9efc4e46845b0dafb312f3cbc1d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 Aug 2024 11:56:36 +0200 Subject: [PATCH 104/534] ARM: dts: imx7d-zii-rmu2: fix Ethernet PHY pinctrl property [ Upstream commit 0e49cfe364dea4345551516eb2fe53135a10432b ] There is no "fsl,phy" property in pin controller pincfg nodes: imx7d-zii-rmu2.dtb: pinctrl@302c0000: enet1phyinterruptgrp: 'fsl,pins' is a required property imx7d-zii-rmu2.dtb: pinctrl@302c0000: enet1phyinterruptgrp: 'fsl,phy' does not match any of the regexes: 'pinctrl-[0-9]+' Fixes: f496e6750083 ("ARM: dts: Add ZII support for ZII i.MX7 RMU2 board") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/nxp/imx/imx7d-zii-rmu2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-zii-rmu2.dts b/arch/arm/boot/dts/nxp/imx/imx7d-zii-rmu2.dts index 521493342fe9..8f5566027c25 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7d-zii-rmu2.dts +++ b/arch/arm/boot/dts/nxp/imx/imx7d-zii-rmu2.dts @@ -350,7 +350,7 @@ &iomuxc_lpsr { pinctrl_enet1_phy_interrupt: enet1phyinterruptgrp { - fsl,phy = < + fsl,pins = < MX7D_PAD_LPSR_GPIO1_IO02__GPIO1_IO2 0x08 >; }; From b2cce50abd4e1ce07c84e9e00b59c6a9816b69a9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 26 Aug 2024 07:49:33 +0200 Subject: [PATCH 105/534] ARM: versatile: fix OF node leak in CPUs prepare [ Upstream commit f2642d97f2105ed17b2ece0c597450f2ff95d704 ] Machine code is leaking OF node reference from of_find_matching_node() in realview_smp_prepare_cpus(). Fixes: 5420b4b15617 ("ARM: realview: add an DT SMP boot method") Signed-off-by: Krzysztof Kozlowski Acked-by: Liviu Dudau Link: https://lore.kernel.org/20240826054934.10724-1-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/mach-versatile/platsmp-realview.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-versatile/platsmp-realview.c b/arch/arm/mach-versatile/platsmp-realview.c index 5d363385c801..059d796b26bc 100644 --- a/arch/arm/mach-versatile/platsmp-realview.c +++ b/arch/arm/mach-versatile/platsmp-realview.c @@ -66,6 +66,7 @@ static void __init realview_smp_prepare_cpus(unsigned int max_cpus) return; } map = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(map)) { pr_err("PLATSMP: No syscon regmap\n"); return; From cfbf049d1605efcf9b3d9413aca4eee8e5814733 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 Aug 2024 16:14:24 +0200 Subject: [PATCH 106/534] reset: berlin: fix OF node leak in probe() error path [ Upstream commit 5f58a88cc91075be38cec69b7cb70aaa4ba69e8b ] Driver is leaking OF node reference on memory allocation failure. Acquire the OF node reference after memory allocation to fix this and keep it simple. Fixes: aed6f3cadc86 ("reset: berlin: convert to a platform driver") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240825-reset-cleanup-scoped-v1-1-03f6d834f8c0@linaro.org Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/reset/reset-berlin.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/reset/reset-berlin.c b/drivers/reset/reset-berlin.c index 2537ec05ecee..578fe867080c 100644 --- a/drivers/reset/reset-berlin.c +++ b/drivers/reset/reset-berlin.c @@ -68,13 +68,14 @@ static int berlin_reset_xlate(struct reset_controller_dev *rcdev, static int berlin2_reset_probe(struct platform_device *pdev) { - struct device_node *parent_np = of_get_parent(pdev->dev.of_node); + struct device_node *parent_np; struct berlin_reset_priv *priv; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + parent_np = of_get_parent(pdev->dev.of_node); priv->regmap = syscon_node_to_regmap(parent_np); of_node_put(parent_np); if (IS_ERR(priv->regmap)) From ee7e02e780f1d03e35ba599eb77698b6a9c572a1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 Aug 2024 16:14:25 +0200 Subject: [PATCH 107/534] reset: k210: fix OF node leak in probe() error path [ Upstream commit b14e40f5dc7cd0dd7e958010e6ca9ad32ff2ddad ] Driver is leaking OF node reference on memory allocation failure. Acquire the OF node reference after memory allocation to fix this and keep it simple. Fixes: 5a2308da9f60 ("riscv: Add Canaan Kendryte K210 reset controller") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240825-reset-cleanup-scoped-v1-2-03f6d834f8c0@linaro.org Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/reset/reset-k210.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/reset/reset-k210.c b/drivers/reset/reset-k210.c index b62a2fd44e4e..e77e4cca377d 100644 --- a/drivers/reset/reset-k210.c +++ b/drivers/reset/reset-k210.c @@ -90,7 +90,7 @@ static const struct reset_control_ops k210_rst_ops = { static int k210_rst_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *parent_np = of_get_parent(dev->of_node); + struct device_node *parent_np; struct k210_rst *ksr; dev_info(dev, "K210 reset controller\n"); @@ -99,6 +99,7 @@ static int k210_rst_probe(struct platform_device *pdev) if (!ksr) return -ENOMEM; + parent_np = of_get_parent(dev->of_node); ksr->map = syscon_node_to_regmap(parent_np); of_node_put(parent_np); if (IS_ERR(ksr->map)) From 27106b0a292eee1fe321932f0a429eacce030a0a Mon Sep 17 00:00:00 2001 From: Ankit Agrawal Date: Sat, 13 Jul 2024 15:27:13 +0530 Subject: [PATCH 108/534] clocksource/drivers/qcom: Add missing iounmap() on errors in msm_dt_timer_init() [ Upstream commit ca140a0dc0a18acd4653b56db211fec9b2339986 ] Add the missing iounmap() when clock frequency fails to get read by the of_property_read_u32() call, or if the call to msm_timer_init() fails. Fixes: 6e3321631ac2 ("ARM: msm: Add DT support to msm_timer") Signed-off-by: Ankit Agrawal Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240713095713.GA430091@bnew-VirtualBox Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/timer-qcom.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-qcom.c b/drivers/clocksource/timer-qcom.c index b4afe3a67583..eac4c95c6127 100644 --- a/drivers/clocksource/timer-qcom.c +++ b/drivers/clocksource/timer-qcom.c @@ -233,6 +233,7 @@ static int __init msm_dt_timer_init(struct device_node *np) } if (of_property_read_u32(np, "clock-frequency", &freq)) { + iounmap(cpu0_base); pr_err("Unknown frequency\n"); return -EINVAL; } @@ -243,7 +244,11 @@ static int __init msm_dt_timer_init(struct device_node *np) freq /= 4; writel_relaxed(DGT_CLK_CTL_DIV_4, source_base + DGT_CLK_CTL); - return msm_timer_init(freq, 32, irq, !!percpu_offset); + ret = msm_timer_init(freq, 32, irq, !!percpu_offset); + if (ret) + iounmap(cpu0_base); + + return ret; } TIMER_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init); TIMER_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init); From ecd4adebb852d293f1811dbc5c07375fc355d1cb Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 2 Aug 2024 15:09:50 +0800 Subject: [PATCH 109/534] arm64: dts: mediatek: mt8195: Correct clock order for dp_intf* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 51bc68debab9e30b50c6352315950f3cfc309b32 ] The clocks for dp_intf* device nodes are given in the wrong order, causing the binding validation to fail. Fixes: 6c2503b5856a ("arm64: dts: mt8195: Add dp-intf nodes") Signed-off-by: Chen-Yu Tsai Reviewed-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240802070951.1086616-1-wenst@chromium.org Signed-off-by: Matthias Brugger Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 20e6d90cc411..d21ba00a5bd5 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -2766,10 +2766,10 @@ compatible = "mediatek,mt8195-dp-intf"; reg = <0 0x1c015000 0 0x1000>; interrupts = ; - clocks = <&vdosys0 CLK_VDO0_DP_INTF0>, - <&vdosys0 CLK_VDO0_DP_INTF0_DP_INTF>, + clocks = <&vdosys0 CLK_VDO0_DP_INTF0_DP_INTF>, + <&vdosys0 CLK_VDO0_DP_INTF0>, <&apmixedsys CLK_APMIXED_TVDPLL1>; - clock-names = "engine", "pixel", "pll"; + clock-names = "pixel", "engine", "pll"; status = "disabled"; }; @@ -3036,10 +3036,10 @@ reg = <0 0x1c113000 0 0x1000>; interrupts = ; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>; - clocks = <&vdosys1 CLK_VDO1_DP_INTF0_MM>, - <&vdosys1 CLK_VDO1_DPINTF>, + clocks = <&vdosys1 CLK_VDO1_DPINTF>, + <&vdosys1 CLK_VDO1_DP_INTF0_MM>, <&apmixedsys CLK_APMIXED_TVDPLL2>; - clock-names = "engine", "pixel", "pll"; + clock-names = "pixel", "engine", "pll"; status = "disabled"; }; From 17c72808dbbd2e6ac752d7c3634ba866dd595995 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 2 Jul 2024 13:21:37 +0000 Subject: [PATCH 110/534] x86/mm: Use IPIs to synchronize LAM enablement [ Upstream commit 3b299b99556c1753923f8d9bbd9304bcd139282f ] LAM can only be enabled when a process is single-threaded. But _kernel_ threads can temporarily use a single-threaded process's mm. If LAM is enabled by a userspace process while a kthread is using its mm, the kthread will not observe LAM enablement (i.e. LAM will be disabled in CR3). This could be fine for the kthread itself, as LAM only affects userspace addresses. However, if the kthread context switches to a thread in the same userspace process, CR3 may or may not be updated because the mm_struct doesn't change (based on pending TLB flushes). If CR3 is not updated, the userspace thread will run incorrectly with LAM disabled, which may cause page faults when using tagged addresses. Example scenario: CPU 1 CPU 2 /* kthread */ kthread_use_mm() /* user thread */ prctl_enable_tagged_addr() /* LAM enabled on CPU 2 */ /* LAM disabled on CPU 1 */ context_switch() /* to CPU 1 */ /* Switching to user thread */ switch_mm_irqs_off() /* CR3 not updated */ /* LAM is still disabled on CPU 1 */ Synchronize LAM enablement by sending an IPI to all CPUs running with the mm_struct to enable LAM. This makes sure LAM is enabled on CPU 1 in the above scenario before prctl_enable_tagged_addr() returns and userspace starts using tagged addresses, and before it's possible to run the userspace process on CPU 1. In switch_mm_irqs_off(), move reading the LAM mask until after mm_cpumask() is updated. This ensures that if an outdated LAM mask is written to CR3, an IPI is received to update it right after IRQs are re-enabled. [ dhansen: Add a LAM enabling helper and comment it ] Fixes: 82721d8b25d7 ("x86/mm: Handle LAM on context switch") Suggested-by: Andy Lutomirski Signed-off-by: Yosry Ahmed Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Link: https://lore.kernel.org/all/20240702132139.3332013-2-yosryahmed%40google.com Signed-off-by: Sasha Levin --- arch/x86/kernel/process_64.c | 29 ++++++++++++++++++++++++++--- arch/x86/mm/tlb.c | 7 +++---- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4989095ab769..d595ef7c1de0 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -750,6 +750,27 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) #define LAM_U57_BITS 6 +static void enable_lam_func(void *__mm) +{ + struct mm_struct *mm = __mm; + + if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm) { + write_cr3(__read_cr3() | mm->context.lam_cr3_mask); + set_tlbstate_lam_mode(mm); + } +} + +static void mm_enable_lam(struct mm_struct *mm) +{ + /* + * Even though the process must still be single-threaded at this + * point, kernel threads may be using the mm. IPI those kernel + * threads if they exist. + */ + on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true); + set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags); +} + static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) { if (!cpu_feature_enabled(X86_FEATURE_LAM)) @@ -766,6 +787,10 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) if (mmap_write_lock_killable(mm)) return -EINTR; + /* + * MM_CONTEXT_LOCK_LAM is set on clone. Prevent LAM from + * being enabled unless the process is single threaded: + */ if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) { mmap_write_unlock(mm); return -EBUSY; @@ -782,9 +807,7 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) return -EINVAL; } - write_cr3(__read_cr3() | mm->context.lam_cr3_mask); - set_tlbstate_lam_mode(mm); - set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags); + mm_enable_lam(mm); mmap_write_unlock(mm); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 453ea95b667d..2fbae48f0b47 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -497,9 +497,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, { struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); - unsigned long new_lam = mm_lam_cr3_mask(next); bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy); unsigned cpu = smp_processor_id(); + unsigned long new_lam; u64 next_tlb_gen; bool need_flush; u16 new_asid; @@ -622,9 +622,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); } - /* - * Start remote flushes and then read tlb_gen. - */ + /* Start receiving IPIs and then read tlb_gen (and LAM below) */ if (next != &init_mm) cpumask_set_cpu(cpu, mm_cpumask(next)); next_tlb_gen = atomic64_read(&next->context.tlb_gen); @@ -636,6 +634,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, barrier(); } + new_lam = mm_lam_cr3_mask(next); set_tlbstate_lam_mode(next); if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); From 06a95f7184abed0dfede82a6ef88c69cf6fbfdc2 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 17 Jul 2024 19:54:36 +0800 Subject: [PATCH 111/534] ASoC: rt5682s: Return devm_of_clk_add_hw_provider to transfer the error [ Upstream commit 3ff810b9bebe5578a245cfa97c252ab602e703f1 ] Return devm_of_clk_add_hw_provider() in order to transfer the error, if it fails due to resource allocation failure or device tree clock provider registration failure. Fixes: bdd229ab26be ("ASoC: rt5682s: Add driver for ALC5682I-VS codec") Signed-off-by: Ma Ke Link: https://patch.msgid.link/20240717115436.3449492-1-make24@iscas.ac.cn Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682s.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5682s.c b/sound/soc/codecs/rt5682s.c index 68ac5ea50396..92c647d439ec 100644 --- a/sound/soc/codecs/rt5682s.c +++ b/sound/soc/codecs/rt5682s.c @@ -2828,7 +2828,9 @@ static int rt5682s_register_dai_clks(struct snd_soc_component *component) } if (dev->of_node) { - devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, dai_clk_hw); + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, dai_clk_hw); + if (ret) + return ret; } else { ret = devm_clk_hw_register_clkdev(dev, dai_clk_hw, init.name, dev_name(dev)); From c0f6521806963ba3f5303f2e66aa7b5c2dba345a Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Fri, 9 Feb 2024 06:59:34 +0100 Subject: [PATCH 112/534] ASoC: tas2781: remove unused acpi_subysystem_id [ Upstream commit 4089d82e67a9967fc5bf2b4e5ef820d67fe73924 ] The acpi_subysystem_id is only written and freed, not read, so unnecessary. Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/454639336be28d2b50343e9c8366a56b0975e31d.1707456753.git.soyer@irl.hu Signed-off-by: Mark Brown Stable-dep-of: c2c0b67dca3c ("ASoC: tas2781-i2c: Drop weird GPIO code") Signed-off-by: Sasha Levin --- include/sound/tas2781.h | 1 - sound/pci/hda/tas2781_hda_i2c.c | 12 ------------ sound/soc/codecs/tas2781-comlib.c | 1 - 3 files changed, 14 deletions(-) diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index be58d870505a..be6f70dd54f9 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -101,7 +101,6 @@ struct tasdevice_priv { struct tm tm; enum device_catlog_id catlog_id; - const char *acpi_subsystem_id; unsigned char cal_binaryname[TASDEVICE_MAX_CHANNELS][64]; unsigned char crc8_lkp_tbl[CRC8_TABLE_SIZE]; unsigned char coef_binaryname[64]; diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index e5bb1fed26a0..0a587f55583f 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -95,9 +95,7 @@ static int tas2781_get_i2c_res(struct acpi_resource *ares, void *data) static int tas2781_read_acpi(struct tasdevice_priv *p, const char *hid) { struct acpi_device *adev; - struct device *physdev; LIST_HEAD(resources); - const char *sub; int ret; adev = acpi_dev_get_first_match_dev(hid, NULL, -1); @@ -113,18 +111,8 @@ static int tas2781_read_acpi(struct tasdevice_priv *p, const char *hid) acpi_dev_free_resource_list(&resources); strscpy(p->dev_name, hid, sizeof(p->dev_name)); - physdev = get_device(acpi_get_first_physical_node(adev)); acpi_dev_put(adev); - /* No side-effect to the playback even if subsystem_id is NULL*/ - sub = acpi_get_subsystem_id(ACPI_HANDLE(physdev)); - if (IS_ERR(sub)) - sub = NULL; - - p->acpi_subsystem_id = sub; - - put_device(physdev); - return 0; err: diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c index 5d0e5348b361..3aa81514dad7 100644 --- a/sound/soc/codecs/tas2781-comlib.c +++ b/sound/soc/codecs/tas2781-comlib.c @@ -408,7 +408,6 @@ void tasdevice_remove(struct tasdevice_priv *tas_priv) { if (gpio_is_valid(tas_priv->irq_info.irq_gpio)) gpio_free(tas_priv->irq_info.irq_gpio); - kfree(tas_priv->acpi_subsystem_id); mutex_destroy(&tas_priv->codec_lock); } EXPORT_SYMBOL_GPL(tasdevice_remove); From ac7976b672770b0d7722e0885c3f6543a8a3cacf Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 2 Jul 2024 15:54:01 -0600 Subject: [PATCH 113/534] ASoC: tas2781: Use of_property_read_reg() [ Upstream commit 31a45f9190b5b4f5cd8cdec8471babd5215eee04 ] Replace the open-coded parsing of "reg" with of_property_read_reg(). The #ifdef is also easily replaced with IS_ENABLED(). Signed-off-by: Rob Herring (Arm) Link: https://patch.msgid.link/20240702215402.839673-1-robh@kernel.org Signed-off-by: Mark Brown Stable-dep-of: c2c0b67dca3c ("ASoC: tas2781-i2c: Drop weird GPIO code") Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2781-i2c.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index a9d179e30773..61a64d18a7d5 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -633,33 +634,20 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) tas_priv->irq_info.irq_gpio = acpi_dev_gpio_irq_get(ACPI_COMPANION(&client->dev), 0); - } else { + } else if (IS_ENABLED(CONFIG_OF)) { struct device_node *np = tas_priv->dev->of_node; -#ifdef CONFIG_OF - const __be32 *reg, *reg_end; - int len, sw, aw; + u64 addr; - aw = of_n_addr_cells(np); - sw = of_n_size_cells(np); - if (sw == 0) { - reg = (const __be32 *)of_get_property(np, - "reg", &len); - reg_end = reg + len/sizeof(*reg); - ndev = 0; - do { - dev_addrs[ndev] = of_read_number(reg, aw); - reg += aw; - ndev++; - } while (reg < reg_end); - } else { - ndev = 1; - dev_addrs[0] = client->addr; + for (i = 0; i < TASDEVICE_MAX_CHANNELS; i++) { + if (of_property_read_reg(np, i, &addr, NULL)) + break; + dev_addrs[ndev++] = addr; } -#else + + tas_priv->irq_info.irq_gpio = of_irq_get(np, 0); + } else { ndev = 1; dev_addrs[0] = client->addr; -#endif - tas_priv->irq_info.irq_gpio = of_irq_get(np, 0); } tas_priv->ndev = ndev; for (i = 0; i < ndev; i++) From 92b53ece5d3701c526ee5ac626ddd752548d2c01 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Aug 2024 17:02:32 +0200 Subject: [PATCH 114/534] ASoC: tas2781-i2c: Drop weird GPIO code [ Upstream commit c2c0b67dca3cb3b3cea0dd60075a1c5ba77e2fcd ] The tas2781-i2c driver gets an IRQ from either ACPI or device tree, then proceeds to check if the IRQ has a corresponding GPIO and in case it does enforce the GPIO as input and set a label on it. This is abuse of the API: - First we cannot guarantee that the numberspaces of the GPIOs and the IRQs are the same, i.e that an IRQ number corresponds to a GPIO number like that. - Second, GPIO chips and IRQ chips should be treated as orthogonal APIs, the irqchip needs to ascertain that the backing GPIO line is set to input etc just using the irqchip. - Third it is using the legacy API which should not be used in new code yet this was added just a year ago. Delete the offending code. If this creates problems the GPIO and irqchip maintainers can help to fix the issues. It *should* not create any problems, because the irq isn't used anywhere in the driver, it's just obtained and then left unused. Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20240807-asoc-tas-gpios-v2-1-bd0f2705d58b@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- include/sound/tas2781.h | 7 +------ sound/pci/hda/tas2781_hda_i2c.c | 2 +- sound/soc/codecs/tas2781-comlib.c | 3 --- sound/soc/codecs/tas2781-fmwlib.c | 1 - sound/soc/codecs/tas2781-i2c.c | 24 +++--------------------- 5 files changed, 5 insertions(+), 32 deletions(-) diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index be6f70dd54f9..f97f386e5a55 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -78,11 +78,6 @@ struct tasdevice { bool is_loaderr; }; -struct tasdevice_irqinfo { - int irq_gpio; - int irq; -}; - struct calidata { unsigned char *data; unsigned long total_sz; @@ -90,7 +85,6 @@ struct calidata { struct tasdevice_priv { struct tasdevice tasdevice[TASDEVICE_MAX_CHANNELS]; - struct tasdevice_irqinfo irq_info; struct tasdevice_rca rcabin; struct calidata cali_data; struct tasdevice_fw *fmw; @@ -111,6 +105,7 @@ struct tasdevice_priv { unsigned int chip_id; unsigned int sysclk; + int irq; int cur_prog; int cur_conf; int fw_state; diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 0a587f55583f..980e6104c2f3 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -710,7 +710,7 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) } else return -ENODEV; - tas_hda->priv->irq_info.irq = clt->irq; + tas_hda->priv->irq = clt->irq; ret = tas2781_read_acpi(tas_hda->priv, device_name); if (ret) return dev_err_probe(tas_hda->dev, ret, diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c index 3aa81514dad7..0444cf90c511 100644 --- a/sound/soc/codecs/tas2781-comlib.c +++ b/sound/soc/codecs/tas2781-comlib.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -406,8 +405,6 @@ EXPORT_SYMBOL_GPL(tasdevice_dsp_remove); void tasdevice_remove(struct tasdevice_priv *tas_priv) { - if (gpio_is_valid(tas_priv->irq_info.irq_gpio)) - gpio_free(tas_priv->irq_info.irq_gpio); mutex_destroy(&tas_priv->codec_lock); } EXPORT_SYMBOL_GPL(tasdevice_remove); diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index 3639dcd0bbb2..629e2195a890 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 61a64d18a7d5..b25978f01cd2 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -617,7 +616,7 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) { struct i2c_client *client = (struct i2c_client *)tas_priv->client; unsigned int dev_addrs[TASDEVICE_MAX_CHANNELS]; - int rc, i, ndev = 0; + int i, ndev = 0; if (tas_priv->isacpi) { ndev = device_property_read_u32_array(&client->dev, @@ -632,7 +631,7 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) "ti,audio-slots", dev_addrs, ndev); } - tas_priv->irq_info.irq_gpio = + tas_priv->irq = acpi_dev_gpio_irq_get(ACPI_COMPANION(&client->dev), 0); } else if (IS_ENABLED(CONFIG_OF)) { struct device_node *np = tas_priv->dev->of_node; @@ -644,7 +643,7 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) dev_addrs[ndev++] = addr; } - tas_priv->irq_info.irq_gpio = of_irq_get(np, 0); + tas_priv->irq = of_irq_get(np, 0); } else { ndev = 1; dev_addrs[0] = client->addr; @@ -660,23 +659,6 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) __func__); strcpy(tas_priv->dev_name, tasdevice_id[tas_priv->chip_id].name); - - if (gpio_is_valid(tas_priv->irq_info.irq_gpio)) { - rc = gpio_request(tas_priv->irq_info.irq_gpio, - "AUDEV-IRQ"); - if (!rc) { - gpio_direction_input( - tas_priv->irq_info.irq_gpio); - - tas_priv->irq_info.irq = - gpio_to_irq(tas_priv->irq_info.irq_gpio); - } else - dev_err(tas_priv->dev, "%s: GPIO %d request error\n", - __func__, tas_priv->irq_info.irq_gpio); - } else - dev_err(tas_priv->dev, - "Looking up irq-gpio property failed %d\n", - tas_priv->irq_info.irq_gpio); } static int tasdevice_i2c_probe(struct i2c_client *i2c) From 7000e5f31ccfcf82bac6d291af60ac301fa91eb8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Aug 2024 17:02:33 +0200 Subject: [PATCH 115/534] ASoC: tas2781-i2c: Get the right GPIO line [ Upstream commit 1c4b509edad15192bfb64c81d3c305bbae8070db ] The code is obtaining a GPIO reset using the reset GPIO name "reset-gpios", but the gpiolib is already adding the suffix "-gpios" to anything passed to this function and will be looking for "reset-gpios-gpios" which is most certainly not what the author desired. Fix it up. Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20240807-asoc-tas-gpios-v2-2-bd0f2705d58b@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2781-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index b25978f01cd2..43775c194445 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -653,7 +653,7 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) tas_priv->tasdevice[i].dev_addr = dev_addrs[i]; tas_priv->reset = devm_gpiod_get_optional(&client->dev, - "reset-gpios", GPIOD_OUT_HIGH); + "reset", GPIOD_OUT_HIGH); if (IS_ERR(tas_priv->reset)) dev_err(tas_priv->dev, "%s Can't get reset GPIO\n", __func__); From c239cfa322ee40dad496c15b844a092d31496f57 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 13 Jun 2024 07:12:10 +0900 Subject: [PATCH 116/534] selftests/ftrace: Add required dependency for kprobe tests [ Upstream commit 41f37c852ac3fbfd072a00281b60dc7ba056be8c ] kprobe_args_{char,string}.tc are using available_filter_functions file which is provided by function tracer. Thus if function tracer is disabled, these tests are failed on recent kernels because tracefs_create_dir is not raised events by adding a dynamic event. Add available_filter_functions to requires line. Fixes: 7c1130ea5cae ("test: ftrace: Fix kprobe test for eventfs") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- .../testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_args_string.tc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc index ff7499eb98d6..ce5d2e62731f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event char type argument -# requires: kprobe_events +# requires: kprobe_events available_filter_functions case `uname -m` in x86_64) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index a202b2ea4baf..4f72c2875f6b 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event string type argument -# requires: kprobe_events +# requires: kprobe_events available_filter_functions case `uname -m` in x86_64) From cc08ac5f42c3e5394cd23399325ec4a005588e15 Mon Sep 17 00:00:00 2001 From: Yuntao Liu Date: Thu, 15 Aug 2024 09:13:12 +0000 Subject: [PATCH 117/534] ALSA: hda: cs35l41: fix module autoloading [ Upstream commit 48f1434a4632c7da1a6a94e159512ebddbe13392 ] Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from spi_device_id table. Fixes: 7b2f3eb492da ("ALSA: hda: cs35l41: Add support for CS35L41 in HDA systems") Signed-off-by: Yuntao Liu Link: https://patch.msgid.link/20240815091312.757139-1-liuyuntao12@huawei.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/cs35l41_hda_spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/cs35l41_hda_spi.c b/sound/pci/hda/cs35l41_hda_spi.c index eb287aa5f782..d95954ce55d8 100644 --- a/sound/pci/hda/cs35l41_hda_spi.c +++ b/sound/pci/hda/cs35l41_hda_spi.c @@ -38,6 +38,7 @@ static const struct spi_device_id cs35l41_hda_spi_id[] = { { "cs35l41-hda", 0 }, {} }; +MODULE_DEVICE_TABLE(spi, cs35l41_hda_spi_id); static const struct acpi_device_id cs35l41_acpi_hda_match[] = { { "CSC3551", 0 }, From 886ea81de41f7ee04dc5e0cfc54037df2769ce8c Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sun, 11 Aug 2024 10:12:29 +1000 Subject: [PATCH 118/534] m68k: Fix kernel_clone_args.flags in m68k_clone() [ Upstream commit 09b3d870faa7bc3e96c0978ab3cf4e96e4b15571 ] Stan Johnson recently reported a failure from the 'dump' command: DUMP: Date of this level 0 dump: Fri Aug 9 23:37:15 2024 DUMP: Dumping /dev/sda (an unlisted file system) to /dev/null DUMP: Label: none DUMP: Writing 10 Kilobyte records DUMP: mapping (Pass I) [regular files] DUMP: mapping (Pass II) [directories] DUMP: estimated 3595695 blocks. DUMP: Context save fork fails in parent 671 The dump program uses the clone syscall with the CLONE_IO flag, that is, flags == 0x80000000. When that value is promoted from long int to u64 by m68k_clone(), it undergoes sign-extension. The new value includes CLONE_INTO_CGROUP so the validation in cgroup_css_set_fork() fails and the syscall returns -EBADF. Avoid sign-extension by casting to u32. Reported-by: Stan Johnson Closes: https://lists.debian.org/debian-68k/2024/08/msg00000.html Fixes: 6aabc1facdb2 ("m68k: Implement copy_thread_tls()") Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/3463f1e5d4e95468dc9f3368f2b78ffa7b72199b.1723335149.git.fthain@linux-m68k.org Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/m68k/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index e06ce147c0b7..fb87219fc3b4 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -116,7 +116,7 @@ asmlinkage int m68k_clone(struct pt_regs *regs) { /* regs will be equal to current_pt_regs() */ struct kernel_clone_args args = { - .flags = regs->d1 & ~CSIGNAL, + .flags = (u32)(regs->d1) & ~CSIGNAL, .pidfd = (int __user *)regs->d3, .child_tid = (int __user *)regs->d4, .parent_tid = (int __user *)regs->d3, From f606b9ac4abbe56816eee1941dccae9b6d3d8952 Mon Sep 17 00:00:00 2001 From: tangbin Date: Tue, 3 Sep 2024 17:06:20 +0800 Subject: [PATCH 119/534] ASoC: loongson: fix error release [ Upstream commit 97688a9c5b1fd2b826c682cdfa36d411a5c99828 ] In function loongson_card_parse_of(), when get device_node 'codec' failed, the function of_node_put(codec) should not be invoked, thus fix error release. Fixes: d24028606e76 ("ASoC: loongson: Add Loongson ASoC Sound Card Support") Signed-off-by: tangbin Link: https://patch.msgid.link/20240903090620.6276-1-tangbin@cmss.chinamobile.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/loongson/loongson_card.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/loongson/loongson_card.c b/sound/soc/loongson/loongson_card.c index 406ee8db1a3c..8cc54aedd002 100644 --- a/sound/soc/loongson/loongson_card.c +++ b/sound/soc/loongson/loongson_card.c @@ -127,8 +127,8 @@ static int loongson_card_parse_of(struct loongson_card_data *data) codec = of_get_child_by_name(dev->of_node, "codec"); if (!codec) { dev_err(dev, "audio-codec property missing or invalid\n"); - ret = -EINVAL; - goto err; + of_node_put(cpu); + return -EINVAL; } for (i = 0; i < card->num_links; i++) { From 0c7af15f64b6f68539fbee8411f7126fa6acb613 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 18 Jul 2024 09:52:01 -0700 Subject: [PATCH 120/534] hwmon: (max16065) Fix overflows seen when writing limits [ Upstream commit 744ec4477b11c42e2c8de9eb8364675ae7a0bd81 ] Writing large limits resulted in overflows as reported by module tests. in0_lcrit: Suspected overflow: [max=5538, read 0, written 2147483647] in0_crit: Suspected overflow: [max=5538, read 0, written 2147483647] in0_min: Suspected overflow: [max=5538, read 0, written 2147483647] Fix the problem by clamping prior to multiplications and the use of DIV_ROUND_CLOSEST, and by using consistent variable types. Reviewed-by: Tzung-Bi Shih Fixes: f5bae2642e3d ("hwmon: Driver for MAX16065 System Manager and compatibles") Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/max16065.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/max16065.c b/drivers/hwmon/max16065.c index aa38c45adc09..e392529de098 100644 --- a/drivers/hwmon/max16065.c +++ b/drivers/hwmon/max16065.c @@ -114,9 +114,10 @@ static inline int LIMIT_TO_MV(int limit, int range) return limit * range / 256; } -static inline int MV_TO_LIMIT(int mv, int range) +static inline int MV_TO_LIMIT(unsigned long mv, int range) { - return clamp_val(DIV_ROUND_CLOSEST(mv * 256, range), 0, 255); + mv = clamp_val(mv, 0, ULONG_MAX / 256); + return DIV_ROUND_CLOSEST(clamp_val(mv * 256, 0, range * 255), range); } static inline int ADC_TO_CURR(int adc, int gain) From fc702f5c3d240dc7d8cbcdb4b5b533a536f55147 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Wed, 3 Apr 2024 15:36:21 -0500 Subject: [PATCH 121/534] hwmon: (max16065) Remove use of i2c_match_id() [ Upstream commit 5a71654b398e3471f0169c266a3587cf09e1200c ] The function i2c_match_id() is used to fetch the matching ID from the i2c_device_id table. This is often used to then retrieve the matching driver_data. This can be done in one step with the helper i2c_get_match_data(). This helper has a couple other benefits: * It doesn't need the i2c_device_id passed in so we do not need to have that forward declared, allowing us to remove those or move the i2c_device_id table down to its more natural spot with the other module info. * It also checks for device match data, which allows for OF and ACPI based probing. That means we do not have to manually check those first and can remove those checks. Signed-off-by: Andrew Davis Link: https://lore.kernel.org/r/20240403203633.914389-20-afd@ti.com Signed-off-by: Guenter Roeck Stable-dep-of: 119abf7d1815 ("hwmon: (max16065) Fix alarm attributes") Signed-off-by: Sasha Levin --- drivers/hwmon/max16065.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/max16065.c b/drivers/hwmon/max16065.c index e392529de098..5b2a174c6bad 100644 --- a/drivers/hwmon/max16065.c +++ b/drivers/hwmon/max16065.c @@ -494,8 +494,6 @@ static const struct attribute_group max16065_max_group = { .is_visible = max16065_secondary_is_visible, }; -static const struct i2c_device_id max16065_id[]; - static int max16065_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; @@ -506,7 +504,7 @@ static int max16065_probe(struct i2c_client *client) bool have_secondary; /* true if chip has secondary limits */ bool secondary_is_max = false; /* secondary limits reflect max */ int groups = 0; - const struct i2c_device_id *id = i2c_match_id(max16065_id, client); + enum chips chip = (uintptr_t)i2c_get_match_data(client); if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_READ_WORD_DATA)) @@ -519,9 +517,9 @@ static int max16065_probe(struct i2c_client *client) data->client = client; mutex_init(&data->update_lock); - data->num_adc = max16065_num_adc[id->driver_data]; - data->have_current = max16065_have_current[id->driver_data]; - have_secondary = max16065_have_secondary[id->driver_data]; + data->num_adc = max16065_num_adc[chip]; + data->have_current = max16065_have_current[chip]; + have_secondary = max16065_have_secondary[chip]; if (have_secondary) { val = i2c_smbus_read_byte_data(client, MAX16065_SW_ENABLE); From 0a27e17475d4242c3ebe9cad6f4b7d91c4f1aa91 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 21 Jul 2024 06:41:17 -0700 Subject: [PATCH 122/534] hwmon: (max16065) Fix alarm attributes [ Upstream commit 119abf7d1815f098f7f91ae7abc84324a19943d7 ] Chips reporting overcurrent alarms report it in the second alarm register. That means the second alarm register has to be read, even if the chip only supports 8 or fewer ADC channels. MAX16067 and MAX16068 report undervoltage and overvoltage alarms in separate registers. Fold register contents together to report both with the existing alarm attribute. This requires actually storing the chip type in struct max16065_data. Rename the variable 'chip' to match the variable name used in the probe function. Reviewed-by: Tzung-Bi Shih Fixes: f5bae2642e3d ("hwmon: Driver for MAX16065 System Manager and compatibles") Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/max16065.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/max16065.c b/drivers/hwmon/max16065.c index 5b2a174c6bad..0ccb5eb596fc 100644 --- a/drivers/hwmon/max16065.c +++ b/drivers/hwmon/max16065.c @@ -79,7 +79,7 @@ static const bool max16065_have_current[] = { }; struct max16065_data { - enum chips type; + enum chips chip; struct i2c_client *client; const struct attribute_group *groups[4]; struct mutex update_lock; @@ -162,10 +162,17 @@ static struct max16065_data *max16065_update_device(struct device *dev) MAX16065_CURR_SENSE); } - for (i = 0; i < DIV_ROUND_UP(data->num_adc, 8); i++) + for (i = 0; i < 2; i++) data->fault[i] = i2c_smbus_read_byte_data(client, MAX16065_FAULT(i)); + /* + * MAX16067 and MAX16068 have separate undervoltage and + * overvoltage alarm bits. Squash them together. + */ + if (data->chip == max16067 || data->chip == max16068) + data->fault[0] |= data->fault[1]; + data->last_updated = jiffies; data->valid = true; } @@ -514,6 +521,7 @@ static int max16065_probe(struct i2c_client *client) if (unlikely(!data)) return -ENOMEM; + data->chip = chip; data->client = client; mutex_init(&data->update_lock); From 590960a5b3b30dfb2bd07caf64df6755ab41e246 Mon Sep 17 00:00:00 2001 From: Mirsad Todorovac Date: Fri, 12 Jul 2024 01:43:20 +0200 Subject: [PATCH 123/534] mtd: slram: insert break after errors in parsing the map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 336c218dd7f0588ed8a7345f367975a00a4f003f ] GCC 12.3.0 compiler on linux-next next-20240709 tree found the execution path in which, due to lazy evaluation, devlength isn't initialised with the parsed string: 289 while (map) { 290 devname = devstart = devlength = NULL; 291 292 if (!(devname = strsep(&map, ","))) { 293 E("slram: No devicename specified.\n"); 294 break; 295 } 296 T("slram: devname = %s\n", devname); 297 if ((!map) || (!(devstart = strsep(&map, ",")))) { 298 E("slram: No devicestart specified.\n"); 299 } 300 T("slram: devstart = %s\n", devstart); → 301 if ((!map) || (!(devlength = strsep(&map, ",")))) { 302 E("slram: No devicelength / -end specified.\n"); 303 } → 304 T("slram: devlength = %s\n", devlength); 305 if (parse_cmdline(devname, devstart, devlength) != 0) { 306 return(-EINVAL); 307 } Parsing should be finished after map == NULL, so a break is best inserted after each E("slram: ... \n") error message. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: linux-mtd@lists.infradead.org Signed-off-by: Mirsad Todorovac Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240711234319.637824-1-mtodorovac69@gmail.com Signed-off-by: Sasha Levin --- drivers/mtd/devices/slram.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c index 28131a127d06..8297b366a066 100644 --- a/drivers/mtd/devices/slram.c +++ b/drivers/mtd/devices/slram.c @@ -296,10 +296,12 @@ static int __init init_slram(void) T("slram: devname = %s\n", devname); if ((!map) || (!(devstart = strsep(&map, ",")))) { E("slram: No devicestart specified.\n"); + break; } T("slram: devstart = %s\n", devstart); if ((!map) || (!(devlength = strsep(&map, ",")))) { E("slram: No devicelength / -end specified.\n"); + break; } T("slram: devlength = %s\n", devlength); if (parse_cmdline(devname, devstart, devlength) != 0) { From cbb2313e76d3ae20965fb49e74f6c96b328a12cb Mon Sep 17 00:00:00 2001 From: Yuntao Liu Date: Thu, 15 Aug 2024 08:30:21 +0000 Subject: [PATCH 124/534] hwmon: (ntc_thermistor) fix module autoloading [ Upstream commit b6964d66a07a9003868e428a956949e17ab44d7e ] Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from of_device_id table. Fixes: 9e8269de100d ("hwmon: (ntc_thermistor) Add DT with IIO support to NTC thermistor driver") Signed-off-by: Yuntao Liu Message-ID: <20240815083021.756134-1-liuyuntao12@huawei.com> Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ntc_thermistor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index ef75b63f5894..b5352900463f 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -62,6 +62,7 @@ static const struct platform_device_id ntc_thermistor_id[] = { [NTC_SSG1404001221] = { "ssg1404_001221", TYPE_NCPXXWB473 }, [NTC_LAST] = { }, }; +MODULE_DEVICE_TABLE(platform, ntc_thermistor_id); /* * A compensation table should be sorted by the values of .ohm From 05dba1274e7da97a165e3c34803f35e7f685a8c7 Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Wed, 21 Aug 2024 16:54:43 -0500 Subject: [PATCH 125/534] power: supply: axp20x_battery: Remove design from min and max voltage [ Upstream commit 61978807b00f8a1817b0e5580981af1cd2f428a5 ] The POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN and POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN values should be immutable properties of the battery, but for this driver they are writable values and used as the minimum and maximum values for charging. Remove the DESIGN designation from these values. Fixes: 46c202b5f25f ("power: supply: add battery driver for AXP20X and AXP22X PMICs") Suggested-by: Chen-Yu Tsai Signed-off-by: Chris Morgan Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240821215456.962564-3-macroalpha82@gmail.com Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/axp20x_battery.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c index 6ac5c80cfda2..7520b599eb3d 100644 --- a/drivers/power/supply/axp20x_battery.c +++ b/drivers/power/supply/axp20x_battery.c @@ -303,11 +303,11 @@ static int axp20x_battery_get_prop(struct power_supply *psy, val->intval = reg & AXP209_FG_PERCENT; break; - case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN: + case POWER_SUPPLY_PROP_VOLTAGE_MAX: return axp20x_batt->data->get_max_voltage(axp20x_batt, &val->intval); - case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: + case POWER_SUPPLY_PROP_VOLTAGE_MIN: ret = regmap_read(axp20x_batt->regmap, AXP20X_V_OFF, ®); if (ret) return ret; @@ -455,10 +455,10 @@ static int axp20x_battery_set_prop(struct power_supply *psy, struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy); switch (psp) { - case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: + case POWER_SUPPLY_PROP_VOLTAGE_MIN: return axp20x_set_voltage_min_design(axp20x_batt, val->intval); - case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN: + case POWER_SUPPLY_PROP_VOLTAGE_MAX: return axp20x_batt->data->set_max_voltage(axp20x_batt, val->intval); case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT: @@ -493,8 +493,8 @@ static enum power_supply_property axp20x_battery_props[] = { POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT, POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX, POWER_SUPPLY_PROP_HEALTH, - POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, - POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_MAX, + POWER_SUPPLY_PROP_VOLTAGE_MIN, POWER_SUPPLY_PROP_CAPACITY, }; @@ -502,8 +502,8 @@ static int axp20x_battery_prop_writeable(struct power_supply *psy, enum power_supply_property psp) { return psp == POWER_SUPPLY_PROP_STATUS || - psp == POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN || - psp == POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN || + psp == POWER_SUPPLY_PROP_VOLTAGE_MIN || + psp == POWER_SUPPLY_PROP_VOLTAGE_MAX || psp == POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT || psp == POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX; } From 508a550eec10fe6689b71b0e35da78acac4be192 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Sat, 17 Aug 2024 12:51:14 +0200 Subject: [PATCH 126/534] power: supply: max17042_battery: Fix SOC threshold calc w/ no current sense [ Upstream commit 3a3acf839b2cedf092bdd1ff65b0e9895df1656b ] Commit 223a3b82834f ("power: supply: max17042_battery: use VFSOC for capacity when no rsns") made it so that capacity on systems without current sensing would be read from VFSOC instead of RepSOC. However, the SOC threshold calculation still read RepSOC to get the SOC regardless of the current sensing option state. Fix this by applying the same conditional to determine which register should be read. This also seems to be the intended behavior as per the datasheet - SOC alert config value in MiscCFG on setups without current sensing is set to a value of 0b11, indicating SOC alerts being generated based on VFSOC, instead of 0b00 which indicates SOC alerts being generated based on RepSOC. This fixes an issue on the Galaxy S3/Midas boards, where the alert interrupt would be constantly retriggered, causing high CPU usage on idle (around ~12%-15%). Fixes: e5f3872d2044 ("max17042: Add support for signalling change in SOC") Signed-off-by: Artur Weber Reviewed-by: Henrik Grimler Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240817-max17042-soc-threshold-fix-v1-1-72b45899c3cc@gmail.com Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/max17042_battery.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c index 17ac2ab78c4e..ab97dd7ca5cb 100644 --- a/drivers/power/supply/max17042_battery.c +++ b/drivers/power/supply/max17042_battery.c @@ -853,7 +853,10 @@ static void max17042_set_soc_threshold(struct max17042_chip *chip, u16 off) /* program interrupt thresholds such that we should * get interrupt for every 'off' perc change in the soc */ - regmap_read(map, MAX17042_RepSOC, &soc); + if (chip->pdata->enable_current_sense) + regmap_read(map, MAX17042_RepSOC, &soc); + else + regmap_read(map, MAX17042_VFSOC, &soc); soc >>= 8; soc_tr = (soc + off) << 8; if (off < soc) From 9b97d6b08b7ffd9547329f5d290698a910c5913f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 1 Aug 2024 22:34:39 +0200 Subject: [PATCH 127/534] fbdev: hpfb: Fix an error handling path in hpfb_dio_probe() [ Upstream commit aa578e897520f32ae12bec487f2474357d01ca9c ] If an error occurs after request_mem_region(), a corresponding release_mem_region() should be called, as already done in the remove function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Christophe JAILLET Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/hpfb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c index 406c1383cbda..1461a909e17e 100644 --- a/drivers/video/fbdev/hpfb.c +++ b/drivers/video/fbdev/hpfb.c @@ -343,6 +343,7 @@ static int hpfb_dio_probe(struct dio_dev *d, const struct dio_device_id *ent) if (hpfb_init_one(paddr, vaddr)) { if (d->scode >= DIOII_SCBASE) iounmap((void *)vaddr); + release_mem_region(d->resource.start, resource_size(&d->resource)); return -ENOMEM; } return 0; From e109a01f3d56096755b17023759122aee149f2d9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:23 -0300 Subject: [PATCH 128/534] iommu/amd: Do not set the D bit on AMD v2 table entries [ Upstream commit 2910a7fa1be090fc7637cef0b2e70bcd15bf5469 ] The manual says that bit 6 is IGN for all Page-Table Base Address pointers, don't set it. Fixes: aaac38f61487 ("iommu/amd: Initial support for AMD IOMMU v2 page table") Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/14-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd/io_pgtable_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index e9ef2e0a62f6..cbf0c4601512 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -50,7 +50,7 @@ static inline u64 set_pgtable_attr(u64 *page) u64 prot; prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER; - prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + prot |= IOMMU_PAGE_ACCESS; return (iommu_virt_to_phys(page) | prot); } From 4e31e504201fb240f52cacf68b673ba8a8c9b303 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Wed, 28 Aug 2024 17:24:27 +0800 Subject: [PATCH 129/534] mtd: powernv: Add check devm_kasprintf() returned value [ Upstream commit 395999829880a106bb95f0ce34e6e4c2b43c6a5d ] devm_kasprintf() can return a NULL pointer on failure but this returned value is not checked. Fixes: acfe63ec1c59 ("mtd: Convert to using %pOFn instead of device_node.name") Signed-off-by: Charles Han Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240828092427.128177-1-hanchunchao@inspur.com Signed-off-by: Sasha Levin --- drivers/mtd/devices/powernv_flash.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/devices/powernv_flash.c b/drivers/mtd/devices/powernv_flash.c index 36e060386e59..59e1b3a4406e 100644 --- a/drivers/mtd/devices/powernv_flash.c +++ b/drivers/mtd/devices/powernv_flash.c @@ -207,6 +207,9 @@ static int powernv_flash_set_driver_info(struct device *dev, * get them */ mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node); + if (!mtd->name) + return -ENOMEM; + mtd->type = MTD_NORFLASH; mtd->flags = MTD_WRITEABLE; mtd->size = size; From 9b52ee18f6d2f0e845b0dd5ba35edc02ba318827 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 14 Aug 2024 00:56:40 +0200 Subject: [PATCH 130/534] rcu/nocb: Fix RT throttling hrtimer armed from offline CPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9139f93209d1ffd7f489ab19dee01b7c3a1a43d2 ] After a CPU is marked offline and until it reaches its final trip to idle, rcuo has several opportunities to be woken up, either because a callback has been queued in the meantime or because rcutree_report_cpu_dead() has issued the final deferred NOCB wake up. If RCU-boosting is enabled, RCU kthreads are set to SCHED_FIFO policy. And if RT-bandwidth is enabled, the related hrtimer might be armed. However this then happens after hrtimers have been migrated at the CPUHP_AP_HRTIMERS_DYING stage, which is broken as reported by the following warning: Call trace: enqueue_hrtimer+0x7c/0xf8 hrtimer_start_range_ns+0x2b8/0x300 enqueue_task_rt+0x298/0x3f0 enqueue_task+0x94/0x188 ttwu_do_activate+0xb4/0x27c try_to_wake_up+0x2d8/0x79c wake_up_process+0x18/0x28 __wake_nocb_gp+0x80/0x1a0 do_nocb_deferred_wakeup_common+0x3c/0xcc rcu_report_dead+0x68/0x1ac cpuhp_report_idle_dead+0x48/0x9c do_idle+0x288/0x294 cpu_startup_entry+0x34/0x3c secondary_start_kernel+0x138/0x158 Fix this with waking up rcuo using an IPI if necessary. Since the existing API to deal with this situation only handles swait queue, rcuo is only woken up from offline CPUs if it's not already waiting on a grace period. In the worst case some callbacks will just wait for a grace period to complete before being assigned to a subsequent one. Reported-by: "Cheng-Jui Wang (王正睿)" Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Signed-off-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay Signed-off-by: Sasha Levin --- kernel/rcu/tree_nocb.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 30b34f215ca3..e019f166daa6 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -220,7 +220,10 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp, raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); if (needwake) { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake")); - wake_up_process(rdp_gp->nocb_gp_kthread); + if (cpu_is_offline(raw_smp_processor_id())) + swake_up_one_online(&rdp_gp->nocb_gp_wq); + else + wake_up_process(rdp_gp->nocb_gp_kthread); } return needwake; From ca63b1cbcd9920a1106003d981115448fb04bbc1 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 26 Aug 2024 17:43:25 +0800 Subject: [PATCH 131/534] mtd: rawnand: mtk: Use for_each_child_of_node_scoped() [ Upstream commit 8795952679494b111b7b2ba08bb54ac408daca3b ] Avoids the need for manual cleanup of_node_put() in early exits from the loop. Signed-off-by: Jinjie Ruan Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240826094328.2991664-8-ruanjinjie@huawei.com Stable-dep-of: 2073ae37d550 ("mtd: rawnand: mtk: Fix init error path") Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/mtk_nand.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index 29c8bddde67f..c4ef4eae5da2 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -1432,15 +1432,12 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc, static int mtk_nfc_nand_chips_init(struct device *dev, struct mtk_nfc *nfc) { struct device_node *np = dev->of_node; - struct device_node *nand_np; int ret; - for_each_child_of_node(np, nand_np) { + for_each_child_of_node_scoped(np, nand_np) { ret = mtk_nfc_nand_chip_init(dev, nfc, nand_np); - if (ret) { - of_node_put(nand_np); + if (ret) return ret; - } } return 0; From e502a0db342245cca5b089de876a44ee1c97d3e0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 17:30:18 +0200 Subject: [PATCH 132/534] mtd: rawnand: mtk: Factorize out the logic cleaning mtk chips [ Upstream commit 81cb3be3261e766a1f8efab9e3154a4f4fd9d03d ] There are some un-freed resources in one of the error path which would benefit from a helper going through all the registered mtk chips one by one and perform all the necessary cleanup. This is precisely what the remove path does, so let's extract the logic in a helper. There is no functional change. Signed-off-by: Miquel Raynal Reviewed-by: Pratyush Yadav Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/linux-mtd/20240826153019.67106-1-miquel.raynal@bootlin.com Stable-dep-of: 2073ae37d550 ("mtd: rawnand: mtk: Fix init error path") Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/mtk_nand.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index c4ef4eae5da2..3fb32a59fdf6 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -1429,6 +1429,23 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc, return 0; } +static void mtk_nfc_nand_chips_cleanup(struct mtk_nfc *nfc) +{ + struct mtk_nfc_nand_chip *mtk_chip; + struct nand_chip *chip; + int ret; + + while (!list_empty(&nfc->chips)) { + mtk_chip = list_first_entry(&nfc->chips, + struct mtk_nfc_nand_chip, node); + chip = &mtk_chip->nand; + ret = mtd_device_unregister(nand_to_mtd(chip)); + WARN_ON(ret); + nand_cleanup(chip); + list_del(&mtk_chip->node); + } +} + static int mtk_nfc_nand_chips_init(struct device *dev, struct mtk_nfc *nfc) { struct device_node *np = dev->of_node; @@ -1567,20 +1584,8 @@ release_ecc: static void mtk_nfc_remove(struct platform_device *pdev) { struct mtk_nfc *nfc = platform_get_drvdata(pdev); - struct mtk_nfc_nand_chip *mtk_chip; - struct nand_chip *chip; - int ret; - - while (!list_empty(&nfc->chips)) { - mtk_chip = list_first_entry(&nfc->chips, - struct mtk_nfc_nand_chip, node); - chip = &mtk_chip->nand; - ret = mtd_device_unregister(nand_to_mtd(chip)); - WARN_ON(ret); - nand_cleanup(chip); - list_del(&mtk_chip->node); - } + mtk_nfc_nand_chips_cleanup(nfc); mtk_ecc_release(nfc->ecc); } From 0f0222d5abe91a778130461ae4f4ba7081e413e2 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 26 Aug 2024 17:30:19 +0200 Subject: [PATCH 133/534] mtd: rawnand: mtk: Fix init error path [ Upstream commit 2073ae37d550ea32e8545edaa94ef10b4fef7235 ] Reviewing a series converting the for_each_chil_of_node() loops into their _scoped variants made me realize there was no cleanup of the already registered NAND devices upon error which may leak memory on systems with more than a chip when this error occurs. We should call the _nand_chips_cleanup() function when this happens. Fixes: 1d6b1e464950 ("mtd: mediatek: driver for MTK Smart Device") Signed-off-by: Miquel Raynal Reviewed-by: Pratyush Yadav Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/linux-mtd/20240826153019.67106-2-miquel.raynal@bootlin.com Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/mtk_nand.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index 3fb32a59fdf6..161a409ca4ed 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -1453,8 +1453,10 @@ static int mtk_nfc_nand_chips_init(struct device *dev, struct mtk_nfc *nfc) for_each_child_of_node_scoped(np, nand_np) { ret = mtk_nfc_nand_chip_init(dev, nfc, nand_np); - if (ret) + if (ret) { + mtk_nfc_nand_chips_cleanup(nfc); return ret; + } } return 0; From 324e1ec46356392e03d6e9ee968e0f17e0f7c3bd Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Tue, 20 Aug 2024 15:27:19 +0200 Subject: [PATCH 134/534] iommu/arm-smmu-qcom: hide last LPASS SMMU context bank from linux [ Upstream commit 3a8990b8a778219327c5f8ecf10b5d81377b925a ] On qcom msm8998, writing to the last context bank of lpass_q6_smmu (base address 0x05100000) produces a system freeze & reboot. The hardware/hypervisor reports 13 context banks for the LPASS SMMU on msm8998, but only the first 12 are accessible... Override the number of context banks [ 2.546101] arm-smmu 5100000.iommu: probing hardware configuration... [ 2.552439] arm-smmu 5100000.iommu: SMMUv2 with: [ 2.558945] arm-smmu 5100000.iommu: stage 1 translation [ 2.563627] arm-smmu 5100000.iommu: address translation ops [ 2.568923] arm-smmu 5100000.iommu: non-coherent table walk [ 2.574566] arm-smmu 5100000.iommu: (IDR0.CTTW overridden by FW configuration) [ 2.580220] arm-smmu 5100000.iommu: stream matching with 12 register groups [ 2.587263] arm-smmu 5100000.iommu: 13 context banks (0 stage-2 only) [ 2.614447] arm-smmu 5100000.iommu: Supported page sizes: 0x63315000 [ 2.621358] arm-smmu 5100000.iommu: Stage-1: 36-bit VA -> 36-bit IPA [ 2.627772] arm-smmu 5100000.iommu: preserved 0 boot mappings Specifically, the crashes occur here: qsmmu->bypass_cbndx = smmu->num_context_banks - 1; arm_smmu_cb_write(smmu, qsmmu->bypass_cbndx, ARM_SMMU_CB_SCTLR, 0); and here: arm_smmu_write_context_bank(smmu, i); arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_CB_FSR_FAULT); It is likely that FW reserves the last context bank for its own use, thus a simple work-around is: DON'T USE IT in Linux. If we decrease the number of context banks, last one will be "hidden". Signed-off-by: Marc Gonzalez Reviewed-by: Caleb Connolly Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240820-smmu-v3-1-2f71483b00ec@freebox.fr Signed-off-by: Will Deacon Stable-dep-of: 19eb465c969f ("iommu/arm-smmu-qcom: apply num_context_bank fixes for SDM630 / SDM660") Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 6e6cb19c81f5..b899a7b7fa93 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -278,6 +278,13 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) u32 smr; int i; + /* + * MSM8998 LPASS SMMU reports 13 context banks, but accessing + * the last context bank crashes the system. + */ + if (of_device_is_compatible(smmu->dev->of_node, "qcom,msm8998-smmu-v2") && smmu->num_context_banks == 13) + smmu->num_context_banks = 12; + /* * Some platforms support more than the Arm SMMU architected maximum of * 128 stream matching groups. For unknown reasons, the additional From 7acaef4f28b64d605a30ab7abeeef9996b90b82a Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 24 Aug 2024 01:12:01 +0200 Subject: [PATCH 135/534] iommu/arm-smmu-qcom: Work around SDM845 Adreno SMMU w/ 16K pages [ Upstream commit 2d42d3ba443706c9164fa0bef4e5fd1c36bc1bd9 ] SDM845's Adreno SMMU is unique in that it actually advertizes support for 16K (and 32M) pages, which doesn't hold for newer SoCs. This however, seems either broken in the hardware implementation, the hypervisor middleware that abstracts the SMMU, or there's a bug in the Linux kernel somewhere down the line that nobody managed to track down. Booting SDM845 with 16K page sizes and drm/msm results in: *** gpu fault: ttbr0=0000000000000000 iova=000100000000c000 dir=READ type=TRANSLATION source=CP (0,0,0,0) right after loading the firmware. The GPU then starts spitting out illegal intstruction errors, as it's quite obvious that it got a bogus pointer. Moreover, it seems like this issue also concerns other implementations of SMMUv2 on Qualcomm SoCs, such as the one on SC7180. Hide 16K support on such instances to work around this. Reported-by: Sumit Semwal Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240824-topic-845_gpu_smmu-v2-1-a302b8acc052@quicinc.com Signed-off-by: Will Deacon Stable-dep-of: 19eb465c969f ("iommu/arm-smmu-qcom: apply num_context_bank fixes for SDM630 / SDM660") Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index b899a7b7fa93..db2092d5af5e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -341,6 +341,14 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) return 0; } +static int qcom_adreno_smmuv2_cfg_probe(struct arm_smmu_device *smmu) +{ + /* Support for 16K pages is advertised on some SoCs, but it doesn't seem to work */ + smmu->features &= ~ARM_SMMU_FEAT_FMT_AARCH64_16K; + + return 0; +} + static void qcom_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx) { struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx; @@ -431,6 +439,7 @@ static const struct arm_smmu_impl sdm845_smmu_500_impl = { static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = { .init_context = qcom_adreno_smmu_init_context, + .cfg_probe = qcom_adreno_smmuv2_cfg_probe, .def_domain_type = qcom_smmu_def_domain_type, .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, .write_sctlr = qcom_adreno_smmu_write_sctlr, From c390a26db31ab61efe4ba3b05e7a9560d5e26445 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 7 Sep 2024 21:48:12 +0300 Subject: [PATCH 136/534] iommu/arm-smmu-qcom: apply num_context_bank fixes for SDM630 / SDM660 [ Upstream commit 19eb465c969f3d6ed1b98506d3e470e863b41e16 ] The Qualcomm SDM630 / SDM660 platform requires the same kind of workaround as MSM8998: some IOMMUs have context banks reserved by firmware / TZ, touching those banks resets the board. Apply the num_context_bank workaround to those two SMMU devices in order to allow them to be used by Linux. Fixes: b812834b5329 ("iommu: arm-smmu-qcom: Add sdm630/msm8998 compatibles for qcom quirks") Signed-off-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240907-sdm660-wifi-v1-1-e316055142f8@linaro.org Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index db2092d5af5e..d49158936019 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -282,8 +282,15 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) * MSM8998 LPASS SMMU reports 13 context banks, but accessing * the last context bank crashes the system. */ - if (of_device_is_compatible(smmu->dev->of_node, "qcom,msm8998-smmu-v2") && smmu->num_context_banks == 13) + if (of_device_is_compatible(smmu->dev->of_node, "qcom,msm8998-smmu-v2") && + smmu->num_context_banks == 13) { smmu->num_context_banks = 12; + } else if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm630-smmu-v2")) { + if (smmu->num_context_banks == 21) /* SDM630 / SDM660 A2NOC SMMU */ + smmu->num_context_banks = 7; + else if (smmu->num_context_banks == 14) /* SDM630 / SDM660 LPASS SMMU */ + smmu->num_context_banks = 13; + } /* * Some platforms support more than the Arm SMMU architected maximum of @@ -346,6 +353,11 @@ static int qcom_adreno_smmuv2_cfg_probe(struct arm_smmu_device *smmu) /* Support for 16K pages is advertised on some SoCs, but it doesn't seem to work */ smmu->features &= ~ARM_SMMU_FEAT_FMT_AARCH64_16K; + /* TZ protects several last context banks, hide them from Linux */ + if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm630-smmu-v2") && + smmu->num_context_banks == 5) + smmu->num_context_banks = 2; + return 0; } From 8e6f4aa43b7915d621adba37273e81d06c25128d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 4 Sep 2024 16:30:45 +0200 Subject: [PATCH 137/534] pmdomain: core: Harden inter-column space in debug summary [ Upstream commit 692c20c4d075bd452acfbbc68200fc226c7c9496 ] The inter-column space in the debug summary is two spaces. However, in one case, the extra space is handled implicitly in a field width specifier. Make inter-column space explicit to ease future maintenance. Fixes: 45fbc464b047 ("PM: domains: Add "performance" column to debug summary") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/ae61eb363621b981edde878e1e74d701702a579f.1725459707.git.geert+renesas@glider.be Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 84443b6bd882..582564f8dde6 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -3135,7 +3135,7 @@ static int genpd_summary_one(struct seq_file *s, else snprintf(state, sizeof(state), "%s", status_lookup[genpd->status]); - seq_printf(s, "%-30s %-50s %u", genpd->name, state, genpd->performance_state); + seq_printf(s, "%-30s %-49s %u", genpd->name, state, genpd->performance_state); /* * Modifications on the list require holding locks on both From 6e513c2e94606d5a939bf9b32662f639f3a7dc7a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Jan 2024 17:54:32 +0100 Subject: [PATCH 138/534] drm/stm: Fix an error handling path in stm_drm_platform_probe() [ Upstream commit ce7c90bfda2656418c69ba0dd8f8a7536b8928d4 ] If drm_dev_register() fails, a call to drv_load() must be undone, as already done in the remove function. Fixes: b759012c5fa7 ("drm/stm: Add STM32 LTDC driver") Signed-off-by: Christophe JAILLET Acked-by: Raphael Gallais-Pou Link: https://patchwork.freedesktop.org/patch/msgid/20fff7f853f20a48a96db8ff186124470ec4d976.1704560028.git.christophe.jaillet@wanadoo.fr Signed-off-by: Raphael Gallais-Pou Signed-off-by: Sasha Levin --- drivers/gpu/drm/stm/drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c index e8523abef27a..4d2db079ad4f 100644 --- a/drivers/gpu/drm/stm/drv.c +++ b/drivers/gpu/drm/stm/drv.c @@ -203,12 +203,14 @@ static int stm_drm_platform_probe(struct platform_device *pdev) ret = drm_dev_register(ddev, 0); if (ret) - goto err_put; + goto err_unload; drm_fbdev_dma_setup(ddev, 16); return 0; +err_unload: + drv_unload(ddev); err_put: drm_dev_put(ddev); From fc8b0b8dbdbacc642f3b0ae8a579fdb28c5eea61 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 31 May 2023 10:28:54 +0300 Subject: [PATCH 139/534] drm/stm: ltdc: check memory returned by devm_kzalloc() [ Upstream commit fd39730c58890cd7f0a594231e19bb357f28877c ] devm_kzalloc() can fail and return NULL pointer. Check its return status. Identified with Coccinelle (kmerr.cocci script). Fixes: 484e72d3146b ("drm/stm: ltdc: add support of ycbcr pixel formats") Signed-off-by: Claudiu Beznea Acked-by: Raphael Gallais-Pou Link: https://patchwork.freedesktop.org/patch/msgid/20230531072854.142629-1-claudiu.beznea@microchip.com Signed-off-by: Raphael Gallais-Pou Signed-off-by: Sasha Levin --- drivers/gpu/drm/stm/ltdc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index 5576fdae4962..5aec1e58c968 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -1580,6 +1580,8 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev, ARRAY_SIZE(ltdc_drm_fmt_ycbcr_sp) + ARRAY_SIZE(ltdc_drm_fmt_ycbcr_fp)) * sizeof(*formats), GFP_KERNEL); + if (!formats) + return NULL; for (i = 0; i < ldev->caps.pix_fmt_nb; i++) { drm_fmt = ldev->caps.pix_fmt_drm[i]; From ddf9ff244d704e1903533f7be377615ed34b83e7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 22 Jul 2024 17:18:17 +0530 Subject: [PATCH 140/534] drm/amd/display: Add null check for set_output_gamma in dcn30_set_output_transfer_func [ Upstream commit 08ae395ea22fb3d9b318c8bde28c0dfd2f5fa4d2 ] This commit adds a null check for the set_output_gamma function pointer in the dcn30_set_output_transfer_func function. Previously, set_output_gamma was being checked for nullity at line 386, but then it was being dereferenced without any nullity check at line 401. This could potentially lead to a null pointer dereference error if set_output_gamma is indeed null. To fix this, we now ensure that set_output_gamma is not null before dereferencing it. We do this by adding a nullity check for set_output_gamma before the call to set_output_gamma at line 401. If set_output_gamma is null, we log an error message and do not call the function. This fix prevents a potential null pointer dereference error. drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c:401 dcn30_set_output_transfer_func() error: we previously assumed 'mpc->funcs->set_output_gamma' could be null (see line 386) drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c 373 bool dcn30_set_output_transfer_func(struct dc *dc, 374 struct pipe_ctx *pipe_ctx, 375 const struct dc_stream_state *stream) 376 { 377 int mpcc_id = pipe_ctx->plane_res.hubp->inst; 378 struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; 379 const struct pwl_params *params = NULL; 380 bool ret = false; 381 382 /* program OGAM or 3DLUT only for the top pipe*/ 383 if (pipe_ctx->top_pipe == NULL) { 384 /*program rmu shaper and 3dlut in MPC*/ 385 ret = dcn30_set_mpc_shaper_3dlut(pipe_ctx, stream); 386 if (ret == false && mpc->funcs->set_output_gamma) { ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If this is NULL 387 if (stream->out_transfer_func.type == TF_TYPE_HWPWL) 388 params = &stream->out_transfer_func.pwl; 389 else if (pipe_ctx->stream->out_transfer_func.type == 390 TF_TYPE_DISTRIBUTED_POINTS && 391 cm3_helper_translate_curve_to_hw_format( 392 &stream->out_transfer_func, 393 &mpc->blender_params, false)) 394 params = &mpc->blender_params; 395 /* there are no ROM LUTs in OUTGAM */ 396 if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED) 397 BREAK_TO_DEBUGGER(); 398 } 399 } 400 --> 401 mpc->funcs->set_output_gamma(mpc, mpcc_id, params); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Then it will crash 402 return ret; 403 } Fixes: d99f13878d6f ("drm/amd/display: Add DCN3 HWSEQ") Reported-by: Dan Carpenter Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Hersen Wu Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index ba47a1c8eec1..d59af329d000 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -214,7 +214,11 @@ bool dcn30_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + else + DC_LOG_ERROR("%s: set_output_gamma function pointer is NULL.\n", __func__); + return ret; } From 78b9e10b3bd1bb04c40f9f12b899951a6904772b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 23 Jul 2024 13:23:56 -0400 Subject: [PATCH 141/534] drm/amdgpu: properly handle vbios fake edid sizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8155566a26b8d6c1dd914f06a0c652e4e2f2adf1 ] The comment in the vbios structure says: // = 128 means EDID length is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128 This fake edid struct has not been used in a long time, so I'm not sure if there were actually any boards out there with a non-128 byte EDID, but align the code with the comment. Reviewed-by: Thomas Weißschuh Reported-by: Thomas Weißschuh Link: https://lists.freedesktop.org/archives/amd-gfx/2024-June/109964.html Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/amdgpu/atombios_encoders.c | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index d95b2dc78063..157e898dc382 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2065,26 +2065,29 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder) fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); + int edid_size; + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], + edid_size, GFP_KERNEL); + if (edid) { if (drm_edid_is_valid(edid)) { adev->mode_info.bios_hardcoded_edid = edid; adev->mode_info.bios_hardcoded_edid_size = edid_size; - } else + } else { kfree(edid); + } } + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; From de67850b401946dc683103d11d7814ae5611750e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 23 Jul 2024 13:31:58 -0400 Subject: [PATCH 142/534] drm/radeon: properly handle vbios fake edid sizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 17c6baff3d5f65c8da164137a58742541a060b2f ] The comment in the vbios structure says: // = 128 means EDID length is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128 This fake edid struct has not been used in a long time, so I'm not sure if there were actually any boards out there with a non-128 byte EDID, but align the code with the comment. Reviewed-by: Thomas Weißschuh Reported-by: Thomas Weißschuh Link: https://lists.freedesktop.org/archives/amd-gfx/2024-June/109964.html Fixes: c324acd5032f ("drm/radeon/kms: parse the extended LCD info block") Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_atombios.c | 29 +++++++++++++----------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 061396e7fa0f..53c7273eb6a5 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1716,26 +1716,29 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); + int edid_size; + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = kmemdup(&fake_edid_record->ucFakeEDIDString[0], + edid_size, GFP_KERNEL); + if (edid) { if (drm_edid_is_valid(edid)) { rdev->mode_info.bios_hardcoded_edid = edid; rdev->mode_info.bios_hardcoded_edid_size = edid_size; - } else + } else { kfree(edid); + } } + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; From 464fd60a16d230e4fbb71a7e7c7fa88f713ecf9f Mon Sep 17 00:00:00 2001 From: Gilbert Wu Date: Thu, 11 Jul 2024 14:47:02 -0500 Subject: [PATCH 143/534] scsi: smartpqi: revert propagate-the-multipath-failure-to-SML-quickly [ Upstream commit f1393d52e6cda9c20f12643cbecf1e1dc357e0e2 ] Correct a rare multipath failure issue by reverting commit 94a68c814328 ("scsi: smartpqi: Quickly propagate path failures to SCSI midlayer") [1]. Reason for revert: The patch propagated the path failure to SML quickly when one of the path fails during IO and AIO path gets disabled for a multipath device. But it created a new issue: when creating a volume on an encryption-enabled controller, the firmware reports the AIO path is disabled, which cause the driver to report a path failure to SML for a multipath device. There will be a new fix to handle "Illegal request" and "Invalid field in parameter list" on RAID path when the AIO path is disabled on a multipath device. [1] https://lore.kernel.org/all/164375209313.440833.9992416628621839233.stgit@brunhilda.pdev.net/ Fixes: 94a68c814328 ("scsi: smartpqi: Quickly propagate path failures to SCSI midlayer") Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Mike McGowen Signed-off-by: Gilbert Wu Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20240711194704.982400-4-don.brace@microchip.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 868453b18c9a..2ae64cda8bc9 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -2355,14 +2355,6 @@ static inline void pqi_mask_device(u8 *scsi3addr) scsi3addr[3] |= 0xc0; } -static inline bool pqi_is_multipath_device(struct pqi_scsi_dev *device) -{ - if (pqi_is_logical_device(device)) - return false; - - return (device->path_map & (device->path_map - 1)) != 0; -} - static inline bool pqi_expose_device(struct pqi_scsi_dev *device) { return !device->is_physical_device || !pqi_skip_device(device->scsi3addr); @@ -3259,14 +3251,12 @@ static void pqi_process_aio_io_error(struct pqi_io_request *io_request) int residual_count; int xfer_count; bool device_offline; - struct pqi_scsi_dev *device; scmd = io_request->scmd; error_info = io_request->error_info; host_byte = DID_OK; sense_data_length = 0; device_offline = false; - device = scmd->device->hostdata; switch (error_info->service_response) { case PQI_AIO_SERV_RESPONSE_COMPLETE: @@ -3291,14 +3281,8 @@ static void pqi_process_aio_io_error(struct pqi_io_request *io_request) break; case PQI_AIO_STATUS_AIO_PATH_DISABLED: pqi_aio_path_disabled(io_request); - if (pqi_is_multipath_device(device)) { - pqi_device_remove_start(device); - host_byte = DID_NO_CONNECT; - scsi_status = SAM_STAT_CHECK_CONDITION; - } else { - scsi_status = SAM_STAT_GOOD; - io_request->status = -EAGAIN; - } + scsi_status = SAM_STAT_GOOD; + io_request->status = -EAGAIN; break; case PQI_AIO_STATUS_NO_PATH_TO_DEVICE: case PQI_AIO_STATUS_INVALID_DEVICE: From 53c18f7baf0c9aa0603abbbe4c8e3547ad7ebfd7 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 7 Aug 2024 13:36:28 +1000 Subject: [PATCH 144/534] scsi: NCR5380: Check for phase match during PDMA fixup [ Upstream commit 5768718da9417331803fc4bc090544c2a93b88dc ] It's not an error for a target to change the bus phase during a transfer. Unfortunately, the FLAG_DMA_FIXUP workaround does not allow for that -- a phase change produces a DRQ timeout error and the device borken flag will be set. Check the phase match bit during FLAG_DMA_FIXUP processing. Don't forget to decrement the command residual. While we are here, change shost_printk() into scmd_printk() for better consistency with other DMA error messages. Tested-by: Stan Johnson Fixes: 55181be8ced1 ("ncr5380: Replace redundant flags with FLAG_NO_DMA_FIXUP") Signed-off-by: Finn Thain Link: https://lore.kernel.org/r/99dc7d1f4c825621b5b120963a69f6cd3e9ca659.1723001788.git.fthain@linux-m68k.org Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 78 +++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index cea3a79d538e..00e245173320 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -1485,6 +1485,7 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char **data) { struct NCR5380_hostdata *hostdata = shost_priv(instance); + struct NCR5380_cmd *ncmd = NCR5380_to_ncmd(hostdata->connected); int c = *count; unsigned char p = *phase; unsigned char *d = *data; @@ -1496,7 +1497,7 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, return -1; } - NCR5380_to_ncmd(hostdata->connected)->phase = p; + ncmd->phase = p; if (p & SR_IO) { if (hostdata->read_overruns) @@ -1608,45 +1609,44 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, * request. */ - if (hostdata->flags & FLAG_DMA_FIXUP) { - if (p & SR_IO) { - /* - * The workaround was to transfer fewer bytes than we - * intended to with the pseudo-DMA read function, wait for - * the chip to latch the last byte, read it, and then disable - * pseudo-DMA mode. - * - * After REQ is asserted, the NCR5380 asserts DRQ and ACK. - * REQ is deasserted when ACK is asserted, and not reasserted - * until ACK goes false. Since the NCR5380 won't lower ACK - * until DACK is asserted, which won't happen unless we twiddle - * the DMA port or we take the NCR5380 out of DMA mode, we - * can guarantee that we won't handshake another extra - * byte. - */ - - if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, - BASR_DRQ, BASR_DRQ, 0) < 0) { - result = -1; - shost_printk(KERN_ERR, instance, "PDMA read: DRQ timeout\n"); - } - if (NCR5380_poll_politely(hostdata, STATUS_REG, - SR_REQ, 0, 0) < 0) { - result = -1; - shost_printk(KERN_ERR, instance, "PDMA read: !REQ timeout\n"); - } - d[*count - 1] = NCR5380_read(INPUT_DATA_REG); - } else { - /* - * Wait for the last byte to be sent. If REQ is being asserted for - * the byte we're interested, we'll ACK it and it will go false. - */ - if (NCR5380_poll_politely2(hostdata, - BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ, - BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, 0) < 0) { - result = -1; - shost_printk(KERN_ERR, instance, "PDMA write: DRQ and phase timeout\n"); + if ((hostdata->flags & FLAG_DMA_FIXUP) && + (NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH)) { + /* + * The workaround was to transfer fewer bytes than we + * intended to with the pseudo-DMA receive function, wait for + * the chip to latch the last byte, read it, and then disable + * DMA mode. + * + * After REQ is asserted, the NCR5380 asserts DRQ and ACK. + * REQ is deasserted when ACK is asserted, and not reasserted + * until ACK goes false. Since the NCR5380 won't lower ACK + * until DACK is asserted, which won't happen unless we twiddle + * the DMA port or we take the NCR5380 out of DMA mode, we + * can guarantee that we won't handshake another extra + * byte. + * + * If sending, wait for the last byte to be sent. If REQ is + * being asserted for the byte we're interested, we'll ACK it + * and it will go false. + */ + if (!NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, + BASR_DRQ, BASR_DRQ, 0)) { + if ((p & SR_IO) && + (NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH)) { + if (!NCR5380_poll_politely(hostdata, STATUS_REG, + SR_REQ, 0, 0)) { + d[c] = NCR5380_read(INPUT_DATA_REG); + --ncmd->this_residual; + } else { + result = -1; + scmd_printk(KERN_ERR, hostdata->connected, + "PDMA fixup: !REQ timeout\n"); + } } + } else if (NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH) { + result = -1; + scmd_printk(KERN_ERR, hostdata->connected, + "PDMA fixup: DRQ timeout\n"); } } From 8e7760ed234f55f28dfd68254d08c9d0527dd766 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Wed, 31 Jul 2024 12:10:40 +0800 Subject: [PATCH 145/534] drm/amd/amdgpu: Properly tune the size of struct [ Upstream commit 0cee47cde41e22712c034ae961076067d4ac13a0 ] The struct assertion is failed because sparse cannot parse `#pragma pack(push, 1)` and `#pragma pack(pop)` correctly. GCC's output is still 1-byte-aligned. No harm to memory layout. The error can be filtered out by sparse-diff, but sometimes multiple lines queezed into one, making the sparse-diff thinks its a new error. I'm trying to aviod this by fixing errors. Link: https://lore.kernel.org/all/20230620045919.492128-1-suhui@nfschina.com/ Link: https://lore.kernel.org/all/93d10611-9fbb-4242-87b8-5860b2606042@suswa.mountain/ Fixes: 1721bc1b2afa ("drm/amdgpu: Update VF2PF interface") Cc: Dan Carpenter Cc: wenlunpeng Reported-by: Su Hui Signed-off-by: WangYuli Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 104a5ad8397d..198687545407 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -209,7 +209,7 @@ struct amd_sriov_msg_pf2vf_info { uint32_t pcie_atomic_ops_support_flags; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; -}; +} __packed; struct amd_sriov_msg_vf2pf_info_header { /* the total structure size in byte */ @@ -267,7 +267,7 @@ struct amd_sriov_msg_vf2pf_info { /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; -}; +} __packed; /* mailbox message send from guest to host */ enum amd_sriov_mailbox_request_message { From 9ec05e0b4ac41ce7329ee3093de2388caa1635b8 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Sat, 15 Jun 2024 17:03:54 +0000 Subject: [PATCH 146/534] drm/rockchip: vop: Allow 4096px width scaling [ Upstream commit 0ef968d91a20b5da581839f093f98f7a03a804f7 ] There is no reason to limit VOP scaling to 3840px width, the limit of RK3288, when there are newer VOP versions that support 4096px width. Change to enforce a maximum of 4096px width plane scaling, the maximum supported output width of the VOP versions supported by this driver. Fixes: 4c156c21c794 ("drm/rockchip: vop: support plane scale") Signed-off-by: Alex Bee Signed-off-by: Jonas Karlman Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20240615170417.3134517-4-jonas@kwiboo.se Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 4b338cb89d32..c6fbfc0baecc 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -381,8 +381,8 @@ static void scl_vop_cal_scl_fac(struct vop *vop, const struct vop_win_data *win, if (info->is_yuv) is_yuv = true; - if (dst_w > 3840) { - DRM_DEV_ERROR(vop->dev, "Maximum dst width (3840) exceeded\n"); + if (dst_w > 4096) { + DRM_DEV_ERROR(vop->dev, "Maximum dst width (4096) exceeded\n"); return; } From 656803ab1ad2f0eb8f26bdf99f35decd224d0b8a Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sat, 15 Jun 2024 17:03:55 +0000 Subject: [PATCH 147/534] drm/rockchip: dw_hdmi: Fix reading EDID when using a forced mode [ Upstream commit a5d024541ec466f428e6c514577d511a40779c7b ] EDID cannot be read on RK3328 until after read_hpd has been called and correct io voltage has been configured based on connection status. When a forced mode is used, e.g. video=1920x1080@60e, the connector detect ops, that in turn normally calls the read_hpd, never gets called. This result in reading EDID to fail in connector get_modes ops. Call dw_hdmi_rk3328_read_hpd at end of dw_hdmi_rk3328_setup_hpd to correct io voltage and allow reading EDID after setup_hpd. Fixes: 1c53ba8f22a1 ("drm/rockchip: dw_hdmi: add dw-hdmi support for the rk3328") Signed-off-by: Jonas Karlman Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20240615170417.3134517-5-jonas@kwiboo.se Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 341550199111..89bc86d62014 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -435,6 +435,8 @@ static void dw_hdmi_rk3328_setup_hpd(struct dw_hdmi *dw_hdmi, void *data) HIWORD_UPDATE(RK3328_HDMI_SDAIN_MSK | RK3328_HDMI_SCLIN_MSK, RK3328_HDMI_SDAIN_MSK | RK3328_HDMI_SCLIN_MSK | RK3328_HDMI_HPD_IOE)); + + dw_hdmi_rk3328_read_hpd(dw_hdmi, data); } static const struct dw_hdmi_phy_ops rk3228_hdmi_phy_ops = { From fa94d60546d2ba8c55f3c87483404162f1fbe863 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Tue, 6 Aug 2024 10:19:04 -0700 Subject: [PATCH 148/534] drm/radeon/evergreen_cs: fix int overflow errors in cs track offsets [ Upstream commit 3fbaf475a5b8361ebee7da18964db809e37518b7 ] Several cs track offsets (such as 'track->db_s_read_offset') either are initialized with or plainly take big enough values that, once shifted 8 bits left, may be hit with integer overflow if the resulting values end up going over u32 limit. Same goes for a few instances of 'surf.layer_size * mslice' multiplications that are added to 'offset' variable - they may potentially overflow as well and need to be validated properly. While some debug prints in this code section take possible overflow issues into account, simply casting to (unsigned long) may be erroneous in its own way, as depending on CPU architecture one is liable to get different results. Fix said problems by: - casting 'offset' to fixed u64 data type instead of ambiguous unsigned long. - casting one of the operands in vulnerable to integer overflow cases to u64. - adjust format specifiers in debug prints to properly represent 'offset' values. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 285484e2d55e ("drm/radeon: add support for evergreen/ni tiling informations v11") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/evergreen_cs.c | 62 +++++++++++++-------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 0de79f3a7e3f..820c2c3641d3 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -395,7 +395,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1; @@ -433,14 +433,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i return r; } - offset = track->cb_color_bo_offset[id] << 8; + offset = (u64)track->cb_color_bo_offset[id] << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d cb[%d] bo base %llu not aligned with %ld\n", __func__, __LINE__, id, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->cb_color_bo[id])) { /* old ddx are broken they allocate bo with w*h*bpp but * program slice with ALIGN(h, 8), catch this and patch @@ -448,14 +448,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i */ if (!surf.mode) { uint32_t *ib = p->ib.ptr; - unsigned long tmp, nby, bsize, size, min = 0; + u64 tmp, nby, bsize, size, min = 0; /* find the height the ddx wants */ if (surf.nby > 8) { min = surf.nby - 8; } bsize = radeon_bo_size(track->cb_color_bo[id]); - tmp = track->cb_color_bo_offset[id] << 8; + tmp = (u64)track->cb_color_bo_offset[id] << 8; for (nby = surf.nby; nby > min; nby--) { size = nby * surf.nbx * surf.bpe * surf.nsamples; if ((tmp + size * mslice) <= bsize) { @@ -467,7 +467,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i slice = ((nby * surf.nbx) / 64) - 1; if (!evergreen_surface_check(p, &surf, "cb")) { /* check if this one works */ - tmp += surf.layer_size * mslice; + tmp += (u64)surf.layer_size * mslice; if (tmp <= bsize) { ib[track->cb_color_slice_idx[id]] = slice; goto old_ddx_ok; @@ -476,9 +476,9 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i } } dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " - "offset %d, max layer %d, bo size %ld, slice %d)\n", + "offset %llu, max layer %d, bo size %ld, slice %d)\n", __func__, __LINE__, id, surf.layer_size, - track->cb_color_bo_offset[id] << 8, mslice, + (u64)track->cb_color_bo_offset[id] << 8, mslice, radeon_bo_size(track->cb_color_bo[id]), slice); dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", __func__, __LINE__, surf.nbx, surf.nby, @@ -562,7 +562,7 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -608,18 +608,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return r; } - offset = track->db_s_read_offset << 8; + offset = (u64)track->db_s_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_read_bo)) { dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_read_offset << 8, mslice, + (u64)track->db_s_read_offset << 8, mslice, radeon_bo_size(track->db_s_read_bo)); dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", __func__, __LINE__, track->db_depth_size, @@ -627,18 +627,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return -EINVAL; } - offset = track->db_s_write_offset << 8; + offset = (u64)track->db_s_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_write_bo)) { dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_write_offset << 8, mslice, + (u64)track->db_s_write_offset << 8, mslice, radeon_bo_size(track->db_s_write_bo)); return -EINVAL; } @@ -659,7 +659,7 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -706,34 +706,34 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) return r; } - offset = track->db_z_read_offset << 8; + offset = (u64)track->db_z_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_read_bo)) { dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_read_offset << 8, mslice, + (u64)track->db_z_read_offset << 8, mslice, radeon_bo_size(track->db_z_read_bo)); return -EINVAL; } - offset = track->db_z_write_offset << 8; + offset = (u64)track->db_z_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_write_bo)) { dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_write_offset << 8, mslice, + (u64)track->db_z_write_offset << 8, mslice, radeon_bo_size(track->db_z_write_bo)); return -EINVAL; } From 087b88088015981f03ff9cd2d029e89f1027683c Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Tue, 13 Aug 2024 17:16:37 +0800 Subject: [PATCH 149/534] drm/bridge: lontium-lt8912b: Validate mode in drm_bridge_funcs::mode_valid() [ Upstream commit fe828fbd87786238b30f44cafd698d975d956c97 ] If the bridge is attached with the DRM_BRIDGE_ATTACH_NO_CONNECTOR flag set, this driver won't initialize a connector and hence display mode won't be validated in drm_connector_helper_funcs::mode_valid(). So, move the mode validation from drm_connector_helper_funcs::mode_valid() to drm_bridge_funcs::mode_valid(), because the mode validation is always done for the bridge. Fixes: 30e2ae943c26 ("drm/bridge: Introduce LT8912B DSI to HDMI bridge") Signed-off-by: Liu Ying Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240813091637.1054586-1-victor.liu@nxp.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/lontium-lt8912b.c | 35 ++++++++++++------------ 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index 0efcbc73f2a4..5e43a40a5d52 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -411,22 +411,6 @@ static const struct drm_connector_funcs lt8912_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -static enum drm_mode_status -lt8912_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - if (mode->clock > 150000) - return MODE_CLOCK_HIGH; - - if (mode->hdisplay > 1920) - return MODE_BAD_HVALUE; - - if (mode->vdisplay > 1080) - return MODE_BAD_VVALUE; - - return MODE_OK; -} - static int lt8912_connector_get_modes(struct drm_connector *connector) { const struct drm_edid *drm_edid; @@ -452,7 +436,6 @@ static int lt8912_connector_get_modes(struct drm_connector *connector) static const struct drm_connector_helper_funcs lt8912_connector_helper_funcs = { .get_modes = lt8912_connector_get_modes, - .mode_valid = lt8912_connector_mode_valid, }; static void lt8912_bridge_mode_set(struct drm_bridge *bridge, @@ -594,6 +577,23 @@ static void lt8912_bridge_detach(struct drm_bridge *bridge) drm_bridge_hpd_disable(lt->hdmi_port); } +static enum drm_mode_status +lt8912_bridge_mode_valid(struct drm_bridge *bridge, + const struct drm_display_info *info, + const struct drm_display_mode *mode) +{ + if (mode->clock > 150000) + return MODE_CLOCK_HIGH; + + if (mode->hdisplay > 1920) + return MODE_BAD_HVALUE; + + if (mode->vdisplay > 1080) + return MODE_BAD_VVALUE; + + return MODE_OK; +} + static enum drm_connector_status lt8912_bridge_detect(struct drm_bridge *bridge) { @@ -624,6 +624,7 @@ static struct edid *lt8912_bridge_get_edid(struct drm_bridge *bridge, static const struct drm_bridge_funcs lt8912_bridge_funcs = { .attach = lt8912_bridge_attach, .detach = lt8912_bridge_detach, + .mode_valid = lt8912_bridge_mode_valid, .mode_set = lt8912_bridge_mode_set, .enable = lt8912_bridge_enable, .detect = lt8912_bridge_detect, From 9263023a0b04706c913f4f26ace2761f44f65269 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 21 Aug 2024 23:40:45 +0200 Subject: [PATCH 150/534] drm/vc4: hdmi: Handle error case of pm_runtime_resume_and_get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f1a54e860b1bc8d824925b5a77f510913880e8d6 ] The commit 0f5251339eda ("drm/vc4: hdmi: Make sure the controller is powered in detect") introduced the necessary power management handling to avoid register access while controller is powered down. Unfortunately it just print a warning if pm_runtime_resume_and_get() fails and proceed anyway. This could happen during suspend to idle. So we must assume it is unsafe to access the HDMI register. So bail out properly. Fixes: 0f5251339eda ("drm/vc4: hdmi: Make sure the controller is powered in detect") Signed-off-by: Stefan Wahren Reviewed-by: Maíra Canal Acked-by: Maxime Ripard Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20240821214052.6800-3-wahrenst@gmx.net Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_hdmi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 643754fa6a8a..c6e986f71a26 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -458,6 +458,7 @@ static int vc4_hdmi_connector_detect_ctx(struct drm_connector *connector, { struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); enum drm_connector_status status = connector_status_disconnected; + int ret; /* * NOTE: This function should really take vc4_hdmi->mutex, but @@ -470,7 +471,12 @@ static int vc4_hdmi_connector_detect_ctx(struct drm_connector *connector, * the lock for now. */ - WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev)); + ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev); + if (ret) { + drm_err_once(connector->dev, "Failed to retain HDMI power domain: %d\n", + ret); + return connector_status_unknown; + } if (vc4_hdmi->hpd_gpio) { if (gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio)) From baeb8628ab7f4577740f00e439d3fdf7c876b0ff Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Aug 2024 14:29:05 +0300 Subject: [PATCH 151/534] scsi: elx: libefc: Fix potential use after free in efc_nport_vport_del() [ Upstream commit 2e4b02fad094976763af08fec2c620f4f8edd9ae ] The kref_put() function will call nport->release if the refcount drops to zero. The nport->release release function is _efc_nport_free() which frees "nport". But then we dereference "nport" on the next line which is a use after free. Re-order these lines to avoid the use after free. Fixes: fcd427303eb9 ("scsi: elx: libefc: SLI and FC PORT state machine interfaces") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/b666ab26-6581-4213-9a3d-32a9147f0399@stanley.mountain Reviewed-by: Daniel Wagner Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/elx/libefc/efc_nport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/elx/libefc/efc_nport.c b/drivers/scsi/elx/libefc/efc_nport.c index 2e83a667901f..1a7437f4328e 100644 --- a/drivers/scsi/elx/libefc/efc_nport.c +++ b/drivers/scsi/elx/libefc/efc_nport.c @@ -705,9 +705,9 @@ efc_nport_vport_del(struct efc *efc, struct efc_domain *domain, spin_lock_irqsave(&efc->lock, flags); list_for_each_entry(nport, &domain->nport_list, list_entry) { if (nport->wwpn == wwpn && nport->wwnn == wwnn) { - kref_put(&nport->ref, nport->release); /* Shutdown this NPORT */ efc_sm_post_event(&nport->sm, EFC_EVT_SHUTDOWN, NULL); + kref_put(&nport->ref, nport->release); break; } } From c1ba4b8ca799ff1d99d01f37d7ccb7d5ba5533d2 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Mon, 19 Aug 2024 13:05:46 +0900 Subject: [PATCH 152/534] jfs: fix out-of-bounds in dbNextAG() and diAlloc() [ Upstream commit e63866a475562810500ea7f784099bfe341e761a ] In dbNextAG() , there is no check for the case where bmp->db_numag is greater or same than MAXAG due to a polluted image, which causes an out-of-bounds. Therefore, a bounds check should be added in dbMount(). And in dbNextAG(), a check for the case where agpref is greater than bmp->db_numag should be added, so an out-of-bounds exception should be prevented. Additionally, a check for the case where agno is greater or same than MAXAG should be added in diAlloc() to prevent out-of-bounds. Reported-by: Jeongjun Park Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jeongjun Park Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin --- fs/jfs/jfs_dmap.c | 4 ++-- fs/jfs/jfs_imap.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 5713994328cb..0625d1c0d064 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -187,7 +187,7 @@ int dbMount(struct inode *ipbmap) } bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); - if (!bmp->db_numag) { + if (!bmp->db_numag || bmp->db_numag >= MAXAG) { err = -EINVAL; goto err_release_metapage; } @@ -652,7 +652,7 @@ int dbNextAG(struct inode *ipbmap) * average free space. */ for (i = 0 ; i < bmp->db_numag; i++, agpref++) { - if (agpref == bmp->db_numag) + if (agpref >= bmp->db_numag) agpref = 0; if (atomic_read(&bmp->db_active[agpref])) diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 82d88dcf0ea6..b30e4cf2f579 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -1360,7 +1360,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) /* get the ag number of this iag */ agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); dn_numag = JFS_SBI(pip->i_sb)->bmap->db_numag; - if (agno < 0 || agno > dn_numag) + if (agno < 0 || agno > dn_numag || agno >= MAXAG) return -EIO; if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { From 5b9b8cd28950d9351f81bffaa1a706ce8d33eb7b Mon Sep 17 00:00:00 2001 From: "Jason-JH.Lin" Date: Tue, 27 Aug 2024 22:55:19 +0800 Subject: [PATCH 153/534] drm/mediatek: Fix missing configuration flags in mtk_crtc_ddp_config() [ Upstream commit fe30bae552ce27b9fefe0b12db1544e73d07325f ] In mtk_crtc_ddp_config(), mtk_crtc will use some configuration flags to generate instructions to cmdq_handle, such as: state->pending_config mtk_crtc->pending_planes plane_state->pending.config mtk_crtc->pending_async_planes plane_state->pending.async_config These configuration flags may be set to false when a GCE IRQ comes calling ddp_cmdq_cb(). This may result in missing prepare instructions, especially if mtk_crtc_update_config() with the flase need_vblank (no need to wait for vblank) cases. Therefore, the mtk_crtc->config_updating flag is set at the beginning of mtk_crtc_update_config() to ensure that these configuration flags won't be changed when the mtk_crtc_ddp_config() is preparing instructions. But somehow the ddp_cmdq_cb() didn't use the mtk_crtc->config_updating flag to prevent those pending config flags from being cleared. To avoid missing the configuration when generating the config instruction, the config_updating flag should be added into ddp_cmdq_cb() and be protected with spin_lock. Fixes: 7f82d9c43879 ("drm/mediatek: Clear pending flag when cmdq packet is done") Signed-off-by: Jason-JH.Lin Reviewed-by: CK Hu Reviewed-by: Fei Shao Link: https://patchwork.kernel.org/project/dri-devel/patch/20240827-drm-fixup-0819-v3-1-4761005211ec@mediatek.com/ Link: https://patchwork.kernel.org/project/dri-devel/patch/20240827-drm-fixup-0819-v3-2-4761005211ec@mediatek.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 29e021ec6901..5a1c8b26f1bb 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -67,6 +67,8 @@ struct mtk_drm_crtc { /* lock for display hardware access */ struct mutex hw_lock; bool config_updating; + /* lock for config_updating to cmd buffer */ + spinlock_t config_lock; }; struct mtk_crtc_state { @@ -104,11 +106,16 @@ static void mtk_drm_crtc_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) { + unsigned long flags; + drm_crtc_handle_vblank(&mtk_crtc->base); + + spin_lock_irqsave(&mtk_crtc->config_lock, flags); if (!mtk_crtc->config_updating && mtk_crtc->pending_needs_vblank) { mtk_drm_crtc_finish_page_flip(mtk_crtc); mtk_crtc->pending_needs_vblank = false; } + spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); } #if IS_REACHABLE(CONFIG_MTK_CMDQ) @@ -291,12 +298,19 @@ static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) struct mtk_drm_crtc *mtk_crtc = container_of(cmdq_cl, struct mtk_drm_crtc, cmdq_client); struct mtk_crtc_state *state; unsigned int i; + unsigned long flags; if (data->sta < 0) return; state = to_mtk_crtc_state(mtk_crtc->base.state); + spin_lock_irqsave(&mtk_crtc->config_lock, flags); + if (mtk_crtc->config_updating) { + spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + goto ddp_cmdq_cb_out; + } + state->pending_config = false; if (mtk_crtc->pending_planes) { @@ -323,6 +337,10 @@ static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) mtk_crtc->pending_async_planes = false; } + spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + +ddp_cmdq_cb_out: + mtk_crtc->cmdq_vblank_cnt = 0; wake_up(&mtk_crtc->cb_blocking_queue); } @@ -555,9 +573,14 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, struct mtk_drm_private *priv = crtc->dev->dev_private; unsigned int pending_planes = 0, pending_async_planes = 0; int i; + unsigned long flags; mutex_lock(&mtk_crtc->hw_lock); + + spin_lock_irqsave(&mtk_crtc->config_lock, flags); mtk_crtc->config_updating = true; + spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + if (needs_vblank) mtk_crtc->pending_needs_vblank = true; @@ -611,7 +634,10 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); } #endif + spin_lock_irqsave(&mtk_crtc->config_lock, flags); mtk_crtc->config_updating = false; + spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + mutex_unlock(&mtk_crtc->hw_lock); } @@ -1014,6 +1040,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, drm_mode_crtc_set_gamma_size(&mtk_crtc->base, gamma_lut_size); drm_crtc_enable_color_mgmt(&mtk_crtc->base, 0, has_ctm, gamma_lut_size); mutex_init(&mtk_crtc->hw_lock); + spin_lock_init(&mtk_crtc->config_lock); #if IS_REACHABLE(CONFIG_MTK_CMDQ) i = priv->mbox_index++; From 415a2c218370c0e0f121133942daa36f0c17bb55 Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Wed, 28 Aug 2024 18:14:47 +0800 Subject: [PATCH 154/534] drm/mediatek: Use spin_lock_irqsave() for CRTC event lock [ Upstream commit be03b30b7aa99aca876fbc7c1c1b73b2d0339321 ] Use the state-aware spin_lock_irqsave() and spin_unlock_irqrestore() to avoid unconditionally re-enabling the local interrupts. Fixes: 411f5c1eacfe ("drm/mediatek: handle events when enabling/disabling crtc") Signed-off-by: Fei Shao Link: https://patchwork.kernel.org/project/dri-devel/patch/20240828101511.3269822-1-fshao@chromium.org/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 5a1c8b26f1bb..659112da47b6 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -450,6 +450,7 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) { struct drm_device *drm = mtk_crtc->base.dev; struct drm_crtc *crtc = &mtk_crtc->base; + unsigned long flags; int i; for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { @@ -481,10 +482,10 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) pm_runtime_put(drm->dev); if (crtc->state->event && !crtc->state->active) { - spin_lock_irq(&crtc->dev->event_lock); + spin_lock_irqsave(&crtc->dev->event_lock, flags); drm_crtc_send_vblank_event(crtc, crtc->state->event); crtc->state->event = NULL; - spin_unlock_irq(&crtc->dev->event_lock); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } } From 6b7a006ab003ee01358c2f34ee0f9f2a5aa6b434 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:45 +0200 Subject: [PATCH 155/534] powerpc/8xx: Fix initial memory mapping [ Upstream commit f9f2bff64c2f0dbee57be3d8c2741357ad3d05e6 ] Commit cf209951fa7f ("powerpc/8xx: Map linear memory with huge pages") introduced an initial mapping of kernel TEXT using PAGE_KERNEL_TEXT, but the pages that contain kernel TEXT may also contain kernel RODATA, and depending on selected debug options PAGE_KERNEL_TEXT may be either RWX or ROX. RODATA must be writable during init because it also contains ro_after_init data. So use PAGE_KERNEL_X instead to be sure it is RWX. Fixes: cf209951fa7f ("powerpc/8xx: Map linear memory with huge pages") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/dac7a828d8497c4548c91840575a706657baa4f1.1724173828.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- arch/powerpc/mm/nohash/8xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 324501630278..a947dff35d65 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -149,11 +149,11 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) mmu_mapin_immr(); - mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_X, true); if (debug_pagealloc_enabled_or_kfence()) { top = boundary; } else { - mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_X, true); mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true); } From efdf2af50b311b736c8cfaf348070539f9f8158c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 19:23:46 +0200 Subject: [PATCH 156/534] powerpc/8xx: Fix kernel vs user address comparison [ Upstream commit 65a82e117ffeeab0baf6f871a1cab11a28ace183 ] Since commit 9132a2e82adc ("powerpc/8xx: Define a MODULE area below kernel text"), module exec space is below PAGE_OFFSET so not only space above PAGE_OFFSET, but space above TASK_SIZE need to be seen as kernel space. Until now the problem went undetected because by default TASK_SIZE is 0x8000000 which means address space is determined by just checking upper address bit. But when TASK_SIZE is over 0x80000000, PAGE_OFFSET is used for comparison, leading to thinking module addresses are part of user space. Fix it by using TASK_SIZE instead of PAGE_OFFSET for address comparison. Fixes: 9132a2e82adc ("powerpc/8xx: Define a MODULE area below kernel text") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/3f574c9845ff0a023b46cb4f38d2c45aecd769bd.1724173828.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- arch/powerpc/kernel/head_8xx.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 647b0b445e89..0c94db8e2bde 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -41,12 +41,12 @@ #include "head_32.h" .macro compare_to_kernel_boundary scratch, addr -#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 +#if CONFIG_TASK_SIZE <= 0x80000000 && MODULES_VADDR >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ not. \scratch, \addr #else rlwinm \scratch, \addr, 16, 0xfff8 - cmpli cr0, \scratch, PAGE_OFFSET@h + cmpli cr0, \scratch, TASK_SIZE@h #endif .endm @@ -404,7 +404,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r10, SPRN_SRR0 mtspr SPRN_MD_EPN, r10 rlwinm r11, r10, 16, 0xfff8 - cmpli cr1, r11, PAGE_OFFSET@h + cmpli cr1, r11, TASK_SIZE@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ blt+ cr1, 3f From a02d92e8eb55f713cedf670fbf2714b269d9f212 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Aug 2024 10:00:29 +0200 Subject: [PATCH 157/534] powerpc/vdso: Inconditionally use CFUNC macro [ Upstream commit 65948b0e716a47382731889ee6bbb18642b8b003 ] During merge of commit 4e991e3c16a3 ("powerpc: add CFUNC assembly label annotation") a fallback version of CFUNC macro was added at the last minute, so it can be used inconditionally. Fixes: 4e991e3c16a3 ("powerpc: add CFUNC assembly label annotation") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/0fa863f2f69b2ca4094ae066fcf1430fb31110c9.1724313540.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- arch/powerpc/kernel/vdso/gettimeofday.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 48fc6658053a..894cb939cd2b 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -38,11 +38,7 @@ .else addi r4, r5, VDSO_DATA_OFFSET .endif -#ifdef __powerpc64__ bl CFUNC(DOTSYM(\funct)) -#else - bl \funct -#endif PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) From 7e34440a3d067088add16006dad12294512e5952 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Fri, 5 Jul 2024 12:13:12 +0300 Subject: [PATCH 158/534] drm/msm: Fix incorrect file name output in adreno_request_fw() [ Upstream commit e19366911340c2313a1abbb09c54eaf9bdea4f58 ] In adreno_request_fw() when debugging information is printed to the log after firmware load, an incorrect filename is printed. 'newname' is used instead of 'fwname', so prefix "qcom/" is being added to filename. Looks like "copy-paste" mistake. Fix this mistake by replacing 'newname' with 'fwname'. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 2c41ef1b6f7d ("drm/msm/adreno: deal with linux-firmware fw paths") Signed-off-by: Aleksandr Mishin Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/602382/ Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 96deaf85c0cd..4127e2762dcd 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -468,7 +468,7 @@ adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) ret = request_firmware_direct(&fw, fwname, drm->dev); if (!ret) { DRM_DEV_INFO(drm->dev, "loaded %s from legacy location\n", - newname); + fwname); adreno_gpu->fwloc = FW_LOCATION_LEGACY; goto out; } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { From b9415145327c49c4dc3ca972d486b6688cd54208 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Sun, 1 Sep 2024 13:54:00 +0000 Subject: [PATCH 159/534] drm/msm/a5xx: disable preemption in submits by default [ Upstream commit db9dec2db76146d65e1cfbb6afb2e2bd5dab67f8 ] Fine grain preemption (switching from/to points within submits) requires extra handling in command stream of those submits, especially when rendering with tiling (using GMEM). However this handling is missing at this point in mesa (and always was). For this reason we get random GPU faults and hangs if more than one priority level is used because local preemption is enabled prior to executing command stream from submit. With that said it was ahead of time to enable local preemption by default considering the fact that even on downstream kernel it is only enabled if requested via UAPI. Fixes: a7a4c19c36de ("drm/msm/a5xx: fix setting of the CP_PREEMPT_ENABLE_LOCAL register") Signed-off-by: Vladimir Lypak Patchwork: https://patchwork.freedesktop.org/patch/612041/ Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e5916c106796..14d3a3e78a9d 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -150,9 +150,13 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - /* Enable local preemption for finegrain preemption */ + /* + * Disable local preemption by default because it requires + * user-space to be aware of it and provide additional handling + * to restore rendering state or do various flushes on switch. + */ OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); - OUT_RING(ring, 0x1); + OUT_RING(ring, 0x0); /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ OUT_PKT7(ring, CP_YIELD_ENABLE, 1); From dfd012052bfb3cfcfa3622a4458fbf4a8fe8f1fb Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Sun, 1 Sep 2024 13:54:01 +0000 Subject: [PATCH 160/534] drm/msm/a5xx: properly clear preemption records on resume [ Upstream commit 64fd6d01a52904bdbda0ce810a45a428c995a4ca ] Two fields of preempt_record which are used by CP aren't reset on resume: "data" and "info". This is the reason behind faults which happen when we try to switch to the ring that was active last before suspend. In addition those faults can't be recovered from because we use suspend and resume to do so (keeping values of those fields again). Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets") Signed-off-by: Vladimir Lypak Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/612043/ Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index f58dd564d122..67a8ef4adf6b 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -204,6 +204,8 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu) return; for (i = 0; i < gpu->nr_rings; i++) { + a5xx_gpu->preempt[i]->data = 0; + a5xx_gpu->preempt[i]->info = 0; a5xx_gpu->preempt[i]->wptr = 0; a5xx_gpu->preempt[i]->rptr = 0; a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova; From d9bef5ba5638cf5d6df84a03a068102e6407ac07 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Sun, 1 Sep 2024 13:54:02 +0000 Subject: [PATCH 161/534] drm/msm/a5xx: fix races in preemption evaluation stage [ Upstream commit ce050f307ad93bcc5958d0dd35fc276fd394d274 ] On A5XX GPUs when preemption is used it's invietable to enter a soft lock-up state in which GPU is stuck at empty ring-buffer doing nothing. This appears as full UI lockup and not detected as GPU hang (because it's not). This happens due to not triggering preemption when it was needed. Sometimes this state can be recovered by some new submit but generally it won't happen because applications are waiting for old submits to retire. One of the reasons why this happens is a race between a5xx_submit and a5xx_preempt_trigger called from IRQ during submit retire. Former thread updates ring->cur of previously empty and not current ring right after latter checks it for emptiness. Then both threads can just exit because for first one preempt_state wasn't NONE yet and for second one all rings appeared to be empty. To prevent such situations from happening we need to establish guarantee for preempt_trigger to make decision after each submit or retire. To implement this we serialize preemption initiation using spinlock. If switch is already in progress we need to re-trigger preemption when it finishes. Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets") Signed-off-by: Vladimir Lypak Patchwork: https://patchwork.freedesktop.org/patch/612045/ Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 1 + drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 24 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index c7187bcc5e90..b4d06ca3e499 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -36,6 +36,7 @@ struct a5xx_gpu { uint64_t preempt_iova[MSM_GPU_MAX_RINGS]; atomic_t preempt_state; + spinlock_t preempt_start_lock; struct timer_list preempt_timer; struct drm_gem_object *shadow_bo; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 67a8ef4adf6b..c65b34a4a8cc 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -97,12 +97,19 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) if (gpu->nr_rings == 1) return; + /* + * Serialize preemption start to ensure that we always make + * decision on latest state. Otherwise we can get stuck in + * lower priority or empty ring. + */ + spin_lock_irqsave(&a5xx_gpu->preempt_start_lock, flags); + /* * Try to start preemption by moving from NONE to START. If * unsuccessful, a preemption is already in flight */ if (!try_preempt_state(a5xx_gpu, PREEMPT_NONE, PREEMPT_START)) - return; + goto out; /* Get the next ring to preempt to */ ring = get_next_ring(gpu); @@ -127,9 +134,11 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) set_preempt_state(a5xx_gpu, PREEMPT_ABORT); update_wptr(gpu, a5xx_gpu->cur_ring); set_preempt_state(a5xx_gpu, PREEMPT_NONE); - return; + goto out; } + spin_unlock_irqrestore(&a5xx_gpu->preempt_start_lock, flags); + /* Make sure the wptr doesn't update while we're in motion */ spin_lock_irqsave(&ring->preempt_lock, flags); a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring); @@ -152,6 +161,10 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) /* And actually start the preemption */ gpu_write(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL, 1); + return; + +out: + spin_unlock_irqrestore(&a5xx_gpu->preempt_start_lock, flags); } void a5xx_preempt_irq(struct msm_gpu *gpu) @@ -188,6 +201,12 @@ void a5xx_preempt_irq(struct msm_gpu *gpu) update_wptr(gpu, a5xx_gpu->cur_ring); set_preempt_state(a5xx_gpu, PREEMPT_NONE); + + /* + * Try to trigger preemption again in case there was a submit or + * retire during ring switch + */ + a5xx_preempt_trigger(gpu); } void a5xx_preempt_hw_init(struct msm_gpu *gpu) @@ -300,5 +319,6 @@ void a5xx_preempt_init(struct msm_gpu *gpu) } } + spin_lock_init(&a5xx_gpu->preempt_start_lock); timer_setup(&a5xx_gpu->preempt_timer, a5xx_preempt_timer, 0); } From cbd26fc9ec4cc6d823caafcbf753a2b9a8dfc0c5 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Sun, 1 Sep 2024 13:54:03 +0000 Subject: [PATCH 162/534] drm/msm/a5xx: workaround early ring-buffer emptiness check [ Upstream commit a30f9f65b5ac82d4390548c32ed9c7f05de7ddf5 ] There is another cause for soft lock-up of GPU in empty ring-buffer: race between GPU executing last commands and CPU checking ring for emptiness. On GPU side IRQ for retire is triggered by CACHE_FLUSH_TS event and RPTR shadow (which is used to check ring emptiness) is updated a bit later from CP_CONTEXT_SWITCH_YIELD. Thus if GPU is executing its last commands slow enough or we check that ring too fast we will miss a chance to trigger switch to lower priority ring because current ring isn't empty just yet. This can escalate to lock-up situation described in previous patch. To work-around this issue we keep track of last submit sequence number for each ring and compare it with one written to memptrs from GPU during execution of CACHE_FLUSH_TS event. Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets") Signed-off-by: Vladimir Lypak Patchwork: https://patchwork.freedesktop.org/patch/612047/ Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++++ drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 1 + drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 14d3a3e78a9d..8c2758a18a19 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -65,6 +65,8 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = submit->ring; struct drm_gem_object *obj; uint32_t *ptr, dwords; @@ -109,6 +111,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit } } + a5xx_gpu->last_seqno[ring->id] = submit->seqno; a5xx_flush(gpu, ring, true); a5xx_preempt_trigger(gpu); @@ -210,6 +213,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) /* Write the fence to the scratch register */ OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); OUT_RING(ring, submit->seqno); + a5xx_gpu->last_seqno[ring->id] = submit->seqno; /* * Execute a CACHE_FLUSH_TS event. This will ensure that the diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index b4d06ca3e499..9c0d701fe4b8 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -34,6 +34,7 @@ struct a5xx_gpu { struct drm_gem_object *preempt_counters_bo[MSM_GPU_MAX_RINGS]; struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS]; uint64_t preempt_iova[MSM_GPU_MAX_RINGS]; + uint32_t last_seqno[MSM_GPU_MAX_RINGS]; atomic_t preempt_state; spinlock_t preempt_start_lock; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index c65b34a4a8cc..0469fea55010 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -55,6 +55,8 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Return the highest priority ringbuffer with something in it */ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); unsigned long flags; int i; @@ -64,6 +66,8 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) spin_lock_irqsave(&ring->preempt_lock, flags); empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring)); + if (!empty && ring == a5xx_gpu->cur_ring) + empty = ring->memptrs->fence == a5xx_gpu->last_seqno[i]; spin_unlock_irqrestore(&ring->preempt_lock, flags); if (!empty) From 52d571a213495938406569db8ecebb95cb546bc2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 1 Sep 2024 11:02:11 +0200 Subject: [PATCH 163/534] ipmi: docs: don't advertise deprecated sysfs entries [ Upstream commit 64dce81f8c373c681e62d5ffe0397c45a35d48a2 ] "i2c-adapter" class entries are deprecated since 2009. Switch to the proper location. Reported-by: Heiner Kallweit Closes: https://lore.kernel.org/r/80c4a898-5867-4162-ac85-bdf7c7c68746@gmail.com Fixes: 259307074bfc ("ipmi: Add SMBus interface driver (SSIF)") Signed-off-by: Wolfram Sang Message-Id: <20240901090211.3797-2-wsa+renesas@sang-engineering.com> Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- Documentation/driver-api/ipmi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/driver-api/ipmi.rst b/Documentation/driver-api/ipmi.rst index e224e47b6b09..dfa021eacd63 100644 --- a/Documentation/driver-api/ipmi.rst +++ b/Documentation/driver-api/ipmi.rst @@ -540,7 +540,7 @@ at module load time (for a module) with:: alerts_broken The addresses are normal I2C addresses. The adapter is the string -name of the adapter, as shown in /sys/class/i2c-adapter/i2c-/name. +name of the adapter, as shown in /sys/bus/i2c/devices/i2c-/name. It is *NOT* i2c- itself. Also, the comparison is done ignoring spaces, so if the name is "This is an I2C chip" you can say adapter_name=ThisisanI2cchip. This is because it's hard to pass in From 476945372b1f8b78fb30c5b191ef4c24a210dcad Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 4 Aug 2024 08:40:07 +0300 Subject: [PATCH 164/534] drm/msm/dsi: correct programming sequence for SM8350 / SM8450 [ Upstream commit 1328cb7c34bf6d056df9ff694ee5194537548258 ] According to the display-drivers, 5nm DSI PLL (v4.2, v4.3) have different boundaries for pll_clock_inverters programming. Follow the vendor code and use correct values. Fixes: 2f9ae4e395ed ("drm/msm/dsi: add support for DSI-PHY on SM8350 and SM8450") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/606947/ Link: https://lore.kernel.org/r/20240804-sm8350-fixes-v1-3-1149dd8399fe@linaro.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 89a6344bc865..f72ce6a3c456 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -135,7 +135,7 @@ static void dsi_pll_calc_dec_frac(struct dsi_pll_7nm *pll, struct dsi_pll_config config->pll_clock_inverters = 0x00; else config->pll_clock_inverters = 0x40; - } else { + } else if (pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_1) { if (pll_freq <= 1000000000ULL) config->pll_clock_inverters = 0xa0; else if (pll_freq <= 2500000000ULL) @@ -144,6 +144,16 @@ static void dsi_pll_calc_dec_frac(struct dsi_pll_7nm *pll, struct dsi_pll_config config->pll_clock_inverters = 0x00; else config->pll_clock_inverters = 0x40; + } else { + /* 4.2, 4.3 */ + if (pll_freq <= 1000000000ULL) + config->pll_clock_inverters = 0xa0; + else if (pll_freq <= 2500000000ULL) + config->pll_clock_inverters = 0x20; + else if (pll_freq <= 3500000000ULL) + config->pll_clock_inverters = 0x00; + else + config->pll_clock_inverters = 0x40; } config->decimal_div_start = dec; From 614773a4e536f18c2b28016d1bd464e28c3c5160 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Tue, 27 Aug 2024 09:53:37 -0700 Subject: [PATCH 165/534] drm/msm: fix %s null argument error [ Upstream commit 25b85075150fe8adddb096db8a4b950353045ee1 ] The following build error was triggered because of NULL string argument: BUILDSTDERR: drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c: In function 'mdp5_smp_dump': BUILDSTDERR: drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c:352:51: error: '%s' directive argument is null [-Werror=format-overflow=] BUILDSTDERR: 352 | drm_printf(p, "%s:%d\t%d\t%s\n", BUILDSTDERR: | ^~ BUILDSTDERR: drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c:352:51: error: '%s' directive argument is null [-Werror=format-overflow=] This happens from the commit a61ddb4393ad ("drm: enable (most) W=1 warnings by default across the subsystem"). Using "(null)" instead to fix it. Fixes: bc5289eed481 ("drm/msm/mdp5: add debugfs to show smp block status") Signed-off-by: Sherry Yang Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/611071/ Link: https://lore.kernel.org/r/20240827165337.1075904-1-sherry.yang@oracle.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c index 56a3063545ec..12d07e93a4c4 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c @@ -356,7 +356,7 @@ void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p) drm_printf(p, "%s:%d\t%d\t%s\n", pipe2name(pipe), j, inuse, - plane ? plane->name : NULL); + plane ? plane->name : "(null)"); total += inuse; } From 37c40c01cf1ac034194f31d7a7def97ad856ef2c Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Thu, 22 Aug 2024 17:09:27 +0800 Subject: [PATCH 166/534] drivers:drm:exynos_drm_gsc:Fix wrong assignment in gsc_bind() [ Upstream commit 94ebc3d3235c5c516f67315059ce657e5090e94b ] cocci reported a double assignment problem. Upon reviewing previous commits, it appears this may actually be an incorrect assignment. Fixes: 8b9550344d39 ("drm/ipp: clean up debug messages") Signed-off-by: Yuesong Li Signed-off-by: Inki Dae Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 5302bebbe38c..1456abd5b9dd 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1173,7 +1173,7 @@ static int gsc_bind(struct device *dev, struct device *master, void *data) struct exynos_drm_ipp *ipp = &ctx->ipp; ctx->drm_dev = drm_dev; - ctx->drm_dev = drm_dev; + ipp->drm_dev = drm_dev; exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv); exynos_drm_ipp_register(dev, ipp, &ipp_funcs, From cafeba3c2a1f015e26feb5629ae696e84cfb5ec9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sat, 3 Aug 2024 08:01:22 +0200 Subject: [PATCH 167/534] xen: use correct end address of kernel for conflict checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fac1bceeeb04886fc2ee952672e6e6c85ce41dca ] When running as a Xen PV dom0 the kernel is loaded by the hypervisor using a different memory map than that of the host. In order to minimize the required changes in the kernel, the kernel adapts its memory map to that of the host. In order to do that it is checking for conflicts of its load address with the host memory map. Unfortunately the tested memory range does not include the .brk area, which might result in crashes or memory corruption when this area does conflict with the memory map of the host. Fix the test by using the _end label instead of __bss_stop. Fixes: 808fdb71936c ("xen: check for kernel memory conflicting with memory layout") Signed-off-by: Juergen Gross Tested-by: Marek Marczykowski-Górecki Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 380591028cb8..d44d6d8b3319 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -825,7 +825,7 @@ char * __init xen_memory_setup(void) * to relocating (and even reusing) pages with kernel text or data. */ if (xen_is_e820_reserved(__pa_symbol(_text), - __pa_symbol(__bss_stop) - __pa_symbol(_text))) { + __pa_symbol(_end) - __pa_symbol(_text))) { xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n"); BUG(); } From 85572bf646e4914b487117736f404701ae252339 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 9 Sep 2024 13:32:07 -0700 Subject: [PATCH 168/534] HID: wacom: Support sequence numbers smaller than 16-bit [ Upstream commit 359673ea3a203611b4f6d0f28922a4b9d2cfbcc8 ] The current dropped packet reporting assumes that all sequence numbers are 16 bits in length. This results in misleading "Dropped" messages if the hardware uses fewer bits. For example, if a tablet uses only 8 bits to store its sequence number, once it rolls over from 255 -> 0, the driver will still be expecting a packet "256". This patch adjusts the logic to reset the next expected packet to logical_minimum whenever it overflows beyond logical_maximum. Signed-off-by: Jason Gerecke Tested-by: Joshua Dickens Fixes: 6d09085b38e5 ("HID: wacom: Adding Support for new usages") Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/wacom_wac.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 5db26a8af772..c3c576641a38 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2492,9 +2492,14 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field wacom_wac->hid_data.barrelswitch3 = value; return; case WACOM_HID_WD_SEQUENCENUMBER: - if (wacom_wac->hid_data.sequence_number != value) - hid_warn(hdev, "Dropped %hu packets", (unsigned short)(value - wacom_wac->hid_data.sequence_number)); + if (wacom_wac->hid_data.sequence_number != value) { + int sequence_size = field->logical_maximum - field->logical_minimum + 1; + int drop_count = (value - wacom_wac->hid_data.sequence_number) % sequence_size; + hid_warn(hdev, "Dropped %d packets", drop_count); + } wacom_wac->hid_data.sequence_number = value + 1; + if (wacom_wac->hid_data.sequence_number > field->logical_maximum) + wacom_wac->hid_data.sequence_number = field->logical_minimum; return; } From f7b4ba5f78f240bc0afc916d2fa6df8b7fd68fac Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 9 Sep 2024 13:32:08 -0700 Subject: [PATCH 169/534] HID: wacom: Do not warn about dropped packets for first packet [ Upstream commit 84aecf2d251a3359bc78b7c8e58f54b9fc966e89 ] The driver currently assumes that the first sequence number it will see is going to be 0. This is not a realiable assumption and can break if, for example, the tablet has already been running for some time prior to the kernel driver connecting to the device. This commit initializes the expected sequence number to -1 and will only print the "Dropped" warning the it has been updated to a non-negative value. Signed-off-by: Jason Gerecke Tested-by: Joshua Dickens Fixes: 6d09085b38e5 ("HID: wacom: Adding Support for new usages") Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/wacom_wac.c | 6 +++++- drivers/hid/wacom_wac.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index c3c576641a38..18b5cd0234d2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2368,6 +2368,9 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev, wacom_map_usage(input, usage, field, EV_KEY, BTN_STYLUS3, 0); features->quirks &= ~WACOM_QUIRK_PEN_BUTTON3; break; + case WACOM_HID_WD_SEQUENCENUMBER: + wacom_wac->hid_data.sequence_number = -1; + break; } } @@ -2492,7 +2495,8 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field wacom_wac->hid_data.barrelswitch3 = value; return; case WACOM_HID_WD_SEQUENCENUMBER: - if (wacom_wac->hid_data.sequence_number != value) { + if (wacom_wac->hid_data.sequence_number != value && + wacom_wac->hid_data.sequence_number >= 0) { int sequence_size = field->logical_maximum - field->logical_minimum + 1; int drop_count = (value - wacom_wac->hid_data.sequence_number) % sequence_size; hid_warn(hdev, "Dropped %d packets", drop_count); diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index 57e185f18d53..61073fe81ead 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -324,7 +324,7 @@ struct hid_data { int bat_connected; int ps_connected; bool pad_input_event_flag; - unsigned short sequence_number; + int sequence_number; ktime_t time_delayed; }; From 27f113dc120c132936cc86fd4e88928733afa59d Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 9 Sep 2024 17:42:38 +0200 Subject: [PATCH 170/534] ata: libata: Clear DID_TIME_OUT for ATA PT commands with sense data [ Upstream commit e5dd410acb34c7341a0a93b429dcf3dabf9e3323 ] When ata_qc_complete() schedules a command for EH using ata_qc_schedule_eh(), blk_abort_request() will be called, which leads to req->q->mq_ops->timeout() / scsi_timeout() being called. scsi_timeout(), if the LLDD has no abort handler (libata has no abort handler), will set host byte to DID_TIME_OUT, and then call scsi_eh_scmd_add() to add the command to EH. Thus, when commands first enter libata's EH strategy_handler, all the commands that have been added to EH will have DID_TIME_OUT set. libata has its own flag (AC_ERR_TIMEOUT), that it sets for commands that have not received a completion at the time of entering EH. Thus, libata doesn't really care about DID_TIME_OUT at all, and currently clears the host byte at the end of EH, in ata_scsi_qc_complete(), before scsi_eh_finish_cmd() is called. However, this clearing in ata_scsi_qc_complete() is currently only done for commands that are not ATA passthrough commands. Since the host byte is visible in the completion that we return to user space for ATA passthrough commands, for ATA passthrough commands that got completed via EH (commands with sense data), the user will incorrectly see: ATA pass-through(16): transport error: Host_status=0x03 [DID_TIME_OUT] Fix this by moving the clearing of the host byte (which is currently only done for commands that are not ATA passthrough commands) from ata_scsi_qc_complete() to the start of EH (regardless if the command is ATA passthrough or not). While at it, use the proper helper function to clear the host byte, rather than open coding the clearing. This will make sure that we: -Correctly clear DID_TIME_OUT for both ATA passthrough commands and commands that are not ATA passthrough commands. -Do not needlessly clear the host byte for commands that did not go via EH. ata_scsi_qc_complete() is called both for commands that are completed normally (without going via EH), and for commands that went via EH, however, only commands that went via EH will have DID_TIME_OUT set. Fixes: 24aeebbf8ea9 ("scsi: ata: libata: Change ata_eh_request_sense() to not set CHECK_CONDITION") Reported-by: Igor Pylypiv Closes: https://lore.kernel.org/linux-ide/ZttIN8He8TOZ7Lct@google.com/ Signed-off-by: Niklas Cassel Tested-by: Igor Pylypiv Reviewed-by: Hannes Reinecke Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/libata-eh.c | 8 ++++++++ drivers/ata/libata-scsi.c | 3 --- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 1168e29cae86..cd8745737545 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -618,6 +618,14 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) { struct ata_queued_cmd *qc; + /* + * If the scmd was added to EH, via ata_qc_schedule_eh() -> + * scsi_timeout() -> scsi_eh_scmd_add(), scsi_timeout() will + * have set DID_TIME_OUT (since libata does not have an abort + * handler). Thus, to clear DID_TIME_OUT, clear the host byte. + */ + set_host_byte(scmd, DID_OK); + ata_qc_for_each_raw(ap, qc, i) { if (qc->flags & ATA_QCFLAG_ACTIVE && qc->scsicmd == scmd) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index c91f8746289f..7a71add807a3 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1725,9 +1725,6 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) set_status_byte(qc->scsicmd, SAM_STAT_CHECK_CONDITION); } else if (is_error && !have_sense) { ata_gen_ata_sense(qc); - } else { - /* Keep the SCSI ML and status byte, clear host byte. */ - cmd->result &= 0x0000ffff; } ata_qc_done(qc); From 35a10211dec2b9d4e93f88837bc93168f485d371 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 26 Jul 2024 15:09:07 -0700 Subject: [PATCH 171/534] minmax: avoid overly complex min()/max() macro arguments in xen [ Upstream commit e8432ac802a028eaee6b1e86383d7cd8e9fb8431 ] We have some very fancy min/max macros that have tons of sanity checking to warn about mixed signedness etc. This is all things that a sane compiler should warn about, but there are no sane compiler interfaces for this, and '-Wsign-compare' is broken [1] and not useful. So then we compensate (some would say over-compensate) by doing the checks manually with some truly horrid macro games. And no, we can't just use __builtin_types_compatible_p(), because the whole question of "does it make sense to compare these two values" is a lot more complicated than that. For example, it makes a ton of sense to compare unsigned values with simple constants like "5", even if that is indeed a signed type. So we have these very strange macros to try to make sensible type checking decisions on the arguments to 'min()' and 'max()'. But that can cause enormous code expansion if the min()/max() macros are used with complicated expressions, and particularly if you nest these things so that you get the first big expansion then expanded again. The xen setup.c file ended up ballooning to over 50MB of preprocessed noise that takes 15s to compile (obviously depending on the build host), largely due to one single line. So let's split that one single line to just be simpler. I think it ends up being more legible to humans too at the same time. Now that single file compiles in under a second. Reported-and-reviewed-by: Lorenzo Stoakes Link: https://lore.kernel.org/all/c83c17bb-be75-4c67-979d-54eee38774c6@lucifer.local/ Link: https://staticthinking.wordpress.com/2023/07/25/wsign-compare-is-garbage/ [1] Cc: David Laight Signed-off-by: Linus Torvalds Stable-dep-of: be35d91c8880 ("xen: tolerate ACPI NVS memory overlapping with Xen allocated memory") Signed-off-by: Sasha Levin --- arch/x86/xen/setup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d44d6d8b3319..d2073df5c562 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -691,6 +691,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long max_pages; unsigned long extra_pages = 0; + unsigned long maxmem_pages; int i; int op; @@ -762,8 +763,8 @@ char * __init xen_memory_setup(void) * Make sure we have no memory above max_pages, as this area * isn't handled by the p2m management. */ - extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), - extra_pages, max_pages - max_pfn); + maxmem_pages = EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)); + extra_pages = min3(maxmem_pages, extra_pages, max_pages - max_pfn); i = 0; addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; From 242d0c3c40cc52a01a2aea43cffae658da057bfb Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 2 Aug 2024 14:11:06 +0200 Subject: [PATCH 172/534] xen: introduce generic helper checking for memory map conflicts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ba88829706e2c5b7238638fc2b0713edf596495e ] When booting as a Xen PV dom0 the memory layout of the dom0 is modified to match that of the host, as this requires less changes in the kernel for supporting Xen. There are some cases, though, which are problematic, as it is the Xen hypervisor selecting the kernel's load address plus some other data, which might conflict with the host's memory map. These conflicts are detected at boot time and result in a boot error. In order to support handling at least some of these conflicts in future, introduce a generic helper function which will later gain the ability to adapt the memory layout when possible. Add the missing check for the xen_start_info area. Note that possible p2m map and initrd memory conflicts are handled already by copying the data to memory areas not conflicting with the memory map. The initial stack allocated by Xen doesn't need to be checked, as early boot code is switching to the statically allocated initial kernel stack. Initial page tables and the kernel itself will be handled later. Signed-off-by: Juergen Gross Tested-by: Marek Marczykowski-Górecki Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Stable-dep-of: be35d91c8880 ("xen: tolerate ACPI NVS memory overlapping with Xen allocated memory") Signed-off-by: Sasha Levin --- arch/x86/xen/mmu_pv.c | 5 +---- arch/x86/xen/setup.c | 34 ++++++++++++++++++++++++++++------ arch/x86/xen/xen-ops.h | 3 ++- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 9d4a9311e819..6b201e64d8ab 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2019,10 +2019,7 @@ void __init xen_reserve_special_pages(void) void __init xen_pt_check_e820(void) { - if (xen_is_e820_reserved(xen_pt_base, xen_pt_size)) { - xen_raw_console_write("Xen hypervisor allocated page table memory conflicts with E820 map\n"); - BUG(); - } + xen_chk_is_e820_usable(xen_pt_base, xen_pt_size, "page table"); } static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d2073df5c562..84cbc7ec5581 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -568,7 +568,7 @@ static void __init xen_ignore_unusable(void) } } -bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size) +static bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size) { struct e820_entry *entry; unsigned mapcnt; @@ -625,6 +625,23 @@ phys_addr_t __init xen_find_free_area(phys_addr_t size) return 0; } +/* + * Check for an area in physical memory to be usable for non-movable purposes. + * An area is considered to usable if the used E820 map lists it to be RAM. + * In case the area is not usable, crash the system with an error message. + */ +void __init xen_chk_is_e820_usable(phys_addr_t start, phys_addr_t size, + const char *component) +{ + if (!xen_is_e820_reserved(start, size)) + return; + + xen_raw_console_write("Xen hypervisor allocated "); + xen_raw_console_write(component); + xen_raw_console_write(" memory conflicts with E820 map\n"); + BUG(); +} + /* * Like memcpy, but with physical addresses for dest and src. */ @@ -825,11 +842,16 @@ char * __init xen_memory_setup(void) * Failing now is better than running into weird problems later due * to relocating (and even reusing) pages with kernel text or data. */ - if (xen_is_e820_reserved(__pa_symbol(_text), - __pa_symbol(_end) - __pa_symbol(_text))) { - xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n"); - BUG(); - } + xen_chk_is_e820_usable(__pa_symbol(_text), + __pa_symbol(_end) - __pa_symbol(_text), + "kernel"); + + /* + * Check for a conflict of the xen_start_info memory with the target + * E820 map. + */ + xen_chk_is_e820_usable(__pa(xen_start_info), sizeof(*xen_start_info), + "xen_start_info"); /* * Check for a conflict of the hypervisor supplied page tables with diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 79cf93f2c92f..f46a1dd3624d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -44,7 +44,8 @@ void xen_mm_unpin_all(void); void __init xen_relocate_p2m(void); #endif -bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size); +void __init xen_chk_is_e820_usable(phys_addr_t start, phys_addr_t size, + const char *component); unsigned long __ref xen_chk_extra_mem(unsigned long pfn); void __init xen_inv_extra_mem(void); void __init xen_remap_memory(void); From f12153eece97e158cc75c4b44a66828352c4f7fd Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 6 Aug 2024 10:24:41 +0200 Subject: [PATCH 173/534] xen: move max_pfn in xen_memory_setup() out of function scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 43dc2a0f479b9cd30f6674986d7a40517e999d31 ] Instead of having max_pfn as a local variable of xen_memory_setup(), make it a static variable in setup.c instead. This avoids having to pass it to subfunctions, which will be needed in more cases in future. Rename it to ini_nr_pages, as the value denotes the currently usable number of memory pages as passed from the hypervisor at boot time. Signed-off-by: Juergen Gross Tested-by: Marek Marczykowski-Górecki Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Stable-dep-of: be35d91c8880 ("xen: tolerate ACPI NVS memory overlapping with Xen allocated memory") Signed-off-by: Sasha Levin --- arch/x86/xen/setup.c | 52 ++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 84cbc7ec5581..112b071bac9d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -47,6 +47,9 @@ bool xen_pv_pci_possible; /* E820 map used during setting up memory. */ static struct e820_table xen_e820_table __initdata; +/* Number of initially usable memory pages. */ +static unsigned long ini_nr_pages __initdata; + /* * Buffer used to remap identity mapped pages. We only need the virtual space. * The physical page behind this address is remapped as needed to different @@ -213,7 +216,7 @@ static int __init xen_free_mfn(unsigned long mfn) * as a fallback if the remapping fails. */ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, - unsigned long end_pfn, unsigned long nr_pages) + unsigned long end_pfn) { unsigned long pfn, end; int ret; @@ -221,7 +224,7 @@ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, WARN_ON(start_pfn > end_pfn); /* Release pages first. */ - end = min(end_pfn, nr_pages); + end = min(end_pfn, ini_nr_pages); for (pfn = start_pfn; pfn < end; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); @@ -342,15 +345,14 @@ static void __init xen_do_set_identity_and_remap_chunk( * to Xen and not remapped. */ static unsigned long __init xen_set_identity_and_remap_chunk( - unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, - unsigned long remap_pfn) + unsigned long start_pfn, unsigned long end_pfn, unsigned long remap_pfn) { unsigned long pfn; unsigned long i = 0; unsigned long n = end_pfn - start_pfn; if (remap_pfn == 0) - remap_pfn = nr_pages; + remap_pfn = ini_nr_pages; while (i < n) { unsigned long cur_pfn = start_pfn + i; @@ -359,19 +361,19 @@ static unsigned long __init xen_set_identity_and_remap_chunk( unsigned long remap_range_size; /* Do not remap pages beyond the current allocation */ - if (cur_pfn >= nr_pages) { + if (cur_pfn >= ini_nr_pages) { /* Identity map remaining pages */ set_phys_range_identity(cur_pfn, cur_pfn + size); break; } - if (cur_pfn + size > nr_pages) - size = nr_pages - cur_pfn; + if (cur_pfn + size > ini_nr_pages) + size = ini_nr_pages - cur_pfn; remap_range_size = xen_find_pfn_range(&remap_pfn); if (!remap_range_size) { pr_warn("Unable to find available pfn range, not remapping identity pages\n"); xen_set_identity_and_release_chunk(cur_pfn, - cur_pfn + left, nr_pages); + cur_pfn + left); break; } /* Adjust size to fit in current e820 RAM region */ @@ -398,18 +400,18 @@ static unsigned long __init xen_set_identity_and_remap_chunk( } static unsigned long __init xen_count_remap_pages( - unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, + unsigned long start_pfn, unsigned long end_pfn, unsigned long remap_pages) { - if (start_pfn >= nr_pages) + if (start_pfn >= ini_nr_pages) return remap_pages; - return remap_pages + min(end_pfn, nr_pages) - start_pfn; + return remap_pages + min(end_pfn, ini_nr_pages) - start_pfn; } -static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages, +static unsigned long __init xen_foreach_remap_area( unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn, - unsigned long nr_pages, unsigned long last_val)) + unsigned long last_val)) { phys_addr_t start = 0; unsigned long ret_val = 0; @@ -437,8 +439,7 @@ static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages, end_pfn = PFN_UP(entry->addr); if (start_pfn < end_pfn) - ret_val = func(start_pfn, end_pfn, nr_pages, - ret_val); + ret_val = func(start_pfn, end_pfn, ret_val); start = end; } } @@ -701,7 +702,7 @@ static void __init xen_reserve_xen_mfnlist(void) **/ char * __init xen_memory_setup(void) { - unsigned long max_pfn, pfn_s, n_pfns; + unsigned long pfn_s, n_pfns; phys_addr_t mem_end, addr, size, chunk_size; u32 type; int rc; @@ -713,9 +714,8 @@ char * __init xen_memory_setup(void) int op; xen_parse_512gb(); - max_pfn = xen_get_pages_limit(); - max_pfn = min(max_pfn, xen_start_info->nr_pages); - mem_end = PFN_PHYS(max_pfn); + ini_nr_pages = min(xen_get_pages_limit(), xen_start_info->nr_pages); + mem_end = PFN_PHYS(ini_nr_pages); memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries); set_xen_guest_handle(memmap.buffer, xen_e820_table.entries); @@ -768,10 +768,10 @@ char * __init xen_memory_setup(void) max_pages = xen_get_max_pages(); /* How many extra pages do we need due to remapping? */ - max_pages += xen_foreach_remap_area(max_pfn, xen_count_remap_pages); + max_pages += xen_foreach_remap_area(xen_count_remap_pages); - if (max_pages > max_pfn) - extra_pages += max_pages - max_pfn; + if (max_pages > ini_nr_pages) + extra_pages += max_pages - ini_nr_pages; /* * Clamp the amount of extra memory to a EXTRA_MEM_RATIO @@ -780,8 +780,8 @@ char * __init xen_memory_setup(void) * Make sure we have no memory above max_pages, as this area * isn't handled by the p2m management. */ - maxmem_pages = EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)); - extra_pages = min3(maxmem_pages, extra_pages, max_pages - max_pfn); + maxmem_pages = EXTRA_MEM_RATIO * min(ini_nr_pages, PFN_DOWN(MAXMEM)); + extra_pages = min3(maxmem_pages, extra_pages, max_pages - ini_nr_pages); i = 0; addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; @@ -886,7 +886,7 @@ char * __init xen_memory_setup(void) * Set identity map on non-RAM pages and prepare remapping the * underlying RAM. */ - xen_foreach_remap_area(max_pfn, xen_set_identity_and_remap_chunk); + xen_foreach_remap_area(xen_set_identity_and_remap_chunk); pr_info("Released %ld page(s)\n", xen_released_pages); From 149fbd6aecdb550880498f093972eb0f777d3b86 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 14 Aug 2024 16:47:25 +0200 Subject: [PATCH 174/534] xen: add capability to remap non-RAM pages to different PFNs [ Upstream commit d05208cf7f05420ad10cc7f9550f91d485523659 ] When running as a Xen PV dom0 it can happen that the kernel is being loaded to a guest physical address conflicting with the host memory map. In order to be able to resolve this conflict, add the capability to remap non-RAM areas to different guest PFNs. A function to use this remapping information for other purposes than doing the remap will be added when needed. As the number of conflicts should be rather low (currently only machines with max. 1 conflict are known), save the remap data in a small statically allocated array. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Stable-dep-of: be35d91c8880 ("xen: tolerate ACPI NVS memory overlapping with Xen allocated memory") Signed-off-by: Sasha Levin --- arch/x86/xen/p2m.c | 63 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/xen-ops.h | 3 ++ 2 files changed, 66 insertions(+) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 4c2bf989edaf..6cb8dae768fa 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -80,6 +80,7 @@ #include #include #include +#include #include "multicalls.h" #include "xen-ops.h" @@ -794,6 +795,68 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, return ret; } +/* Remapped non-RAM areas */ +#define NR_NONRAM_REMAP 4 +static struct nonram_remap { + phys_addr_t maddr; + phys_addr_t paddr; + size_t size; +} xen_nonram_remap[NR_NONRAM_REMAP] __ro_after_init; +static unsigned int nr_nonram_remap __ro_after_init; + +/* + * Do the real remapping of non-RAM regions as specified in the + * xen_nonram_remap[] array. + * In case of an error just crash the system. + */ +void __init xen_do_remap_nonram(void) +{ + unsigned int i; + unsigned int remapped = 0; + const struct nonram_remap *remap = xen_nonram_remap; + unsigned long pfn, mfn, end_pfn; + + for (i = 0; i < nr_nonram_remap; i++) { + end_pfn = PFN_UP(remap->paddr + remap->size); + pfn = PFN_DOWN(remap->paddr); + mfn = PFN_DOWN(remap->maddr); + while (pfn < end_pfn) { + if (!set_phys_to_machine(pfn, mfn)) + panic("Failed to set p2m mapping for pfn=%lx mfn=%lx\n", + pfn, mfn); + + pfn++; + mfn++; + remapped++; + } + + remap++; + } + + pr_info("Remapped %u non-RAM page(s)\n", remapped); +} + +/* + * Add a new non-RAM remap entry. + * In case of no free entry found, just crash the system. + */ +void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr, + unsigned long size) +{ + BUG_ON((maddr & ~PAGE_MASK) != (paddr & ~PAGE_MASK)); + + if (nr_nonram_remap == NR_NONRAM_REMAP) { + xen_raw_console_write("Number of required E820 entry remapping actions exceed maximum value\n"); + BUG(); + } + + xen_nonram_remap[nr_nonram_remap].maddr = maddr; + xen_nonram_remap[nr_nonram_remap].paddr = paddr; + xen_nonram_remap[nr_nonram_remap].size = size; + + nr_nonram_remap++; +} + #ifdef CONFIG_XEN_DEBUG_FS #include #include "debugfs.h" diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f46a1dd3624d..a6a21dd05527 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -43,6 +43,9 @@ void xen_mm_unpin_all(void); #ifdef CONFIG_X86_64 void __init xen_relocate_p2m(void); #endif +void __init xen_do_remap_nonram(void); +void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr, + unsigned long size); void __init xen_chk_is_e820_usable(phys_addr_t start, phys_addr_t size, const char *component); From ac8ec1268e7a9daa3b8c7307556e5daf9831caca Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 2 Aug 2024 20:14:22 +0200 Subject: [PATCH 175/534] xen: tolerate ACPI NVS memory overlapping with Xen allocated memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit be35d91c8880650404f3bf813573222dfb106935 ] In order to minimize required special handling for running as Xen PV dom0, the memory layout is modified to match that of the host. This requires to have only RAM at the locations where Xen allocated memory is living. Unfortunately there seem to be some machines, where ACPI NVS is located at 64 MB, resulting in a conflict with the loaded kernel or the initial page tables built by Xen. Avoid this conflict by swapping the ACPI NVS area in the memory map with unused RAM. This is possible via modification of the dom0 P2M map. Accesses to the ACPI NVS area are done either for saving and restoring it across suspend operations (this will work the same way as before), or by ACPI code when NVS memory is referenced from other ACPI tables. The latter case is handled by a Xen specific indirection of acpi_os_ioremap(). While the E820 map can (and should) be modified right away, the P2M map can be updated only after memory allocation is working, as the P2M map might need to be extended. Fixes: 808fdb71936c ("xen: check for kernel memory conflicting with memory layout") Signed-off-by: Juergen Gross Tested-by: Marek Marczykowski-Górecki Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/setup.c | 92 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 112b071bac9d..989c14b1ff6c 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -496,6 +496,8 @@ void __init xen_remap_memory(void) set_pte_mfn(buf, mfn_save, PAGE_KERNEL); pr_info("Remapped %ld page(s)\n", remapped); + + xen_do_remap_nonram(); } static unsigned long __init xen_get_pages_limit(void) @@ -626,14 +628,102 @@ phys_addr_t __init xen_find_free_area(phys_addr_t size) return 0; } +/* + * Swap a non-RAM E820 map entry with RAM above ini_nr_pages. + * Note that the E820 map is modified accordingly, but the P2M map isn't yet. + * The adaption of the P2M must be deferred until page allocation is possible. + */ +static void __init xen_e820_swap_entry_with_ram(struct e820_entry *swap_entry) +{ + struct e820_entry *entry; + unsigned int mapcnt; + phys_addr_t mem_end = PFN_PHYS(ini_nr_pages); + phys_addr_t swap_addr, swap_size, entry_end; + + swap_addr = PAGE_ALIGN_DOWN(swap_entry->addr); + swap_size = PAGE_ALIGN(swap_entry->addr - swap_addr + swap_entry->size); + entry = xen_e820_table.entries; + + for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) { + entry_end = entry->addr + entry->size; + if (entry->type == E820_TYPE_RAM && entry->size >= swap_size && + entry_end - swap_size >= mem_end) { + /* Reduce RAM entry by needed space (whole pages). */ + entry->size -= swap_size; + + /* Add new entry at the end of E820 map. */ + entry = xen_e820_table.entries + + xen_e820_table.nr_entries; + xen_e820_table.nr_entries++; + + /* Fill new entry (keep size and page offset). */ + entry->type = swap_entry->type; + entry->addr = entry_end - swap_size + + swap_addr - swap_entry->addr; + entry->size = swap_entry->size; + + /* Convert old entry to RAM, align to pages. */ + swap_entry->type = E820_TYPE_RAM; + swap_entry->addr = swap_addr; + swap_entry->size = swap_size; + + /* Remember PFN<->MFN relation for P2M update. */ + xen_add_remap_nonram(swap_addr, entry_end - swap_size, + swap_size); + + /* Order E820 table and merge entries. */ + e820__update_table(&xen_e820_table); + + return; + } + + entry++; + } + + xen_raw_console_write("No suitable area found for required E820 entry remapping action\n"); + BUG(); +} + +/* + * Look for non-RAM memory types in a specific guest physical area and move + * those away if possible (ACPI NVS only for now). + */ +static void __init xen_e820_resolve_conflicts(phys_addr_t start, + phys_addr_t size) +{ + struct e820_entry *entry; + unsigned int mapcnt; + phys_addr_t end; + + if (!size) + return; + + end = start + size; + entry = xen_e820_table.entries; + + for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) { + if (entry->addr >= end) + return; + + if (entry->addr + entry->size > start && + entry->type == E820_TYPE_NVS) + xen_e820_swap_entry_with_ram(entry); + + entry++; + } +} + /* * Check for an area in physical memory to be usable for non-movable purposes. - * An area is considered to usable if the used E820 map lists it to be RAM. + * An area is considered to usable if the used E820 map lists it to be RAM or + * some other type which can be moved to higher PFNs while keeping the MFNs. * In case the area is not usable, crash the system with an error message. */ void __init xen_chk_is_e820_usable(phys_addr_t start, phys_addr_t size, const char *component) { + xen_e820_resolve_conflicts(start, size); + if (!xen_is_e820_reserved(start, size)) return; From d1691e9778600fa8b7f918cc8dc76db1d60a1e23 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 13 Sep 2024 12:05:02 +0200 Subject: [PATCH 176/534] xen/swiotlb: add alignment check for dma buffers [ Upstream commit 9f40ec84a7976d95c34e7cc070939deb103652b0 ] When checking a memory buffer to be consecutive in machine memory, the alignment needs to be checked, too. Failing to do so might result in DMA memory not being aligned according to its requested size, leading to error messages like: 4xxx 0000:2b:00.0: enabling device (0140 -> 0142) 4xxx 0000:2b:00.0: Ring address not aligned 4xxx 0000:2b:00.0: Failed to initialise service qat_crypto 4xxx 0000:2b:00.0: Resetting device qat_dev0 4xxx: probe of 0000:2b:00.0 failed with error -14 Fixes: 9435cce87950 ("xen/swiotlb: Add support for 64KB page granularity") Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/swiotlb-xen.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 0e6c6c25d154..892f87300e2c 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -78,9 +78,15 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) { unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p); unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size); + phys_addr_t algn = 1ULL << (get_order(size) + PAGE_SHIFT); next_bfn = pfn_to_bfn(xen_pfn); + /* If buffer is physically aligned, ensure DMA alignment. */ + if (IS_ALIGNED(p, algn) && + !IS_ALIGNED((phys_addr_t)next_bfn << XEN_PAGE_SHIFT, algn)) + return 1; + for (i = 1; i < nr_pages; i++) if (pfn_to_bfn(++xen_pfn) != ++next_bfn) return 1; From 9c21cdae4b93e8fa2740664c589c7f7f49859307 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sun, 15 Sep 2024 13:06:44 +0200 Subject: [PATCH 177/534] xen/swiotlb: fix allocated size [ Upstream commit c3dea3d54f4d399f8044547f0f1abdccbdfb0fee ] The allocated size in xen_swiotlb_alloc_coherent() and xen_swiotlb_free_coherent() is calculated wrong for the case of XEN_PAGE_SIZE not matching PAGE_SIZE. Fix that. Fixes: 7250f422da04 ("xen-swiotlb: use actually allocated size on check physical continuous") Reported-by: Jan Beulich Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/swiotlb-xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 892f87300e2c..6d0d1c8a508b 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -146,7 +146,7 @@ xen_swiotlb_alloc_coherent(struct device *dev, size_t size, void *ret; /* Align the allocation to the Xen page size */ - size = 1UL << (order + XEN_PAGE_SHIFT); + size = ALIGN(size, XEN_PAGE_SIZE); ret = (void *)__get_free_pages(flags, get_order(size)); if (!ret) @@ -178,7 +178,7 @@ xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, int order = get_order(size); /* Convert the size to actually allocated. */ - size = 1UL << (order + XEN_PAGE_SHIFT); + size = ALIGN(size, XEN_PAGE_SIZE); if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) || WARN_ON_ONCE(range_straddles_page_boundary(phys, size))) From 82478cb8a23bd4f97935bbe60d64528c6d9918b4 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Fri, 16 Aug 2024 12:55:46 +0100 Subject: [PATCH 178/534] tpm: Clean up TPM space after command failure [ Upstream commit e3aaebcbb7c6b403416f442d1de70d437ce313a7 ] tpm_dev_transmit prepares the TPM space before attempting command transmission. However if the command fails no rollback of this preparation is done. This can result in transient handles being leaked if the device is subsequently closed with no further commands performed. Fix this by flushing the space in the event of command transmission failure. Fixes: 745b361e989a ("tpm: infrastructure for TPM spaces") Signed-off-by: Jonathan McDowell Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm-dev-common.c | 2 ++ drivers/char/tpm/tpm2-space.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index 30b4c288c1bb..c3fbbf4d3db7 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -47,6 +47,8 @@ static ssize_t tpm_dev_transmit(struct tpm_chip *chip, struct tpm_space *space, if (!ret) ret = tpm2_commit_space(chip, space, buf, &len); + else + tpm2_flush_space(chip); out_rc: return ret ? ret : len; diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c index 363afdd4d1d3..d4d1007fe811 100644 --- a/drivers/char/tpm/tpm2-space.c +++ b/drivers/char/tpm/tpm2-space.c @@ -166,6 +166,9 @@ void tpm2_flush_space(struct tpm_chip *chip) struct tpm_space *space = &chip->work_space; int i; + if (!space) + return; + for (i = 0; i < ARRAY_SIZE(space->context_tbl); i++) if (space->context_tbl[i] && ~space->context_tbl[i]) tpm2_flush_context(chip, space->context_tbl[i]); From 5a4f8de92dd7ddaa7ce33aa804cbad254b844b8f Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Wed, 26 Jun 2024 10:35:05 +0800 Subject: [PATCH 179/534] sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy [ Upstream commit faa42d29419def58d3c3e5b14ad4037f0af3b496 ] Consider the following cgroup: root | ------------------------ | | normal_cgroup idle_cgroup | | SCHED_IDLE task_A SCHED_NORMAL task_B According to the cgroup hierarchy, A should preempt B. But current check_preempt_wakeup_fair() treats cgroup se and task separately, so B will preempt A unexpectedly. Unify the wakeup logic by {c,p}se_is_idle only. This makes SCHED_IDLE of a task a relative policy that is effective only within its own cgroup, similar to the behavior of NICE. Also fix se_is_idle() definition when !CONFIG_FAIR_GROUP_SCHED. Fixes: 304000390f88 ("sched: Cgroup SCHED_IDLE support") Signed-off-by: Tianchen Ding Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Don Reviewed-by: Vincent Guittot Link: https://lkml.kernel.org/r/20240626023505.1332596-1-dtcccc@linux.alibaba.com Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b2e1009e5706..5fc0d9cc9d9d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -533,7 +533,7 @@ static int cfs_rq_is_idle(struct cfs_rq *cfs_rq) static int se_is_idle(struct sched_entity *se) { - return 0; + return task_has_idle_policy(task_of(se)); } #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -8209,16 +8209,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (test_tsk_need_resched(curr)) return; - /* Idle tasks are by definition preempted by non-idle tasks. */ - if (unlikely(task_has_idle_policy(curr)) && - likely(!task_has_idle_policy(p))) - goto preempt; - - /* - * Batch and idle tasks do not preempt non-idle tasks (their preemption - * is driven by the tick): - */ - if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION)) + if (!sched_feat(WAKEUP_PREEMPTION)) return; find_matching_se(&se, &pse); @@ -8228,7 +8219,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ pse_is_idle = se_is_idle(pse); /* - * Preempt an idle group in favor of a non-idle group (and don't preempt + * Preempt an idle entity in favor of a non-idle entity (and don't preempt * in the inverse case). */ if (cse_is_idle && !pse_is_idle) @@ -8236,9 +8227,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (cse_is_idle != pse_is_idle) return; + /* + * BATCH and IDLE tasks do not preempt others. + */ + if (unlikely(p->policy != SCHED_NORMAL)) + return; + cfs_rq = cfs_rq_of(se); update_curr(cfs_rq); - /* * XXX pick_eevdf(cfs_rq) != se ? */ From e7d263b2947ce4bc8ba52d2bc0ef65406f9269ce Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 22 Jul 2024 19:08:15 -0700 Subject: [PATCH 180/534] selftests/bpf: Workaround strict bpf_lsm return value check. [ Upstream commit aa8ebb270c66cea1f56a25d0f938036e91ad085a ] test_progs-no_alu32 -t libbpf_get_fd_by_id_opts is being rejected by the verifier with the following error due to compiler optimization: 6: (67) r0 <<= 62 ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000)) 7: (c7) r0 s>>= 63 ; R0_w=scalar(smin=smin32=-1,smax=smax32=0) ; @ test_libbpf_get_fd_by_id_opts.c:0 8: (57) r0 &= -13 ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3)) ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27 9: (95) exit At program exit the register R0 has smax=9223372036854775795 should have been in [-4095, 0] Workaround by adding barrier(). Eventually the verifier will be able to recognize it. Fixes: 5d99e198be27 ("bpf, lsm: Add check for BPF LSM return value") Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Sasha Levin --- .../testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c index f5ac5f3e8919..568816307f71 100644 --- a/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c +++ b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c @@ -31,6 +31,7 @@ int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) if (fmode & FMODE_WRITE) return -EACCES; + barrier(); return 0; } From b6529a310dfa7c1dd99c301a4eda6890e182d220 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 17:13:29 -0700 Subject: [PATCH 181/534] selftests/bpf: Fix error linking uprobe_multi on mips [ Upstream commit a5f40d596bff182b4b47547712f540885e8fb17b ] Linking uprobe_multi.c on mips64el fails due to relocation overflows, when the GOT entries required exceeds the default maximum. Add a specific CFLAGS (-mxgot) for uprobe_multi.c on MIPS that allows using a larger GOT and avoids errors such as: /tmp/ccBTNQzv.o: in function `bench': uprobe_multi.c:49:(.text+0x1d7720): relocation truncated to fit: R_MIPS_GOT_DISP against `uprobe_multi_func_08188' uprobe_multi.c:49:(.text+0x1d7730): relocation truncated to fit: R_MIPS_GOT_DISP against `uprobe_multi_func_08189' ... collect2: error: ld returned 1 exit status Fixes: 519dfeaf5119 ("selftests/bpf: Add uprobe_multi test program") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/14eb7b70f8ccef9834874d75eb373cb9292129da.1721692479.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index caede9b574cb..598cd118dfd6 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -702,6 +702,8 @@ $(OUTPUT)/veristat: $(OUTPUT)/veristat.o $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ +# Linking uprobe_multi can fail due to relocation overflows on mips. +$(OUTPUT)/uprobe_multi: CFLAGS += $(if $(filter mips, $(ARCH)),-mxgot) $(OUTPUT)/uprobe_multi: uprobe_multi.c $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ From 5de3bd34dd5beae639dbf507627a4fca60db1f34 Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Sat, 27 Jan 2024 11:07:02 +0100 Subject: [PATCH 182/534] bpf: Use -Wno-error in certain tests when building with GCC [ Upstream commit 646751d523587cfd7ebcf1733298ecd470879eda ] Certain BPF selftests contain code that, albeit being legal C, trigger warnings in GCC that cannot be disabled. This is the case for example for the tests progs/btf_dump_test_case_bitfields.c progs/btf_dump_test_case_namespacing.c progs/btf_dump_test_case_packing.c progs/btf_dump_test_case_padding.c progs/btf_dump_test_case_syntax.c which contain struct type declarations inside function parameter lists. This is problematic, because: - The BPF selftests are built with -Werror. - The Clang and GCC compilers sometimes differ when it comes to handle warnings. in the handling of warnings. One compiler may emit warnings for code that the other compiles compiles silently, and one compiler may offer the possibility to disable certain warnings, while the other doesn't. In order to overcome this problem, this patch modifies the tools/testing/selftests/bpf/Makefile in order to: 1. Enable the possibility of specifing per-source-file extra CFLAGS. This is done by defining a make variable like: -CFLAGS := And then modifying the proper Make rule in order to use these flags when compiling . 2. Use the mechanism above to add -Wno-error to CFLAGS for the following selftests: progs/btf_dump_test_case_bitfields.c progs/btf_dump_test_case_namespacing.c progs/btf_dump_test_case_packing.c progs/btf_dump_test_case_padding.c progs/btf_dump_test_case_syntax.c Note the corresponding -CFLAGS variables for these files are defined only if the selftests are being built with GCC. Note that, while compiler pragmas can generally be used to disable particular warnings per file, this 1) is only possible for warning that actually can be disabled in the command line, i.e. that have -Wno-FOO options, and 2) doesn't apply to -Wno-error. Tested in bpf-next master branch. No regressions. Signed-off-by: Jose E. Marchesi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240127100702.21549-1-jose.marchesi@oracle.com Stable-dep-of: 3ece93a4087b ("selftests/bpf: Fix wrong binary in Makefile log output") Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 598cd118dfd6..397306f5cf91 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -56,6 +56,15 @@ TEST_INST_SUBDIRS := no_alu32 ifneq ($(BPF_GCC),) TEST_GEN_PROGS += test_progs-bpf_gcc TEST_INST_SUBDIRS += bpf_gcc + +# The following tests contain C code that, although technically legal, +# triggers GCC warnings that cannot be disabled: declaration of +# anonymous struct types in function parameter lists. +progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error endif ifneq ($(CLANG_CPUV4),) @@ -492,7 +501,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \ $(wildcard $(BPFDIR)/*.bpf.h) \ | $(TRUNNER_OUTPUT) $$(BPFOBJ) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ - $(TRUNNER_BPF_CFLAGS)) + $(TRUNNER_BPF_CFLAGS) \ + $$($$<-CFLAGS)) $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) From 01aa0d2861be7546f403b0324cf70d2ebea79214 Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Tue, 7 May 2024 09:42:27 +0200 Subject: [PATCH 183/534] bpf: Disable some `attribute ignored' warnings in GCC [ Upstream commit b0fbdf759da05a35b67fd27b8859738b79af25d6 ] This patch modifies selftests/bpf/Makefile to pass -Wno-attributes to GCC. This is because of the following attributes which are ignored: - btf_decl_tag - btf_type_tag There are many of these. At the moment none of these are recognized/handled by gcc-bpf. We are aware that btf_decl_tag is necessary for some of the selftest harness to communicate test failure/success. Support for it is in progress in GCC upstream: https://gcc.gnu.org/pipermail/gcc-patches/2024-May/650482.html However, the GCC master branch is not yet open, so the series above (currently under review upstream) wont be able to make it there until 14.1 gets released, probably mid next week. As for btf_type_tag, more extensive work will be needed in GCC upstream to support it in both BTF and DWARF. We have a WIP big patch for that, but that is not needed to compile/build the selftests. - used There are SEC macros defined in the selftests as: #define SEC(N) __attribute__((section(N),used)) The SEC macro is used for both functions and global variables. According to the GCC documentation `used' attribute is really only meaningful for functions, and it warns when the attribute is used for other global objects, like for example ctl_array in test_xdp_noinline.c. Ignoring this is benign. - align_value In progs/test_cls_redirect.c:127 there is: typedef uint8_t *net_ptr __attribute__((align_value(8))); GCC warns that it is ignoring this attribute, because it is not implemented by GCC. I think ignoring this attribute in GCC is benign, because according to the clang documentation [1] its purpose seems to be merely declarative and doesn't seem to translate into extra checks at run-time, only to perhaps better optimized code ("runtime behavior is undefined if the pointed memory object is not aligned to the specified alignment"). [1] https://clang.llvm.org/docs/AttributeReference.html#align-value Tested in bpf-next master. Signed-off-by: Jose E. Marchesi Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20240507074227.4523-3-jose.marchesi@oracle.com Stable-dep-of: 3ece93a4087b ("selftests/bpf: Fix wrong binary in Makefile log output") Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 397306f5cf91..e0f499794f16 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -412,7 +412,7 @@ endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 -Wno-attributes -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c From 5d99839bfe1c6ebc7f927f363fea6cdbedc831cb Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Tue, 7 May 2024 11:50:11 +0200 Subject: [PATCH 184/534] bpf: Temporarily define BPF_NO_PRESEVE_ACCESS_INDEX for GCC [ Upstream commit 675b4e24bc50f4600b6bf3527fdbaa1f73498334 ] The vmlinux.h file generated by bpftool makes use of compiler pragmas in order to install the CO-RE preserve_access_index in all the struct types derived from the BTF info: #ifndef __VMLINUX_H__ #define __VMLINUX_H__ #ifndef BPF_NO_PRESERVE_ACCESS_INDEX #pragma clang attribute push (__attribute__((preserve_access_index)), apply_t = record #endif [... type definitions generated from kernel BTF ... ] #ifndef BPF_NO_PRESERVE_ACCESS_INDEX #pragma clang attribute pop #endif The `clang attribute push/pop' pragmas are specific to clang/llvm and are not supported by GCC. At the moment the BTF dumping services in libbpf do not support dicriminating between types dumped because they are directly referred and types dumped because they are dependencies. A suitable API is being worked now. See [1] and [2]. In the interim, this patch changes the selftests/bpf Makefile so it passes -DBPF_NO_PRESERVE_ACCESS_INDEX to GCC when it builds the selftests. This workaround is temporary, and may have an impact on the results of the GCC-built tests. [1] https://lore.kernel.org/bpf/20240503111836.25275-1-jose.marchesi@oracle.com/T/#u [2] https://lore.kernel.org/bpf/20240504205510.24785-1-jose.marchesi@oracle.com/T/#u Tested in bpf-next master. No regressions. Signed-off-by: Jose E. Marchesi Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240507095011.15867-1-jose.marchesi@oracle.com Stable-dep-of: 3ece93a4087b ("selftests/bpf: Fix wrong binary in Makefile log output") Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e0f499794f16..24b85060df77 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -412,7 +412,7 @@ endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(BPF_GCC) $3 -Wno-attributes -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 -DBPF_NO_PRESERVE_ACCESS_INDEX -Wno-attributes -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c From 97e4a3ba9de74d0bbd29a5d2922e69f68fe7f441 Mon Sep 17 00:00:00 2001 From: Cupertino Miranda Date: Tue, 7 May 2024 13:22:19 +0100 Subject: [PATCH 185/534] selftests/bpf: Add CFLAGS per source file and runner [ Upstream commit 207cf6e649ee551ab3bdb1cfe1b2848e6a4337a5 ] This patch adds support to specify CFLAGS per source file and per test runner. Signed-off-by: Cupertino Miranda Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20240507122220.207820-2-cupertino.miranda@oracle.com Stable-dep-of: 3ece93a4087b ("selftests/bpf: Fix wrong binary in Makefile log output") Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/Makefile | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 24b85060df77..0093d1161c6e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -60,11 +60,11 @@ TEST_INST_SUBDIRS += bpf_gcc # The following tests contain C code that, although technically legal, # triggers GCC warnings that cannot be disabled: declaration of # anonymous struct types in function parameter lists. -progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error -progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error -progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error -progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error -progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_bitfields.c-bpf_gcc-CFLAGS := -Wno-error +progs/btf_dump_test_case_namespacing.c-bpf_gcc-CFLAGS := -Wno-error +progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error +progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error +progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error endif ifneq ($(CLANG_CPUV4),) @@ -451,7 +451,7 @@ LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(ske # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. # Parameters: # $1 - test runner base binary name (e.g., test_progs) -# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc) +# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc) define DEFINE_TEST_RUNNER TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2 @@ -479,7 +479,7 @@ endef # Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and # set up by DEFINE_TEST_RUNNER itself, create test runner build rules with: # $1 - test runner base binary name (e.g., test_progs) -# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc) +# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc) define DEFINE_TEST_RUNNER_RULES ifeq ($($(TRUNNER_OUTPUT)-dir),) @@ -502,7 +502,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \ | $(TRUNNER_OUTPUT) $$(BPFOBJ) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS) \ - $$($$<-CFLAGS)) + $$($$<-CFLAGS) \ + $$($$<-$2-CFLAGS)) $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) From cd1b7f772f56d2fb01898121cf2a2a17b62ac2a6 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Fri, 19 Jul 2024 22:25:35 -0700 Subject: [PATCH 186/534] selftests/bpf: Fix wrong binary in Makefile log output [ Upstream commit 3ece93a4087b2db7b99ebb2412bd60cf26bbbb51 ] Make log output incorrectly shows 'test_maps' as the binary name for every 'CLNG-BPF' build step, apparently picking up the last value defined for the $(TRUNNER_BINARY) variable. Update the 'CLANG_BPF_BUILD_RULE' variants to fix this confusing output. Current output: CLNG-BPF [test_maps] access_map_in_map.bpf.o GEN-SKEL [test_progs] access_map_in_map.skel.h ... CLNG-BPF [test_maps] access_map_in_map.bpf.o GEN-SKEL [test_progs-no_alu32] access_map_in_map.skel.h ... CLNG-BPF [test_maps] access_map_in_map.bpf.o GEN-SKEL [test_progs-cpuv4] access_map_in_map.skel.h After fix: CLNG-BPF [test_progs] access_map_in_map.bpf.o GEN-SKEL [test_progs] access_map_in_map.skel.h ... CLNG-BPF [test_progs-no_alu32] access_map_in_map.bpf.o GEN-SKEL [test_progs-no_alu32] access_map_in_map.skel.h ... CLNG-BPF [test_progs-cpuv4] access_map_in_map.bpf.o GEN-SKEL [test_progs-cpuv4] access_map_in_map.skel.h Fixes: a5d0c26a2784 ("selftests/bpf: Add a cpuv4 test runner for cpu=v4 testing") Fixes: 89ad7420b25c ("selftests/bpf: Drop the need for LLVM's llc") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240720052535.2185967-1-tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/Makefile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 0093d1161c6e..ab364e95a9b2 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -395,23 +395,24 @@ $(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h # $1 - input .c file # $2 - output .o file # $3 - CFLAGS +# $4 - binary name define CLANG_BPF_BUILD_RULE - $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) + $(call msg,CLNG-BPF,$4,$2) $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE - $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) + $(call msg,CLNG-BPF,$4,$2) $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with cpu-v4 define CLANG_CPUV4_BPF_BUILD_RULE - $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) + $(call msg,CLNG-BPF,$4,$2) $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v4 -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE - $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) + $(call msg,GCC-BPF,$4,$2) $(Q)$(BPF_GCC) $3 -DBPF_NO_PRESERVE_ACCESS_INDEX -Wno-attributes -O2 -c $1 -o $2 endef @@ -503,7 +504,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \ $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS) \ $$($$<-CFLAGS) \ - $$($$<-$2-CFLAGS)) + $$($$<-$2-CFLAGS),$(TRUNNER_BINARY)) $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) From b0b99c1226eae6950d0e82f201efafef799732f6 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 17:30:45 -0700 Subject: [PATCH 187/534] tools/runqslower: Fix LDFLAGS and add LDLIBS support [ Upstream commit f86601c3661946721e8f260bdd812b759854ac22 ] Actually use previously defined LDFLAGS during build and add support for LDLIBS to link extra standalone libraries e.g. 'argp' which is not provided by musl libc. Fixes: 585bf4640ebe ("tools: runqslower: Add EXTRA_CFLAGS and EXTRA_LDFLAGS support") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Acked-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20240723003045.2273499-1-tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/bpf/runqslower/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index d8288936c912..c4f1f1735af6 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -15,6 +15,7 @@ INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi) CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS) CFLAGS += $(EXTRA_CFLAGS) LDFLAGS += $(EXTRA_LDFLAGS) +LDLIBS += -lelf -lz # Try to detect best kernel BTF source KERNEL_REL := $(shell uname -r) @@ -51,7 +52,7 @@ clean: libbpf_hdrs: $(BPFOBJ) $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) - $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ $(OUTPUT)/runqslower.bpf.o | libbpf_hdrs From 7572c32f8ef2b7f6646b878ad6469f2f8287c5ea Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:28 -0700 Subject: [PATCH 188/534] selftests/bpf: Use pid_t consistently in test_progs.c [ Upstream commit ec4fe2f0fa12fd2d0115df7e58414dc26899cc5e ] Use pid_t rather than __pid_t when allocating memory for 'worker_pids' in 'struct test_env', as this is its declared type and also avoids compile errors seen building against musl libc on mipsel64: test_progs.c:1738:49: error: '__pid_t' undeclared (first use in this function); did you mean 'pid_t'? 1738 | env.worker_pids = calloc(sizeof(__pid_t), env.workers); | ^~~~~~~ | pid_t test_progs.c:1738:49: note: each undeclared identifier is reported only once for each function it appears in Fixes: 91b2c0afd00c ("selftests/bpf: Add parallelism to test_progs") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Acked-by: Geliang Tang Link: https://lore.kernel.org/bpf/c6447da51a94babc1931711a43e2ceecb135c93d.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/test_progs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 4d582cac2c09..eed0e4fcf42e 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1690,7 +1690,7 @@ int main(int argc, char **argv) /* launch workers if requested */ env.worker_id = -1; /* main process */ if (env.workers) { - env.worker_pids = calloc(sizeof(__pid_t), env.workers); + env.worker_pids = calloc(sizeof(pid_t), env.workers); env.worker_socks = calloc(sizeof(int), env.workers); if (env.debug) fprintf(stdout, "Launching %d workers.\n", env.workers); From db5cde7b4386d147db8835d121f4e43dd0e6bf4d Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:29 -0700 Subject: [PATCH 189/534] selftests/bpf: Fix compile error from rlim_t in sk_storage_map.c [ Upstream commit d393f9479d4aaab0fa4c3caf513f28685e831f13 ] Cast 'rlim_t' argument to match expected type of printf() format and avoid compile errors seen building for mips64el/musl-libc: In file included from map_tests/sk_storage_map.c:20: map_tests/sk_storage_map.c: In function 'test_sk_storage_map_stress_free': map_tests/sk_storage_map.c:414:56: error: format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'rlim_t' {aka 'long long unsigned int'} [-Werror=format=] 414 | CHECK(err, "setrlimit(RLIMIT_NOFILE)", "rlim_new:%lu errno:%d", | ^~~~~~~~~~~~~~~~~~~~~~~ 415 | rlim_new.rlim_cur, errno); | ~~~~~~~~~~~~~~~~~ | | | rlim_t {aka long long unsigned int} ./test_maps.h:12:24: note: in definition of macro 'CHECK' 12 | printf(format); \ | ^~~~~~ map_tests/sk_storage_map.c:414:68: note: format string is defined here 414 | CHECK(err, "setrlimit(RLIMIT_NOFILE)", "rlim_new:%lu errno:%d", | ~~^ | | | long unsigned int | %llu cc1: all warnings being treated as errors Fixes: 51a0e301a563 ("bpf: Add BPF_MAP_TYPE_SK_STORAGE test to test_maps") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1e00a1fa7acf91b4ca135c4102dc796d518bad86.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/map_tests/sk_storage_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index 18405c3b7cee..af10c309359a 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -412,7 +412,7 @@ static void test_sk_storage_map_stress_free(void) rlim_new.rlim_max = rlim_new.rlim_cur + 128; err = setrlimit(RLIMIT_NOFILE, &rlim_new); CHECK(err, "setrlimit(RLIMIT_NOFILE)", "rlim_new:%lu errno:%d", - rlim_new.rlim_cur, errno); + (unsigned long) rlim_new.rlim_cur, errno); } err = do_sk_storage_map_stress_free(); From 7c877bad03fbff899201c950eca442b8fe2bb660 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:30 -0700 Subject: [PATCH 190/534] selftests/bpf: Fix error compiling bpf_iter_setsockopt.c with musl libc [ Upstream commit 7b10f0c227ce3fa055d601f058dc411092a62a78 ] Existing code calls getsockname() with a 'struct sockaddr_in6 *' argument where a 'struct sockaddr *' argument is declared, yielding compile errors when building for mips64el/musl-libc: bpf_iter_setsockopt.c: In function 'get_local_port': bpf_iter_setsockopt.c:98:30: error: passing argument 2 of 'getsockname' from incompatible pointer type [-Werror=incompatible-pointer-types] 98 | if (!getsockname(fd, &addr, &addrlen)) | ^~~~~ | | | struct sockaddr_in6 * In file included from .../netinet/in.h:10, from .../arpa/inet.h:9, from ./test_progs.h:17, from bpf_iter_setsockopt.c:5: .../sys/socket.h:391:23: note: expected 'struct sockaddr * restrict' but argument is of type 'struct sockaddr_in6 *' 391 | int getsockname (int, struct sockaddr *__restrict, socklen_t *__restrict); | ^ cc1: all warnings being treated as errors This compiled under glibc only because the argument is declared to be a "funky" transparent union which includes both types above. Explicitly cast the argument to allow compiling for both musl and glibc. Fixes: eed92afdd14c ("bpf: selftest: Test batching and bpf_(get|set)sockopt in bpf tcp iter") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Acked-by: Geliang Tang Link: https://lore.kernel.org/bpf/f41def0f17b27a23b1709080e4e3f37f4cc11ca9.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c index b52ff8ce34db..16bed9dd8e6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c @@ -95,7 +95,7 @@ static unsigned short get_local_port(int fd) struct sockaddr_in6 addr; socklen_t addrlen = sizeof(addr); - if (!getsockname(fd, &addr, &addrlen)) + if (!getsockname(fd, (struct sockaddr *)&addr, &addrlen)) return ntohs(addr.sin6_port); return 0; From c8c590f07ad7ffaa6ef11e90b81202212077497b Mon Sep 17 00:00:00 2001 From: Tushar Vyavahare Date: Tue, 2 Apr 2024 11:45:25 +0000 Subject: [PATCH 191/534] selftests/bpf: Implement get_hw_ring_size function to retrieve current and max interface size [ Upstream commit 90a695c3d31e1c9f0adb8c4c80028ed4ea7ed5ab ] Introduce a new function called get_hw_size that retrieves both the current and maximum size of the interface and stores this information in the 'ethtool_ringparam' structure. Remove ethtool_channels struct from xdp_hw_metadata.c due to redefinition error. Remove unused linux/if.h include from flow_dissector BPF test to address CI pipeline failure. Signed-off-by: Tushar Vyavahare Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20240402114529.545475-4-tushar.vyavahare@intel.com Stable-dep-of: 69f409469c9b ("selftests/bpf: Drop unneeded error.h includes") Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/network_helpers.c | 24 +++++++++++++++++++ tools/testing/selftests/bpf/network_helpers.h | 4 ++++ .../selftests/bpf/prog_tests/flow_dissector.c | 1 - tools/testing/selftests/bpf/xdp_hw_metadata.c | 14 ----------- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 0877b60ec81f..d2acc8875212 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -465,3 +465,27 @@ int get_socket_local_port(int sock_fd) return -1; } + +int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) +{ + struct ifreq ifr = {0}; + int sockfd, err; + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) + return -errno; + + memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + ring_param->cmd = ETHTOOL_GRINGPARAM; + ifr.ifr_data = (char *)ring_param; + + if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { + err = errno; + close(sockfd); + return -err; + } + + close(sockfd); + return 0; +} diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 5eccc67d1a99..11cbe194769b 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -9,8 +9,11 @@ typedef __u16 __sum16; #include #include #include +#include +#include #include #include +#include #define MAGIC_VAL 0x1234 #define NUM_ITER 100000 @@ -60,6 +63,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); char *ping_command(int family); int get_socket_local_port(int sock_fd); +int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param); struct nstoken; /** diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index c4773173a4e4..9e5f38739104 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -2,7 +2,6 @@ #include #include #include -#include #include #include diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index adb77c1a6a74..79f2da8f6ead 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -288,20 +288,6 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t return 0; } -struct ethtool_channels { - __u32 cmd; - __u32 max_rx; - __u32 max_tx; - __u32 max_other; - __u32 max_combined; - __u32 rx_count; - __u32 tx_count; - __u32 other_count; - __u32 combined_count; -}; - -#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ - static int rxq_num(const char *ifname) { struct ethtool_channels ch = { From 103c0431c7fb4790affea121126840dbfb146341 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:31 -0700 Subject: [PATCH 192/534] selftests/bpf: Drop unneeded error.h includes [ Upstream commit 69f409469c9b1515a5db40d5a36fda372376fa2d ] The addition of general support for unprivileged tests in test_loader.c breaks building test_verifier on non-glibc (e.g. musl) systems, due to the inclusion of glibc extension '' in 'unpriv_helpers.c'. However, the header is actually not needed, so remove it to restore building. Similarly for sk_lookup.c and flow_dissector.c, error.h is not necessary and causes problems, so drop them. Fixes: 1d56ade032a4 ("selftests/bpf: Unprivileged tests for test_loader.c") Fixes: 0ab5539f8584 ("selftests/bpf: Tests for BPF_SK_LOOKUP attach point") Fixes: 0905beec9f52 ("selftests/bpf: run flow dissector tests in skb-less mode") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/5664367edf5fea4f3f4b4aec3b182bcfc6edff9c.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/flow_dissector.c | 1 - tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 1 - tools/testing/selftests/bpf/unpriv_helpers.c | 1 - 3 files changed, 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 9e5f38739104..9625e6d21791 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index de2466547efe..a1ab0af00454 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c index 2a6efbd0401e..762e4b5ec955 100644 --- a/tools/testing/selftests/bpf/unpriv_helpers.c +++ b/tools/testing/selftests/bpf/unpriv_helpers.c @@ -2,7 +2,6 @@ #include #include -#include #include #include "unpriv_helpers.h" From 2388d181667a728f1b91e17d4e9897ba739dcc43 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:34 -0700 Subject: [PATCH 193/534] selftests/bpf: Fix missing ARRAY_SIZE() definition in bench.c [ Upstream commit d44c93fc2f5a0c47b23fa03d374e45259abd92d2 ] Add a "bpf_util.h" include to avoid the following error seen compiling for mips64el with musl libc: bench.c: In function 'find_benchmark': bench.c:590:25: error: implicit declaration of function 'ARRAY_SIZE' [-Werror=implicit-function-declaration] 590 | for (i = 0; i < ARRAY_SIZE(benchs); i++) { | ^~~~~~~~~~ cc1: all warnings being treated as errors Fixes: 8e7c2a023ac0 ("selftests/bpf: Add benchmark runner infrastructure") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/bc4dde77dfcd17a825d8f28f72f3292341966810.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/bench.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 73ce11b0547d..b705cbabe1e2 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -10,6 +10,7 @@ #include #include #include "bench.h" +#include "bpf_util.h" #include "testing_helpers.h" struct env env = { From 4bff8cc537ce4f37b3ec47de7db9523b6c95c11e Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:35 -0700 Subject: [PATCH 194/534] selftests/bpf: Fix missing UINT_MAX definitions in benchmarks [ Upstream commit a2c155131b710959beb508ca6a54769b6b1bd488 ] Include in 'bench.h' to provide a UINT_MAX definition and avoid multiple compile errors against mips64el/musl-libc like: benchs/bench_local_storage.c: In function 'parse_arg': benchs/bench_local_storage.c:40:38: error: 'UINT_MAX' undeclared (first use in this function) 40 | if (ret < 1 || ret > UINT_MAX) { | ^~~~~~~~ benchs/bench_local_storage.c:11:1: note: 'UINT_MAX' is defined in header ''; did you forget to '#include '? 10 | #include +++ |+#include 11 | seen with bench_local_storage.c, bench_local_storage_rcu_tasks_trace.c, and bench_bpf_hashmap_lookup.c. Fixes: 73087489250d ("selftests/bpf: Add benchmark for local_storage get") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/8f64a9d9fcff40a7fca090a65a68a9b62a468e16.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/bench.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 68180d8f8558..005c401b3e22 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -10,6 +10,7 @@ #include #include #include +#include struct cpu_set { bool *cpus; From bedda119babcfa518f07d62e3b46af99d19c87bf Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:36 -0700 Subject: [PATCH 195/534] selftests/bpf: Fix missing BUILD_BUG_ON() declaration [ Upstream commit 6495eb79ca7d15bd87c38d77307e8f9b6b7bf4ef ] Explicitly include '' to fix errors seen compiling with gcc targeting mips64el/musl-libc: user_ringbuf.c: In function 'test_user_ringbuf_loop': user_ringbuf.c:426:9: error: implicit declaration of function 'BUILD_BUG_ON' [-Werror=implicit-function-declaration] 426 | BUILD_BUG_ON(total_samples <= c_max_entries); | ^~~~~~~~~~~~ cc1: all warnings being treated as errors Fixes: e5a9df51c746 ("selftests/bpf: Add selftests validating the user ringbuf") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/b28575f9221ec54871c46a2e87612bb4bbf46ccd.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/user_ringbuf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index e51721df14fc..dfff6feac12c 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -4,6 +4,7 @@ #define _GNU_SOURCE #include #include +#include #include #include #include From d6e16c33e093f23fc2ee40750d467d1616350250 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 15 Mar 2024 11:48:59 -0700 Subject: [PATCH 196/534] selftests/bpf: Replace CHECK with ASSERT_* in ns_current_pid_tgid test [ Upstream commit 84239a24d10174fcfc7d6760cb120435a6ff69af ] Replace CHECK in selftest ns_current_pid_tgid with recommended ASSERT_* style. I also shortened subtest name as the prefix of subtest name is covered by the test name already. This patch does fix a testing issue. Currently even if bss->user_{pid,tgid} is not correct, the test still passed since the clone func returns 0. I fixed it to return a non-zero value if bss->user_{pid,tgid} is incorrect. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240315184859.2975543-1-yonghong.song@linux.dev Stable-dep-of: 21f0b0af9772 ("selftests/bpf: Fix include of ") Signed-off-by: Sasha Levin --- .../bpf/prog_tests/ns_current_pid_tgid.c | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 24d493482ffc..3a0664a86243 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -20,19 +20,19 @@ static int test_current_pid_tgid(void *args) { struct test_ns_current_pid_tgid__bss *bss; struct test_ns_current_pid_tgid *skel; - int err = -1, duration = 0; + int ret = -1, err; pid_t tgid, pid; struct stat st; skel = test_ns_current_pid_tgid__open_and_load(); - if (CHECK(!skel, "skel_open_load", "failed to load skeleton\n")) - goto cleanup; + if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open_and_load")) + goto out; pid = syscall(SYS_gettid); tgid = getpid(); err = stat("/proc/self/ns/pid", &st); - if (CHECK(err, "stat", "failed /proc/self/ns/pid: %d\n", err)) + if (!ASSERT_OK(err, "stat /proc/self/ns/pid")) goto cleanup; bss = skel->bss; @@ -42,24 +42,26 @@ static int test_current_pid_tgid(void *args) bss->user_tgid = 0; err = test_ns_current_pid_tgid__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_ns_current_pid_tgid__attach")) goto cleanup; /* trigger tracepoint */ usleep(1); - ASSERT_EQ(bss->user_pid, pid, "pid"); - ASSERT_EQ(bss->user_tgid, tgid, "tgid"); - err = 0; + if (!ASSERT_EQ(bss->user_pid, pid, "pid")) + goto cleanup; + if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid")) + goto cleanup; + ret = 0; cleanup: - test_ns_current_pid_tgid__destroy(skel); - - return err; + test_ns_current_pid_tgid__destroy(skel); +out: + return ret; } static void test_ns_current_pid_tgid_new_ns(void) { - int wstatus, duration = 0; + int wstatus; pid_t cpid; /* Create a process in a new namespace, this process @@ -68,21 +70,21 @@ static void test_ns_current_pid_tgid_new_ns(void) cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE, CLONE_NEWPID | SIGCHLD, NULL); - if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno))) + if (!ASSERT_NEQ(cpid, -1, "clone")) return; - if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno))) + if (!ASSERT_NEQ(waitpid(cpid, &wstatus, 0), -1, "waitpid")) return; - if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed")) + if (!ASSERT_OK(WEXITSTATUS(wstatus), "newns_pidtgid")) return; } /* TODO: use a different tracepoint */ void serial_test_ns_current_pid_tgid(void) { - if (test__start_subtest("ns_current_pid_tgid_root_ns")) + if (test__start_subtest("root_ns_tp")) test_current_pid_tgid(NULL); - if (test__start_subtest("ns_current_pid_tgid_new_ns")) + if (test__start_subtest("new_ns_tp")) test_ns_current_pid_tgid_new_ns(); } From 17536f3b7262bdd354da1984b021c8b9124fd300 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 15 Mar 2024 11:49:04 -0700 Subject: [PATCH 197/534] selftests/bpf: Refactor out some functions in ns_current_pid_tgid test [ Upstream commit 4d4bd29e363c467752536f874a2cba10a5923c59 ] Refactor some functions in both user space code and bpf program as these functions are used by later cgroup/sk_msg tests. Another change is to mark tp program optional loading as later patches will use optional loading as well since they have quite different attachment and testing logic. There is no functionality change. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240315184904.2976123-1-yonghong.song@linux.dev Stable-dep-of: 21f0b0af9772 ("selftests/bpf: Fix include of ") Signed-off-by: Sasha Levin --- .../bpf/prog_tests/ns_current_pid_tgid.c | 53 ++++++++++++------- .../bpf/progs/test_ns_current_pid_tgid.c | 10 ++-- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 3a0664a86243..847d7b70e290 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -16,30 +16,46 @@ #define STACK_SIZE (1024 * 1024) static char child_stack[STACK_SIZE]; -static int test_current_pid_tgid(void *args) +static int get_pid_tgid(pid_t *pid, pid_t *tgid, + struct test_ns_current_pid_tgid__bss *bss) +{ + struct stat st; + int err; + + *pid = syscall(SYS_gettid); + *tgid = getpid(); + + err = stat("/proc/self/ns/pid", &st); + if (!ASSERT_OK(err, "stat /proc/self/ns/pid")) + return err; + + bss->dev = st.st_dev; + bss->ino = st.st_ino; + bss->user_pid = 0; + bss->user_tgid = 0; + return 0; +} + +static int test_current_pid_tgid_tp(void *args) { struct test_ns_current_pid_tgid__bss *bss; struct test_ns_current_pid_tgid *skel; int ret = -1, err; pid_t tgid, pid; - struct stat st; - skel = test_ns_current_pid_tgid__open_and_load(); - if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open_and_load")) - goto out; + skel = test_ns_current_pid_tgid__open(); + if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open")) + return ret; - pid = syscall(SYS_gettid); - tgid = getpid(); + bpf_program__set_autoload(skel->progs.tp_handler, true); - err = stat("/proc/self/ns/pid", &st); - if (!ASSERT_OK(err, "stat /proc/self/ns/pid")) + err = test_ns_current_pid_tgid__load(skel); + if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load")) goto cleanup; bss = skel->bss; - bss->dev = st.st_dev; - bss->ino = st.st_ino; - bss->user_pid = 0; - bss->user_tgid = 0; + if (get_pid_tgid(&pid, &tgid, bss)) + goto cleanup; err = test_ns_current_pid_tgid__attach(skel); if (!ASSERT_OK(err, "test_ns_current_pid_tgid__attach")) @@ -55,11 +71,10 @@ static int test_current_pid_tgid(void *args) cleanup: test_ns_current_pid_tgid__destroy(skel); -out: return ret; } -static void test_ns_current_pid_tgid_new_ns(void) +static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg) { int wstatus; pid_t cpid; @@ -67,8 +82,8 @@ static void test_ns_current_pid_tgid_new_ns(void) /* Create a process in a new namespace, this process * will be the init process of this new namespace hence will be pid 1. */ - cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE, - CLONE_NEWPID | SIGCHLD, NULL); + cpid = clone(fn, child_stack + STACK_SIZE, + CLONE_NEWPID | SIGCHLD, arg); if (!ASSERT_NEQ(cpid, -1, "clone")) return; @@ -84,7 +99,7 @@ static void test_ns_current_pid_tgid_new_ns(void) void serial_test_ns_current_pid_tgid(void) { if (test__start_subtest("root_ns_tp")) - test_current_pid_tgid(NULL); + test_current_pid_tgid_tp(NULL); if (test__start_subtest("new_ns_tp")) - test_ns_current_pid_tgid_new_ns(); + test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL); } diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c index 0763d49f9c42..aa3ec7ca16d9 100644 --- a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c @@ -10,17 +10,21 @@ __u64 user_tgid = 0; __u64 dev = 0; __u64 ino = 0; -SEC("tracepoint/syscalls/sys_enter_nanosleep") -int handler(const void *ctx) +static void get_pid_tgid(void) { struct bpf_pidns_info nsdata; if (bpf_get_ns_current_pid_tgid(dev, ino, &nsdata, sizeof(struct bpf_pidns_info))) - return 0; + return; user_pid = nsdata.pid; user_tgid = nsdata.tgid; +} +SEC("?tracepoint/syscalls/sys_enter_nanosleep") +int tp_handler(const void *ctx) +{ + get_pid_tgid(); return 0; } From 4730b07ef7745d7cd48c6aa9f72d75ac136d436f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 15 Mar 2024 11:49:10 -0700 Subject: [PATCH 198/534] selftests/bpf: Add a cgroup prog bpf_get_ns_current_pid_tgid() test [ Upstream commit 87ade6cd859ea9dbde6e80b3fcf717ed9a73b4a9 ] Add a cgroup bpf program test where the bpf program is running in a pid namespace. The test is successfully: #165/3 ns_current_pid_tgid/new_ns_cgrp:OK Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240315184910.2976522-1-yonghong.song@linux.dev Stable-dep-of: 21f0b0af9772 ("selftests/bpf: Fix include of ") Signed-off-by: Sasha Levin --- .../bpf/prog_tests/ns_current_pid_tgid.c | 73 +++++++++++++++++++ .../bpf/progs/test_ns_current_pid_tgid.c | 7 ++ 2 files changed, 80 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 847d7b70e290..fded38d24aae 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -12,6 +12,7 @@ #include #include #include +#include "network_helpers.h" #define STACK_SIZE (1024 * 1024) static char child_stack[STACK_SIZE]; @@ -74,6 +75,50 @@ cleanup: return ret; } +static int test_current_pid_tgid_cgrp(void *args) +{ + struct test_ns_current_pid_tgid__bss *bss; + struct test_ns_current_pid_tgid *skel; + int server_fd = -1, ret = -1, err; + int cgroup_fd = *(int *)args; + pid_t tgid, pid; + + skel = test_ns_current_pid_tgid__open(); + if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open")) + return ret; + + bpf_program__set_autoload(skel->progs.cgroup_bind4, true); + + err = test_ns_current_pid_tgid__load(skel); + if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load")) + goto cleanup; + + bss = skel->bss; + if (get_pid_tgid(&pid, &tgid, bss)) + goto cleanup; + + skel->links.cgroup_bind4 = bpf_program__attach_cgroup( + skel->progs.cgroup_bind4, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.cgroup_bind4, "bpf_program__attach_cgroup")) + goto cleanup; + + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto cleanup; + + if (!ASSERT_EQ(bss->user_pid, pid, "pid")) + goto cleanup; + if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid")) + goto cleanup; + ret = 0; + +cleanup: + if (server_fd >= 0) + close(server_fd); + test_ns_current_pid_tgid__destroy(skel); + return ret; +} + static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg) { int wstatus; @@ -95,6 +140,25 @@ static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg) return; } +static void test_in_netns(int (*fn)(void *), void *arg) +{ + struct nstoken *nstoken = NULL; + + SYS(cleanup, "ip netns add ns_current_pid_tgid"); + SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up"); + + nstoken = open_netns("ns_current_pid_tgid"); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto cleanup; + + test_ns_current_pid_tgid_new_ns(fn, arg); + +cleanup: + if (nstoken) + close_netns(nstoken); + SYS_NOFAIL("ip netns del ns_current_pid_tgid"); +} + /* TODO: use a different tracepoint */ void serial_test_ns_current_pid_tgid(void) { @@ -102,4 +166,13 @@ void serial_test_ns_current_pid_tgid(void) test_current_pid_tgid_tp(NULL); if (test__start_subtest("new_ns_tp")) test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL); + if (test__start_subtest("new_ns_cgrp")) { + int cgroup_fd = -1; + + cgroup_fd = test__join_cgroup("/sock_addr"); + if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) { + test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd); + close(cgroup_fd); + } + } } diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c index aa3ec7ca16d9..d0010e698f66 100644 --- a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c @@ -28,4 +28,11 @@ int tp_handler(const void *ctx) return 0; } +SEC("?cgroup/bind4") +int cgroup_bind4(struct bpf_sock_addr *ctx) +{ + get_pid_tgid(); + return 1; +} + char _license[] SEC("license") = "GPL"; From 52f5ed9461f4be68d4b7a4a7fd44190e5479d470 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:37 -0700 Subject: [PATCH 199/534] selftests/bpf: Fix include of [ Upstream commit 21f0b0af977203220ad58aff95e372151288ec47 ] Update ns_current_pid_tgid.c to use '#include ' and avoid compile error against mips64el/musl libc: In file included from .../prog_tests/ns_current_pid_tgid.c:14: .../include/sys/fcntl.h:1:2: error: #warning redirecting incorrect #include to [-Werror=cpp] 1 | #warning redirecting incorrect #include to | ^~~~~~~ cc1: all warnings being treated as errors Fixes: 09c02d553c49 ("bpf, selftests: Fold test_current_pid_tgid_new_ns into test_progs.") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/8bdc869749177b575025bf69600a4ce591822609.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index fded38d24aae..2c57ceede095 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include "network_helpers.h" #define STACK_SIZE (1024 * 1024) From 425d4934e4f8795e9646fdede40a25361f9ba74b Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:38 -0700 Subject: [PATCH 200/534] selftests/bpf: Fix compiling parse_tcp_hdr_opt.c with musl-libc [ Upstream commit 4c329b99ef9c118343379bde9f97e8ce5cac9fc9 ] The GNU version of 'struct tcphdr', with members 'doff' and 'urg_ptr', is not exposed by musl headers unless _GNU_SOURCE is defined. Add this definition to fix errors seen compiling for mips64el/musl-libc: parse_tcp_hdr_opt.c:18:21: error: 'struct tcphdr' has no member named 'urg_ptr' 18 | .pk6_v6.tcp.urg_ptr = 123, | ^~~~~~~ parse_tcp_hdr_opt.c:19:21: error: 'struct tcphdr' has no member named 'doff' 19 | .pk6_v6.tcp.doff = 9, /* 16 bytes of options */ | ^~~~ Fixes: cfa7b011894d ("selftests/bpf: tests for using dynptrs to parse skb and xdp buffers") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/ac5440213c242c62cb4e0d9e0a9cd5058b6a31f6.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c index daa952711d8f..e9c07d561ded 100644 --- a/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c +++ b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE #include #include #include "test_parse_tcp_hdr_opt.skel.h" From 7d8d5840453aee9f867d57dd66ff7ca62abaede6 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:39 -0700 Subject: [PATCH 201/534] selftests/bpf: Fix compiling kfree_skb.c with musl-libc [ Upstream commit bae9a5ce7d3a9b3a9e07b31ab9e9c58450e3e9fd ] The GNU version of 'struct tcphdr' with member 'doff' is not exposed by musl headers unless _GNU_SOURCE is defined. Add this definition to fix errors seen compiling for mips64el/musl-libc: In file included from kfree_skb.c:2: kfree_skb.c: In function 'on_sample': kfree_skb.c:45:30: error: 'struct tcphdr' has no member named 'doff' 45 | if (CHECK(pkt_v6->tcp.doff != 5, "check_tcp", | ^ Fixes: 580d656d80cf ("selftests/bpf: Add kfree_skb raw_tp test") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/e2d8cedc790959c10d6822a51f01a7a3616bea1b.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index c07991544a78..34f8822fd221 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE #include #include #include "kfree_skb.skel.h" From fe81b3df3cf942bcae89eebdc2ec295f97f2d35e Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:40 -0700 Subject: [PATCH 202/534] selftests/bpf: Fix compiling flow_dissector.c with musl-libc [ Upstream commit 5e4c43bcb85973243d7274e0058b6e8f5810e4f7 ] The GNU version of 'struct tcphdr' has members 'doff', 'source' and 'dest', which are not exposed by musl libc headers unless _GNU_SOURCE is defined. Add this definition to fix errors seen compiling for mips64el/musl-libc: flow_dissector.c:118:30: error: 'struct tcphdr' has no member named 'doff' 118 | .tcp.doff = 5, | ^~~~ flow_dissector.c:119:30: error: 'struct tcphdr' has no member named 'source' 119 | .tcp.source = 80, | ^~~~~~ flow_dissector.c:120:30: error: 'struct tcphdr' has no member named 'dest' 120 | .tcp.dest = 8080, | ^~~~ Fixes: ae173a915785 ("selftests/bpf: support BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/8f7ab21a73f678f9cebd32b26c444a686e57414d.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/flow_dissector.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 9625e6d21791..3171047414a7 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE #include #include #include From 227b50fe66eba3720bb32e6fe66fc7064b042a70 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:41 -0700 Subject: [PATCH 203/534] selftests/bpf: Fix compiling tcp_rtt.c with musl-libc [ Upstream commit 18826fb0b79c3c3cd1fe765d85f9c6f1a902c722 ] The GNU version of 'struct tcp_info' in 'netinet/tcp.h' is not exposed by musl headers unless _GNU_SOURCE is defined. Add this definition to fix errors seen compiling for mips64el/musl-libc: tcp_rtt.c: In function 'wait_for_ack': tcp_rtt.c:24:25: error: storage size of 'info' isn't known 24 | struct tcp_info info; | ^~~~ tcp_rtt.c:24:25: error: unused variable 'info' [-Werror=unused-variable] cc1: all warnings being treated as errors Fixes: 1f4f80fed217 ("selftests/bpf: test_progs: convert test_tcp_rtt") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/f2329767b15df206f08a5776d35a47c37da855ae.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index 8fe84da1b9b4..6a2da7a64419 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE #include #include "cgroup_helpers.h" #include "network_helpers.h" From 397192f8147664f316f5ee34894f2bc8294b2e6e Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:42 -0700 Subject: [PATCH 204/534] selftests/bpf: Fix compiling core_reloc.c with musl-libc [ Upstream commit debfa4f628f271f72933bf38d581cc53cfe1def5 ] The type 'loff_t' is a GNU extension and not exposed by the musl 'fcntl.h' header unless _GNU_SOURCE is defined. Add this definition to fix errors seen compiling for mips64el/musl-libc: In file included from tools/testing/selftests/bpf/prog_tests/core_reloc.c:4: ./bpf_testmod/bpf_testmod.h:10:9: error: unknown type name 'loff_t' 10 | loff_t off; | ^~~~~~ ./bpf_testmod/bpf_testmod.h:16:9: error: unknown type name 'loff_t' 16 | loff_t off; | ^~~~~~ Fixes: 6bcd39d366b6 ("selftests/bpf: Add CO-RE relocs selftest relying on kernel module BTF") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/11c3af75a7eb6bcb7ad9acfae6a6f470c572eb82.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 47f42e680105..26019313e1fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE #include #include "progs/core_reloc_types.h" #include "bpf_testmod/bpf_testmod.h" From 0bc023e2f6003898da1a6e29c4e1f70efd31a38a Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:43 -0700 Subject: [PATCH 205/534] selftests/bpf: Fix errors compiling lwt_redirect.c with musl libc [ Upstream commit 27c4797ce51c8dd51e35e68e9024a892f62d78b2 ] Remove a redundant include of '' which is already provided in 'lwt_helpers.h'. This avoids errors seen compiling for mips64el/musl-libc: In file included from .../arpa/inet.h:9, from lwt_redirect.c:51: .../netinet/in.h:23:8: error: redefinition of 'struct in6_addr' 23 | struct in6_addr { | ^~~~~~~~ In file included from .../linux/icmp.h:24, from lwt_redirect.c:50: .../linux/in6.h:33:8: note: originally defined here 33 | struct in6_addr { | ^~~~~~~~ .../netinet/in.h:34:8: error: redefinition of 'struct sockaddr_in6' 34 | struct sockaddr_in6 { | ^~~~~~~~~~~~ .../linux/in6.h:50:8: note: originally defined here 50 | struct sockaddr_in6 { | ^~~~~~~~~~~~ .../netinet/in.h:42:8: error: redefinition of 'struct ipv6_mreq' 42 | struct ipv6_mreq { | ^~~~~~~~~ .../linux/in6.h:60:8: note: originally defined here 60 | struct ipv6_mreq { | ^~~~~~~~~ Fixes: 43a7c3ef8a15 ("selftests/bpf: Add lwt_xmit tests for BPF_REDIRECT") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/3869dda876d5206d2f8d4dd67331c739ceb0c7f8.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/lwt_redirect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c index 2bc932a18c17..ea326e26656f 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include From 96416a7e4884b78bf8681f5d6a9678f523050f90 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:44 -0700 Subject: [PATCH 206/534] selftests/bpf: Fix errors compiling decap_sanity.c with musl libc [ Upstream commit 1b00f355130a5dfc38a01ad02458ae2cb2ebe609 ] Remove a redundant include of '', whose needed definitions are already provided by 'test_progs.h'. This avoids errors seen compiling for mips64el/musl-libc: In file included from .../arpa/inet.h:9, from ./test_progs.h:17, from prog_tests/decap_sanity.c:9: .../netinet/in.h:23:8: error: redefinition of 'struct in6_addr' 23 | struct in6_addr { | ^~~~~~~~ In file included from decap_sanity.c:7: .../linux/in6.h:33:8: note: originally defined here 33 | struct in6_addr { | ^~~~~~~~ .../netinet/in.h:34:8: error: redefinition of 'struct sockaddr_in6' 34 | struct sockaddr_in6 { | ^~~~~~~~~~~~ .../linux/in6.h:50:8: note: originally defined here 50 | struct sockaddr_in6 { | ^~~~~~~~~~~~ .../netinet/in.h:42:8: error: redefinition of 'struct ipv6_mreq' 42 | struct ipv6_mreq { | ^~~~~~~~~ .../linux/in6.h:60:8: note: originally defined here 60 | struct ipv6_mreq { | ^~~~~~~~~ Fixes: 70a00e2f1dba ("selftests/bpf: Test bpf_skb_adjust_room on CHECKSUM_PARTIAL") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/e986ba2d7edccd254b54f7cd049b98f10bafa8c3.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/decap_sanity.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c index 5c0ebe6ba866..95ea5a6a5f18 100644 --- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c @@ -4,7 +4,6 @@ #include #include #include -#include #include "test_progs.h" #include "network_helpers.h" From d57f8de839e4fb36208813a5320273a034a902ad Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 22 Jul 2024 22:54:46 -0700 Subject: [PATCH 207/534] selftests/bpf: Fix errors compiling cg_storage_multi.h with musl libc [ Upstream commit 730561d3c08d4a327cceaabf11365958a1c00cec ] Remove a redundant include of '', whose needed definitions are already included (via '') in cg_storage_multi_egress_only.c, cg_storage_multi_isolated.c, and cg_storage_multi_shared.c. This avoids redefinition errors seen compiling for mips64el/musl-libc like: In file included from progs/cg_storage_multi_egress_only.c:13: In file included from progs/cg_storage_multi.h:6: In file included from /usr/mips64el-linux-gnuabi64/include/asm/types.h:23: /usr/include/asm-generic/int-l64.h:29:25: error: typedef redefinition with different types ('long' vs 'long long') 29 | typedef __signed__ long __s64; | ^ /usr/include/asm-generic/int-ll64.h:30:44: note: previous definition is here 30 | __extension__ typedef __signed__ long long __s64; | ^ Fixes: 9e5bd1f7633b ("selftests/bpf: Test CGROUP_STORAGE map can't be used by multiple progs") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/4f4702e9f6115b7f84fea01b2326ca24c6df7ba8.1721713597.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/progs/cg_storage_multi.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h index a0778fe7857a..41d59f0ee606 100644 --- a/tools/testing/selftests/bpf/progs/cg_storage_multi.h +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi.h @@ -3,8 +3,6 @@ #ifndef __PROGS_CG_STORAGE_MULTI_H #define __PROGS_CG_STORAGE_MULTI_H -#include - struct cgroup_value { __u32 egress_pkts; __u32 ingress_pkts; From 564d1abf506b180e5682afcb89dfa8e0a4a995e7 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 29 Jul 2024 02:24:18 -0700 Subject: [PATCH 208/534] selftests/bpf: Fix arg parsing in veristat, test_progs [ Upstream commit 03bfcda1fbc37ef34aa21d2b9e09138335afc6ee ] Current code parses arguments with strtok_r() using a construct like char *state = NULL; while ((next = strtok_r(state ? NULL : input, ",", &state))) { ... } where logic assumes the 'state' var can distinguish between first and subsequent strtok_r() calls, and adjusts parameters accordingly. However, 'state' is strictly internal context for strtok_r() and no such assumptions are supported in the man page. Moreover, the exact behaviour of 'state' depends on the libc implementation, making the above code fragile. Indeed, invoking "./test_progs -t " on mips64el/musl will hang, with the above code in an infinite loop. Similarly, we see strange behaviour running 'veristat' on mips64el/musl: $ ./veristat -e file,prog,verdict,insns -C two-ok add-failure Can't specify more than 9 stats Rewrite code using a counter to distinguish between strtok_r() calls. Fixes: 61ddff373ffa ("selftests/bpf: Improve by-name subtest selection logic in prog_tests") Fixes: 394169b079b5 ("selftests/bpf: add comparison mode to veristat") Fixes: c8bc5e050976 ("selftests/bpf: Add veristat tool for mass-verifying BPF object files") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/392d8bf5559f85fa37926c1494e62312ef252c3d.1722244708.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/testing_helpers.c | 4 ++-- tools/testing/selftests/bpf/veristat.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 8d994884c7b4..6acffe0426f0 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -220,13 +220,13 @@ int parse_test_list(const char *s, bool is_glob_pattern) { char *input, *state = NULL, *test_spec; - int err = 0; + int err = 0, cnt = 0; input = strdup(s); if (!input) return -ENOMEM; - while ((test_spec = strtok_r(state ? NULL : input, ",", &state))) { + while ((test_spec = strtok_r(cnt++ ? NULL : input, ",", &state))) { err = insert_test(set, test_spec, is_glob_pattern); if (err) break; diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 0ad98b6a8e6e..611b5a0a6f7e 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -753,13 +753,13 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) static int parse_stats(const char *stats_str, struct stat_specs *specs) { char *input, *state = NULL, *next; - int err; + int err, cnt = 0; input = strdup(stats_str); if (!input) return -ENOMEM; - while ((next = strtok_r(state ? NULL : input, ",", &state))) { + while ((next = strtok_r(cnt++ ? NULL : input, ",", &state))) { err = parse_stat(next, specs); if (err) return err; @@ -1444,7 +1444,7 @@ static int parse_stats_csv(const char *filename, struct stat_specs *specs, while (fgets(line, sizeof(line), f)) { char *input = line, *state = NULL, *next; struct verif_stats *st = NULL; - int col = 0; + int col = 0, cnt = 0; if (!header) { void *tmp; @@ -1462,7 +1462,7 @@ static int parse_stats_csv(const char *filename, struct stat_specs *specs, *stat_cntp += 1; } - while ((next = strtok_r(state ? NULL : input, ",\n", &state))) { + while ((next = strtok_r(cnt++ ? NULL : input, ",\n", &state))) { if (header) { /* for the first line, set up spec stats */ err = parse_stat(next, specs); From 99c03869599cf474922fe3343977d2f686f59543 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 29 Jul 2024 02:24:19 -0700 Subject: [PATCH 209/534] selftests/bpf: Fix error compiling test_lru_map.c [ Upstream commit cacf2a5a78cd1f5f616eae043ebc6f024104b721 ] Although the post-increment in macro 'CPU_SET(next++, &cpuset)' seems safe, the sequencing can raise compile errors, so move the increment outside the macro. This avoids an error seen using gcc 12.3.0 for mips64el/musl-libc: In file included from test_lru_map.c:11: test_lru_map.c: In function 'sched_next_online': test_lru_map.c:129:29: error: operation on 'next' may be undefined [-Werror=sequence-point] 129 | CPU_SET(next++, &cpuset); | ^ cc1: all warnings being treated as errors Fixes: 3fbfadce6012 ("bpf: Fix test_lru_sanity5() in test_lru_map.c") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/22993dfb11ccf27925a626b32672fd3324cb76c4.1722244708.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/test_lru_map.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 4d0650cfb5cd..fda7589c5023 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -126,7 +126,8 @@ static int sched_next_online(int pid, int *next_to_try) while (next < nr_cpus) { CPU_ZERO(&cpuset); - CPU_SET(next++, &cpuset); + CPU_SET(next, &cpuset); + next++; if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) { ret = 0; break; From fb99b106ad3874b4dc431d09f4569049e1387172 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 29 Jul 2024 02:24:20 -0700 Subject: [PATCH 210/534] selftests/bpf: Fix C++ compile error from missing _Bool type [ Upstream commit aa95073fd290b5b3e45f067fa22bb25e59e1ff7c ] While building, bpftool makes a skeleton from test_core_extern.c, which itself includes and uses the 'bool' type. However, the skeleton test_core_extern.skel.h generated *does not* include or use the 'bool' type, instead using the C-only '_Bool' type. Compiling test_cpp.cpp with g++ 12.3 for mips64el/musl-libc then fails with error: In file included from test_cpp.cpp:9: test_core_extern.skel.h:45:17: error: '_Bool' does not name a type 45 | _Bool CONFIG_BOOL; | ^~~~~ This was likely missed previously because glibc uses a GNU extension for with C++ (#define _Bool bool), not supported by musl libc. Normally, a C fragment would include and use the 'bool' type, and thus cleanly work after import by C++. The ideal fix would be for 'bpftool gen skeleton' to output the correct type/include supporting C++, but in the meantime add a conditional define as above. Fixes: 7c8dce4b1661 ("bpftool: Make skeleton C code compilable with C++ compiler") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/6fc1dd28b8bda49e51e4f610bdc9d22f4455632d.1722244708.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/test_cpp.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp index f4936834f76f..435341c25420 100644 --- a/tools/testing/selftests/bpf/test_cpp.cpp +++ b/tools/testing/selftests/bpf/test_cpp.cpp @@ -6,6 +6,10 @@ #include #include #include + +#ifndef _Bool +#define _Bool bool +#endif #include "test_core_extern.skel.h" template From a7d322fd3be85608209684104688cae1e259b807 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 4 Feb 2024 21:29:14 -0800 Subject: [PATCH 211/534] selftests/bpf: Fix flaky selftest lwt_redirect/lwt_reroute [ Upstream commit e7f31873176a345d72ca77c7b4da48493ccd9efd ] Recently, when running './test_progs -j', I occasionally hit the following errors: test_lwt_redirect:PASS:pthread_create 0 nsec test_lwt_redirect_run:FAIL:netns_create unexpected error: 256 (errno 0) #142/2 lwt_redirect/lwt_redirect_normal_nomac:FAIL #142 lwt_redirect:FAIL test_lwt_reroute:PASS:pthread_create 0 nsec test_lwt_reroute_run:FAIL:netns_create unexpected error: 256 (errno 0) test_lwt_reroute:PASS:pthread_join 0 nsec #143/2 lwt_reroute/lwt_reroute_qdisc_dropped:FAIL #143 lwt_reroute:FAIL The netns_create() definition looks like below: #define NETNS "ns_lwt" static inline int netns_create(void) { return system("ip netns add " NETNS); } One possibility is that both lwt_redirect and lwt_reroute create netns with the same name "ns_lwt" which may cause conflict. I tried the following example: $ sudo ip netns add abc $ echo $? 0 $ sudo ip netns add abc Cannot create namespace file "/var/run/netns/abc": File exists $ echo $? 1 $ The return code for above netns_create() is 256. The internet search suggests that the return value for 'ip netns add ns_lwt' is 1, which matches the above 'sudo ip netns add abc' example. This patch tried to use different netns names for two tests to avoid 'ip netns add ' failure. I ran './test_progs -j' 10 times and all succeeded with lwt_redirect/lwt_reroute tests. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Tested-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240205052914.1742687-1-yonghong.song@linux.dev Stable-dep-of: 16b795cc5952 ("selftests/bpf: Fix redefinition errors compiling lwt_reroute.c") Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/lwt_helpers.h | 2 -- tools/testing/selftests/bpf/prog_tests/lwt_redirect.c | 1 + tools/testing/selftests/bpf/prog_tests/lwt_reroute.c | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h index 61333f2a03f9..68c08309dbc8 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -27,8 +27,6 @@ } \ }) -#define NETNS "ns_lwt" - static inline int netns_create(void) { return system("ip netns add " NETNS); diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c index ea326e26656f..7b458ae5f0f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c @@ -53,6 +53,7 @@ #include #include +#define NETNS "ns_lwt_redirect" #include "lwt_helpers.h" #include "test_progs.h" #include "network_helpers.h" diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c index f4bb2d5fcae0..c104f07f1b0b 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c @@ -48,6 +48,7 @@ * For case 2, force UDP packets to overflow fq limit. As long as kernel * is not crashed, it is considered successful. */ +#define NETNS "ns_lwt_reroute" #include "lwt_helpers.h" #include "network_helpers.h" #include From 7824530b80ba9d00e91d55c2f557c49915947631 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 29 Jul 2024 02:24:21 -0700 Subject: [PATCH 212/534] selftests/bpf: Fix redefinition errors compiling lwt_reroute.c [ Upstream commit 16b795cc59528cf280abc79af3c70bda42f715b9 ] Compiling lwt_reroute.c with GCC 12.3 for mips64el/musl-libc yields errors: In file included from .../include/arpa/inet.h:9, from ./test_progs.h:18, from tools/testing/selftests/bpf/prog_tests/lwt_helpers.h:11, from tools/testing/selftests/bpf/prog_tests/lwt_reroute.c:52: .../include/netinet/in.h:23:8: error: redefinition of 'struct in6_addr' 23 | struct in6_addr { | ^~~~~~~~ In file included from .../include/linux/icmp.h:24, from tools/testing/selftests/bpf/prog_tests/lwt_helpers.h:9: .../include/linux/in6.h:33:8: note: originally defined here 33 | struct in6_addr { | ^~~~~~~~ .../include/netinet/in.h:34:8: error: redefinition of 'struct sockaddr_in6' 34 | struct sockaddr_in6 { | ^~~~~~~~~~~~ .../include/linux/in6.h:50:8: note: originally defined here 50 | struct sockaddr_in6 { | ^~~~~~~~~~~~ .../include/netinet/in.h:42:8: error: redefinition of 'struct ipv6_mreq' 42 | struct ipv6_mreq { | ^~~~~~~~~ .../include/linux/in6.h:60:8: note: originally defined here 60 | struct ipv6_mreq { | ^~~~~~~~~ These errors occur because is included before , bypassing the Linux uapi/libc compat mechanism's partial musl support. As described in [1] and [2], fix these errors by including in lwt_reroute.c before any uapi headers. [1]: commit c0bace798436 ("uapi libc compat: add fallback for unsupported libcs") [2]: https://git.musl-libc.org/cgit/musl/commit/?id=04983f227238 Fixes: 6c77997bc639 ("selftests/bpf: Add lwt_xmit tests for BPF_REROUTE") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/bd2908aec0755ba8b75f5dc41848b00585f5c73e.1722244708.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/lwt_reroute.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c index c104f07f1b0b..920ee3042d09 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c @@ -49,6 +49,7 @@ * is not crashed, it is considered successful. */ #define NETNS "ns_lwt_reroute" +#include #include "lwt_helpers.h" #include "network_helpers.h" #include From 8553067f1caec863963b38f4f7e1f23af17c667e Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 29 Jul 2024 02:24:22 -0700 Subject: [PATCH 213/534] selftests/bpf: Fix compile if backtrace support missing in libc [ Upstream commit c9a83e76b5a96801a2c7ea0a79ca77c356d8b38d ] Include GNU header only with glibc and provide weak, stubbed backtrace functions as a fallback in test_progs.c. This allows for non-GNU replacements while avoiding compile errors (e.g. with musl libc) like: test_progs.c:13:10: fatal error: execinfo.h: No such file or directory 13 | #include /* backtrace */ | ^~~~~~~~~~~~ test_progs.c: In function 'crash_handler': test_progs.c:1034:14: error: implicit declaration of function 'backtrace' [-Werror=implicit-function-declaration] 1034 | sz = backtrace(bt, ARRAY_SIZE(bt)); | ^~~~~~~~~ test_progs.c:1045:9: error: implicit declaration of function 'backtrace_symbols_fd' [-Werror=implicit-function-declaration] 1045 | backtrace_symbols_fd(bt, sz, STDERR_FILENO); | ^~~~~~~~~~~~~~~~~~~~ Fixes: 9fb156bb82a3 ("selftests/bpf: Print backtrace on SIGSEGV in test_progs") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/aa6dc8e23710cb457b278039d0081de7e7b4847d.1722244708.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/test_progs.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index eed0e4fcf42e..74620ed3a166 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -10,7 +10,6 @@ #include #include #include -#include /* backtrace */ #include /* get_nprocs */ #include #include @@ -19,6 +18,21 @@ #include #include "json_writer.h" +#ifdef __GLIBC__ +#include /* backtrace */ +#endif + +/* Default backtrace funcs if missing at link */ +__weak int backtrace(void **buffer, int size) +{ + return 0; +} + +__weak void backtrace_symbols_fd(void *const *buffer, int size, int fd) +{ + dprintf(fd, "\n"); +} + static bool verbose(void) { return env.verbosity > VERBOSE_NONE; From 33ef0b25b02217a875675e7963f44771340ee69c Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 29 Jul 2024 02:24:24 -0700 Subject: [PATCH 214/534] selftests/bpf: Fix error compiling tc_redirect.c with musl libc [ Upstream commit 21c5f4f55da759c7444a1ef13e90b6e6f674eeeb ] Linux 5.1 implemented 64-bit time types and related syscalls to address the Y2038 problem generally across archs. Userspace handling of Y2038 varies with the libc however. While musl libc uses 64-bit time across all 32-bit and 64-bit platforms, GNU glibc uses 64-bit time on 64-bit platforms but defaults to 32-bit time on 32-bit platforms unless they "opt-in" to 64-bit time or explicitly use 64-bit syscalls and time structures. One specific area is the standard setsockopt() call, SO_TIMESTAMPNS option used for timestamping, and the related output 'struct timespec'. GNU glibc defaults as above, also exposing the SO_TIMESTAMPNS_NEW flag to explicitly use a 64-bit call and 'struct __kernel_timespec'. Since these are not exposed or needed with musl libc, their use in tc_redirect.c leads to compile errors building for mips64el/musl: tc_redirect.c: In function 'rcv_tstamp': tc_redirect.c:425:32: error: 'SO_TIMESTAMPNS_NEW' undeclared (first use in this function); did you mean 'SO_TIMESTAMPNS'? 425 | cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) | ^~~~~~~~~~~~~~~~~~ | SO_TIMESTAMPNS tc_redirect.c:425:32: note: each undeclared identifier is reported only once for each function it appears in tc_redirect.c: In function 'test_inet_dtime': tc_redirect.c:491:49: error: 'SO_TIMESTAMPNS_NEW' undeclared (first use in this function); did you mean 'SO_TIMESTAMPNS'? 491 | err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, | ^~~~~~~~~~~~~~~~~~ | SO_TIMESTAMPNS However, using SO_TIMESTAMPNS_NEW isn't strictly needed, nor is Y2038 being explicitly tested. The timestamp checks in tc_redirect.c are simple: the packet receive timestamp is non-zero and processed/handled in less than 5 seconds. Switch to using the standard setsockopt() call and SO_TIMESTAMPNS option to ensure compatibility across glibc and musl libc. In the worst-case, there is a 5-second window 14 years from now where tc_redirect tests may fail on 32-bit systems. However, we should reasonably expect glibc to adopt a 64-bit mandate rather than the current "opt-in" policy before the Y2038 roll-over. Fixes: ce6f6cffaeaa ("selftests/bpf: Wait for the netstamp_needed_key static key to be turned on") Fixes: c803475fd8dd ("bpf: selftests: test skb->tstamp in redirect_neigh") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/031d656c058b4e55ceae56ef49c4e1729b5090f3.1722244708.git.tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index af3c31f82a8a..5a6401733587 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -421,7 +421,7 @@ static int set_forwarding(bool enable) static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp) { - struct __kernel_timespec pkt_ts = {}; + struct timespec pkt_ts = {}; char ctl[CMSG_SPACE(sizeof(pkt_ts))]; struct timespec now_ts; struct msghdr msg = {}; @@ -445,7 +445,7 @@ static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp) cmsg = CMSG_FIRSTHDR(&msg); if (cmsg && cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) + cmsg->cmsg_type == SO_TIMESTAMPNS) memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts)); pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec; @@ -487,9 +487,9 @@ static int wait_netstamp_needed_key(void) if (!ASSERT_GE(srv_fd, 0, "start_server")) goto done; - err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS, &opt, sizeof(opt)); - if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)")) + if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)")) goto done; cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS); @@ -571,9 +571,9 @@ static void test_inet_dtime(int family, int type, const char *addr, __u16 port) return; /* Ensure the kernel puts the (rcv) timestamp for all skb */ - err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS, &opt, sizeof(opt)); - if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)")) + if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)")) goto done; if (type == SOCK_STREAM) { From fc2b89707e478ccc7127e59bb202dc847c8918f8 Mon Sep 17 00:00:00 2001 From: Jiangshan Yi Date: Thu, 15 Aug 2024 21:55:24 +0800 Subject: [PATCH 215/534] samples/bpf: Fix compilation errors with cf-protection option [ Upstream commit fdf1c728fac541891ef1aa773bfd42728626769c ] Currently, compiling the bpf programs will result the compilation errors with the cf-protection option as follows in arm64 and loongarch64 machine when using gcc 12.3.1 and clang 17.0.6. This commit fixes the compilation errors by limited the cf-protection option only used in x86 platform. [root@localhost linux]# make M=samples/bpf ...... CLANG-bpf samples/bpf/xdp2skb_meta_kern.o error: option 'cf-protection=return' cannot be specified on this target error: option 'cf-protection=branch' cannot be specified on this target 2 errors generated. CLANG-bpf samples/bpf/syscall_tp_kern.o error: option 'cf-protection=return' cannot be specified on this target error: option 'cf-protection=branch' cannot be specified on this target 2 errors generated. ...... Fixes: 34f6e38f58db ("samples/bpf: fix warning with ignored-attributes") Reported-by: Jiangshan Yi Signed-off-by: Jiangshan Yi Signed-off-by: Andrii Nakryiko Tested-by: Qiang Wang Link: https://lore.kernel.org/bpf/20240815135524.140675-1-13667453960@163.com Signed-off-by: Sasha Levin --- samples/bpf/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4ccf4236031c..3fa16412db15 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -166,6 +166,10 @@ BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic endif endif +ifeq ($(ARCH), x86) +BPF_EXTRA_CFLAGS += -fcf-protection +endif + TPROGS_CFLAGS += -Wall -O2 TPROGS_CFLAGS += -Wmissing-prototypes TPROGS_CFLAGS += -Wstrict-prototypes @@ -394,7 +398,7 @@ $(obj)/%.o: $(src)/%.c -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ - -fno-asynchronous-unwind-tables -fcf-protection \ + -fno-asynchronous-unwind-tables \ -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \ -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ From 2288b54b96dcb55bedebcef3572bb8821fc5e708 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 22 Aug 2024 01:01:23 -0700 Subject: [PATCH 216/534] bpf: correctly handle malformed BPF_CORE_TYPE_ID_LOCAL relos [ Upstream commit 3d2786d65aaa954ebd3fcc033ada433e10da21c4 ] In case of malformed relocation record of kind BPF_CORE_TYPE_ID_LOCAL referencing a non-existing BTF type, function bpf_core_calc_relo_insn would cause a null pointer deference. Fix this by adding a proper check upper in call stack, as malformed relocation records could be passed from user space. Simplest reproducer is a program: r0 = 0 exit With a single relocation record: .insn_off = 0, /* patch first instruction */ .type_id = 100500, /* this type id does not exist */ .access_str_off = 6, /* offset of string "0" */ .kind = BPF_CORE_TYPE_ID_LOCAL, See the link for original reproducer or next commit for a test case. Fixes: 74753e1462e7 ("libbpf: Replace btf__type_by_id() with btf_type_by_id().") Reported-by: Liu RuiTong Closes: https://lore.kernel.org/bpf/CAK55_s6do7C+DVwbwY_7nKfUz0YLDoiA1v6X3Y9+p0sWzipFSA@mail.gmail.com/ Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240822080124.2995724-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/btf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index fbf9721ba21b..e0e4d4f490e8 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -8421,6 +8421,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, struct bpf_core_cand_list cands = {}; struct bpf_core_relo_res targ_res; struct bpf_core_spec *specs; + const struct btf_type *type; int err; /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" @@ -8430,6 +8431,13 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, if (!specs) return -ENOMEM; + type = btf_type_by_id(ctx->btf, relo->type_id); + if (!type) { + bpf_log(ctx->log, "relo #%u: bad type id %u\n", + relo_idx, relo->type_id); + return -EINVAL; + } + if (need_cands) { struct bpf_cand_cache *cc; int i; From b7d6e724e42a41adb0666376260b95cc27360cab Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:24 +0300 Subject: [PATCH 217/534] xz: cleanup CRC32 edits from 2018 [ Upstream commit 2ee96abef214550d9e92f5143ee3ac1fd1323e67 ] In 2018, a dependency on was added to avoid duplicating the same constant in multiple files. Two months later it was found to be a bad idea and the definition of CRC32_POLY_LE macro was moved into xz_private.h to avoid including . xz_private.h is a wrong place for it too. Revert back to the upstream version which has the poly in xz_crc32_init() in xz_crc32.c. Link: https://lkml.kernel.org/r/20240721133633.47721-10-lasse.collin@tukaani.org Fixes: faa16bc404d7 ("lib: Use existing define with polynomial") Fixes: 242cdad873a7 ("lib/xz: Put CRC32_POLY_LE in xz_private.h") Signed-off-by: Lasse Collin Reviewed-by: Sam James Tested-by: Michael Ellerman (powerpc) Cc: Krzysztof Kozlowski Cc: Herbert Xu Cc: Joel Stanley Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- lib/xz/xz_crc32.c | 2 +- lib/xz/xz_private.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 88a2c35e1b59..5627b00fca29 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -29,7 +29,7 @@ STATIC_RW_DATA uint32_t xz_crc32_table[256]; XZ_EXTERN void xz_crc32_init(void) { - const uint32_t poly = CRC32_POLY_LE; + const uint32_t poly = 0xEDB88320; uint32_t i; uint32_t j; diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index bf1e94ec7873..d9fd49b45fd7 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -105,10 +105,6 @@ # endif #endif -#ifndef CRC32_POLY_LE -#define CRC32_POLY_LE 0xedb88320 -#endif - /* * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used * before calling xz_dec_lzma2_run(). From cfd257f5e85b40bdd4e53f87a0536b39a157623b Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 27 Aug 2024 19:23:08 +0800 Subject: [PATCH 218/534] kthread: fix task state in kthread worker if being frozen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e16c7b07784f3fb03025939c4590b9a7c64970a7 ] When analyzing a kernel waring message, Peter pointed out that there is a race condition when the kworker is being frozen and falls into try_to_freeze() with TASK_INTERRUPTIBLE, which could trigger a might_sleep() warning in try_to_freeze(). Although the root cause is not related to freeze()[1], it is still worthy to fix this issue ahead. One possible race scenario: CPU 0 CPU 1 ----- ----- // kthread_worker_fn set_current_state(TASK_INTERRUPTIBLE); suspend_freeze_processes() freeze_processes static_branch_inc(&freezer_active); freeze_kernel_threads pm_nosig_freezing = true; if (work) { //false __set_current_state(TASK_RUNNING); } else if (!freezing(current)) //false, been frozen freezing(): if (static_branch_unlikely(&freezer_active)) if (pm_nosig_freezing) return true; schedule() } // state is still TASK_INTERRUPTIBLE try_to_freeze() might_sleep() <--- warning Fix this by explicitly set the TASK_RUNNING before entering try_to_freeze(). Link: https://lore.kernel.org/lkml/Zs2ZoAcUsZMX2B%2FI@chenyu5-mobl2/ [1] Link: https://lkml.kernel.org/r/20240827112308.181081-1-yu.c.chen@intel.com Fixes: b56c0d8937e6 ("kthread: implement kthread_worker") Signed-off-by: Chen Yu Suggested-by: Peter Zijlstra Suggested-by: Andrew Morton Cc: Andreas Gruenbacher Cc: David Gow Cc: Mateusz Guzik Cc: Mickaël Salaün Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/kthread.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 290cbc845225..ac03bc55e17c 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -844,8 +844,16 @@ repeat: * event only cares about the address. */ trace_sched_kthread_work_execute_end(work, func); - } else if (!freezing(current)) + } else if (!freezing(current)) { schedule(); + } else { + /* + * Handle the case where the current remains + * TASK_INTERRUPTIBLE. try_to_freeze() expects + * the current to be TASK_RUNNING. + */ + __set_current_state(TASK_RUNNING); + } try_to_freeze(); cond_resched(); From e4006410b0f53a91f332c9b90ea35637cbc21659 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Sat, 17 Aug 2024 16:55:10 +0800 Subject: [PATCH 219/534] ext4: clear EXT4_GROUP_INFO_WAS_TRIMMED_BIT even mount with discard [ Upstream commit 20cee68f5b44fdc2942d20f3172a262ec247b117 ] Commit 3d56b8d2c74c ("ext4: Speed up FITRIM by recording flags in ext4_group_info") speed up fstrim by skipping trim trimmed group. We also has the chance to clear trimmed once there exists some block free for this group(mount without discard), and the next trim for this group will work well too. For mount with discard, we will issue dicard when we free blocks, so leave trimmed flag keep alive to skip useless trim trigger from userspace seems reasonable. But for some case like ext4 build on dm-thinpool(ext4 blocksize 4K, pool blocksize 128K), discard from ext4 maybe unaligned for dm thinpool, and thinpool will just finish this discard(see process_discard_bio when begein equals to end) without actually process discard. For this case, trim from userspace can really help us to free some thinpool block. So convert to clear trimmed flag for all case no matter mounted with discard or not. Fixes: 3d56b8d2c74c ("ext4: Speed up FITRIM by recording flags in ext4_group_info") Signed-off-by: yangerkun Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240817085510.2084444-1-yangerkun@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 870397f3de55..87ba7f58216f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3886,11 +3886,8 @@ static void ext4_free_data_in_buddy(struct super_block *sb, /* * Clear the trimmed flag for the group so that the next * ext4_trim_fs can trim it. - * If the volume is mounted with -o discard, online discard - * is supported and the free blocks will be trimmed online. */ - if (!test_opt(sb, DISCARD)) - EXT4_MB_GRP_CLEAR_TRIMMED(db); + EXT4_MB_GRP_CLEAR_TRIMMED(db); if (!db->bb_free_root.rb_node) { /* No more items in the per group rb tree @@ -6586,8 +6583,9 @@ do_more: " group:%u block:%d count:%lu failed" " with %d", block_group, bit, count, err); - } else - EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); + } + + EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); From 72eef5226fbef8b8d6108f55becb301f79ec2b3e Mon Sep 17 00:00:00 2001 From: Jiawei Ye Date: Mon, 2 Sep 2024 08:47:26 +0000 Subject: [PATCH 220/534] smackfs: Use rcu_assign_pointer() to ensure safe assignment in smk_set_cipso [ Upstream commit 2749749afa071f8a0e405605de9da615e771a7ce ] In the `smk_set_cipso` function, the `skp->smk_netlabel.attr.mls.cat` field is directly assigned to a new value without using the appropriate RCU pointer assignment functions. According to RCU usage rules, this is illegal and can lead to unpredictable behavior, including data inconsistencies and impossible-to-diagnose memory corruption issues. This possible bug was identified using a static analysis tool developed by myself, specifically designed to detect RCU-related issues. To address this, the assignment is now done using rcu_assign_pointer(), which ensures that the pointer assignment is done safely, with the necessary memory barriers and synchronization. This change prevents potential RCU dereference issues by ensuring that the `cat` field is safely updated while still adhering to RCU's requirements. Fixes: 0817534ff9ea ("smackfs: Fix use-after-free in netlbl_catmap_walk()") Signed-off-by: Jiawei Ye Signed-off-by: Casey Schaufler Signed-off-by: Sasha Levin --- security/smack/smackfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index e22aad7604e8..5dd1e164f9b1 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -932,7 +932,7 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf, } if (rc >= 0) { old_cat = skp->smk_netlabel.attr.mls.cat; - skp->smk_netlabel.attr.mls.cat = ncats.attr.mls.cat; + rcu_assign_pointer(skp->smk_netlabel.attr.mls.cat, ncats.attr.mls.cat); skp->smk_netlabel.attr.mls.lvl = ncats.attr.mls.lvl; synchronize_rcu(); netlbl_catmap_free(old_cat); From 7a349feead81c61ad122cace70531bf8e13adf70 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 20 Aug 2024 21:22:28 +0800 Subject: [PATCH 221/534] ext4: avoid buffer_head leak in ext4_mark_inode_used() [ Upstream commit 5e5b2a56c57def1b41efd49596621504d7bcc61c ] Release inode_bitmap_bh from ext4_read_inode_bitmap() in ext4_mark_inode_used() to avoid buffer_head leak. By the way, remove unneeded goto for invalid ino when inode_bitmap_bh is NULL. Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Signed-off-by: Kemeng Shi Link: https://patch.msgid.link/20240820132234.2759926-2-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/ialloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b65058d972f9..b4699a415c21 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -755,10 +755,10 @@ int ext4_mark_inode_used(struct super_block *sb, int ino) struct ext4_group_desc *gdp; ext4_group_t group; int bit; - int err = -EFSCORRUPTED; + int err; if (ino < EXT4_FIRST_INO(sb) || ino > max_ino) - goto out; + return -EFSCORRUPTED; group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); @@ -860,6 +860,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino) err = ext4_handle_dirty_metadata(NULL, NULL, group_desc_bh); sync_dirty_buffer(group_desc_bh); out: + brelse(inode_bitmap_bh); return err; } From fae0793abdab022c0778a2440108e8cd7bb6d7d0 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 20 Aug 2024 21:22:29 +0800 Subject: [PATCH 222/534] ext4: avoid potential buffer_head leak in __ext4_new_inode() [ Upstream commit 227d31b9214d1b9513383cf6c7180628d4b3b61f ] If a group is marked EXT4_GROUP_INFO_IBITMAP_CORRUPT after it's inode bitmap buffer_head was successfully verified, then __ext4_new_inode() will get a valid inode_bitmap_bh of a corrupted group from ext4_read_inode_bitmap() in which case inode_bitmap_bh misses a release. Hnadle "IS_ERR(inode_bitmap_bh)" and group corruption separately like how ext4_free_inode() does to avoid buffer_head leak. Fixes: 9008a58e5dce ("ext4: make the bitmap read routines return real error codes") Signed-off-by: Kemeng Shi Link: https://patch.msgid.link/20240820132234.2759926-3-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/ialloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b4699a415c21..d7d89133ed36 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1054,12 +1054,13 @@ got_group: brelse(inode_bitmap_bh); inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); /* Skip groups with suspicious inode tables */ - if (((!(sbi->s_mount_state & EXT4_FC_REPLAY)) - && EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || - IS_ERR(inode_bitmap_bh)) { + if (IS_ERR(inode_bitmap_bh)) { inode_bitmap_bh = NULL; goto next_group; } + if (!(sbi->s_mount_state & EXT4_FC_REPLAY) && + EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + goto next_group; repeat_in_this_group: ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino); From 9f70768554acf61327fc8352bd5ae313a72d68c4 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 20 Aug 2024 21:22:30 +0800 Subject: [PATCH 223/534] ext4: avoid negative min_clusters in find_group_orlov() [ Upstream commit bb0a12c3439b10d88412fd3102df5b9a6e3cd6dc ] min_clusters is signed integer and will be converted to unsigned integer when compared with unsigned number stats.free_clusters. If min_clusters is negative, it will be converted to a huge unsigned value in which case all groups may not meet the actual desired free clusters. Set negative min_clusters to 0 to avoid unexpected behavior. Fixes: ac27a0ec112a ("[PATCH] ext4: initial copy of files from ext3") Signed-off-by: Kemeng Shi Link: https://patch.msgid.link/20240820132234.2759926-4-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/ialloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index d7d89133ed36..1a1e2214c581 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -514,6 +514,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, if (min_inodes < 1) min_inodes = 1; min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4; + if (min_clusters < 0) + min_clusters = 0; /* * Start looking in the flex group where we last allocated an From dd3f90e8c470f650658eac552cd6a6e7b082b358 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 21 Aug 2024 12:23:22 -0300 Subject: [PATCH 224/534] ext4: return error on ext4_find_inline_entry [ Upstream commit 4d231b91a944f3cab355fce65af5871fb5d7735b ] In case of errors when reading an inode from disk or traversing inline directory entries, return an error-encoded ERR_PTR instead of returning NULL. ext4_find_inline_entry only caller, __ext4_find_entry already returns such encoded errors. Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://patch.msgid.link/20240821152324.3621860-3-cascardo@igalia.com Signed-off-by: Theodore Ts'o Stable-dep-of: c6b72f5d82b1 ("ext4: avoid OOB when system.data xattr changes underneath the filesystem") Signed-off-by: Sasha Levin --- fs/ext4/inline.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index a604aa1d23ae..7adc6634d523 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1670,8 +1670,9 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, void *inline_start; int inline_size; - if (ext4_get_inode_loc(dir, &iloc)) - return NULL; + ret = ext4_get_inode_loc(dir, &iloc); + if (ret) + return ERR_PTR(ret); down_read(&EXT4_I(dir)->xattr_sem); if (!ext4_has_inline_data(dir)) { @@ -1702,7 +1703,10 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, out: brelse(iloc.bh); - iloc.bh = NULL; + if (ret < 0) + iloc.bh = ERR_PTR(ret); + else + iloc.bh = NULL; out_find: up_read(&EXT4_I(dir)->xattr_sem); return iloc.bh; From 2a6579ef5f2576a940125729f7409cc182f1c8df Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 21 Aug 2024 12:23:24 -0300 Subject: [PATCH 225/534] ext4: avoid OOB when system.data xattr changes underneath the filesystem [ Upstream commit c6b72f5d82b1017bad80f9ebf502832fc321d796 ] When looking up for an entry in an inlined directory, if e_value_offs is changed underneath the filesystem by some change in the block device, it will lead to an out-of-bounds access that KASAN detects as an UAF. EXT4-fs (loop0): mounted filesystem 00000000-0000-0000-0000-000000000000 r/w without journal. Quota mode: none. loop0: detected capacity change from 2048 to 2047 ================================================================== BUG: KASAN: use-after-free in ext4_search_dir+0xf2/0x1c0 fs/ext4/namei.c:1500 Read of size 1 at addr ffff88803e91130f by task syz-executor269/5103 CPU: 0 UID: 0 PID: 5103 Comm: syz-executor269 Not tainted 6.11.0-rc4-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 ext4_search_dir+0xf2/0x1c0 fs/ext4/namei.c:1500 ext4_find_inline_entry+0x4be/0x5e0 fs/ext4/inline.c:1697 __ext4_find_entry+0x2b4/0x1b30 fs/ext4/namei.c:1573 ext4_lookup_entry fs/ext4/namei.c:1727 [inline] ext4_lookup+0x15f/0x750 fs/ext4/namei.c:1795 lookup_one_qstr_excl+0x11f/0x260 fs/namei.c:1633 filename_create+0x297/0x540 fs/namei.c:3980 do_symlinkat+0xf9/0x3a0 fs/namei.c:4587 __do_sys_symlinkat fs/namei.c:4610 [inline] __se_sys_symlinkat fs/namei.c:4607 [inline] __x64_sys_symlinkat+0x95/0xb0 fs/namei.c:4607 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f3e73ced469 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 21 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fff4d40c258 EFLAGS: 00000246 ORIG_RAX: 000000000000010a RAX: ffffffffffffffda RBX: 0032656c69662f2e RCX: 00007f3e73ced469 RDX: 0000000020000200 RSI: 00000000ffffff9c RDI: 00000000200001c0 RBP: 0000000000000000 R08: 00007fff4d40c290 R09: 00007fff4d40c290 R10: 0023706f6f6c2f76 R11: 0000000000000246 R12: 00007fff4d40c27c R13: 0000000000000003 R14: 431bde82d7b634db R15: 00007fff4d40c2b0 Calling ext4_xattr_ibody_find right after reading the inode with ext4_get_inode_loc will lead to a check of the validity of the xattrs, avoiding this problem. Reported-by: syzbot+0c2508114d912a54ee79@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0c2508114d912a54ee79 Fixes: e8e948e7802a ("ext4: let ext4_find_entry handle inline data") Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://patch.msgid.link/20240821152324.3621860-5-cascardo@igalia.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/inline.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 7adc6634d523..cb65052ee3de 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1665,25 +1665,36 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, struct ext4_dir_entry_2 **res_dir, int *has_inline_data) { + struct ext4_xattr_ibody_find is = { + .s = { .not_found = -ENODATA, }, + }; + struct ext4_xattr_info i = { + .name_index = EXT4_XATTR_INDEX_SYSTEM, + .name = EXT4_XATTR_SYSTEM_DATA, + }; int ret; - struct ext4_iloc iloc; void *inline_start; int inline_size; - ret = ext4_get_inode_loc(dir, &iloc); + ret = ext4_get_inode_loc(dir, &is.iloc); if (ret) return ERR_PTR(ret); down_read(&EXT4_I(dir)->xattr_sem); + + ret = ext4_xattr_ibody_find(dir, &i, &is); + if (ret) + goto out; + if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; } - inline_start = (void *)ext4_raw_inode(&iloc)->i_block + + inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block + EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; - ret = ext4_search_dir(iloc.bh, inline_start, inline_size, + ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, dir, fname, 0, res_dir); if (ret == 1) goto out_find; @@ -1693,23 +1704,23 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE) goto out; - inline_start = ext4_get_inline_xattr_pos(dir, &iloc); + inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc); inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; - ret = ext4_search_dir(iloc.bh, inline_start, inline_size, + ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, dir, fname, 0, res_dir); if (ret == 1) goto out_find; out: - brelse(iloc.bh); + brelse(is.iloc.bh); if (ret < 0) - iloc.bh = ERR_PTR(ret); + is.iloc.bh = ERR_PTR(ret); else - iloc.bh = NULL; + is.iloc.bh = NULL; out_find: up_read(&EXT4_I(dir)->xattr_sem); - return iloc.bh; + return is.iloc.bh; } int ext4_delete_inline_entry(handle_t *handle, From faeff8b1ee2eaa5969c8e994d66c3337298cefed Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Fri, 30 Aug 2024 12:50:57 +0530 Subject: [PATCH 226/534] ext4: check stripe size compatibility on remount as well [ Upstream commit ee85e0938aa8f9846d21e4d302c3cf6a2a75110d ] We disable stripe size in __ext4_fill_super if it is not a multiple of the cluster ratio however this check is missed when trying to remount. This can leave us with cases where stripe < cluster_ratio after remount:set making EXT4_B2C(sbi->s_stripe) become 0 that can cause some unforeseen bugs like divide by 0. Fix that by adding the check in remount path as well. Reported-by: syzbot+1ad8bac5af24d01e2cbd@syzkaller.appspotmail.com Tested-by: syzbot+1ad8bac5af24d01e2cbd@syzkaller.appspotmail.com Reviewed-by: Kemeng Shi Reviewed-by: Ritesh Harjani (IBM) Fixes: c3defd99d58c ("ext4: treat stripe in block unit") Signed-off-by: Ojaswin Mujoo Link: https://patch.msgid.link/3a493bb503c3598e25dcfbed2936bb2dff3fece7.1725002410.git.ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/super.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5baacb3058ab..46c4f7504979 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5205,6 +5205,18 @@ static int ext4_block_group_meta_init(struct super_block *sb, int silent) return 0; } +/* + * It's hard to get stripe aligned blocks if stripe is not aligned with + * cluster, just disable stripe and alert user to simplify code and avoid + * stripe aligned allocation which will rarely succeed. + */ +static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + return (stripe > 0 && sbi->s_cluster_ratio > 1 && + stripe % sbi->s_cluster_ratio != 0); +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct ext4_super_block *es = NULL; @@ -5312,13 +5324,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount3; sbi->s_stripe = ext4_get_stripe_size(sbi); - /* - * It's hard to get stripe aligned blocks if stripe is not aligned with - * cluster, just disable stripe and alert user to simpfy code and avoid - * stripe aligned allocation which will rarely successes. - */ - if (sbi->s_stripe > 0 && sbi->s_cluster_ratio > 1 && - sbi->s_stripe % sbi->s_cluster_ratio != 0) { + if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) { ext4_msg(sb, KERN_WARNING, "stripe (%lu) is not aligned with cluster size (%u), " "stripe is disabled", @@ -6482,6 +6488,15 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) } + if ((ctx->spec & EXT4_SPEC_s_stripe) && + ext4_is_stripe_incompatible(sb, ctx->s_stripe)) { + ext4_msg(sb, KERN_WARNING, + "stripe (%lu) is not aligned with cluster size (%u), " + "stripe is disabled", + ctx->s_stripe, sbi->s_cluster_ratio); + ctx->s_stripe = 0; + } + /* * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause * two calls to ext4_should_dioread_nolock() to return inconsistent From ba4eb7f25886e55c9bc183214e0b107b834b5503 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:38 +0100 Subject: [PATCH 227/534] sched/numa: Document vma_numab_state fields [ Upstream commit 9ae5c00ea2e600a8b823f9b95606dd244f3096bf ] Document the intended usage of the fields. [ mingo: Reformatted to take less vertical space & tidied it up. ] Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-2-mgorman@techsingularity.net Stable-dep-of: f22cde4371f3 ("sched/numa: Fix the vma scan starving issue") Signed-off-by: Sasha Levin --- include/linux/mm_types.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ba25777ec0a7..b53a13d15bba 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -551,8 +551,29 @@ struct vma_lock { }; struct vma_numab_state { + /* + * Initialised as time in 'jiffies' after which VMA + * should be scanned. Delays first scan of new VMA by at + * least sysctl_numa_balancing_scan_delay: + */ unsigned long next_scan; + + /* + * Time in jiffies when access_pids[] is reset to + * detect phase change behaviour: + */ unsigned long next_pid_reset; + + /* + * Approximate tracking of PIDs that trapped a NUMA hinting + * fault. May produce false positives due to hash collisions. + * + * [0] Previous PID tracking + * [1] Current PID tracking + * + * Window moves after next_pid_reset has expired approximately + * every VMA_PID_RESET_PERIOD jiffies: + */ unsigned long access_pids[2]; }; From 707e9a6c880f82d17a684278b8842b08f885c203 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:39 +0100 Subject: [PATCH 228/534] sched/numa: Rename vma_numab_state::access_pids[] => ::pids_active[], ::next_pid_reset => ::pids_active_reset [ Upstream commit f3a6c97940fbd25d6c84c2d5642338fc99a9b35b ] The access_pids[] field name is somewhat ambiguous as no PIDs are accessed. Similarly, it's not clear that next_pid_reset is related to access_pids[]. Rename the fields to more accurately reflect their purpose. [ mingo: Rename in the comments too. ] Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-3-mgorman@techsingularity.net Stable-dep-of: f22cde4371f3 ("sched/numa: Fix the vma scan starving issue") Signed-off-by: Sasha Levin --- include/linux/mm.h | 4 ++-- include/linux/mm_types.h | 6 +++--- kernel/sched/fair.c | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 830b925c2d00..b6a4d6471b4a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1732,8 +1732,8 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) unsigned int pid_bit; pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG)); - if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) { - __set_bit(pid_bit, &vma->numab_state->access_pids[1]); + if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) { + __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } #else /* !CONFIG_NUMA_BALANCING */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b53a13d15bba..80d9d1b7685c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -559,10 +559,10 @@ struct vma_numab_state { unsigned long next_scan; /* - * Time in jiffies when access_pids[] is reset to + * Time in jiffies when pids_active[] is reset to * detect phase change behaviour: */ - unsigned long next_pid_reset; + unsigned long pids_active_reset; /* * Approximate tracking of PIDs that trapped a NUMA hinting @@ -574,7 +574,7 @@ struct vma_numab_state { * Window moves after next_pid_reset has expired approximately * every VMA_PID_RESET_PERIOD jiffies: */ - unsigned long access_pids[2]; + unsigned long pids_active[2]; }; /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5fc0d9cc9d9d..d07fb0a0da80 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3200,7 +3200,7 @@ static bool vma_is_accessed(struct vm_area_struct *vma) if (READ_ONCE(current->mm->numa_scan_seq) < 2) return true; - pids = vma->numab_state->access_pids[0] | vma->numab_state->access_pids[1]; + pids = vma->numab_state->pids_active[0] | vma->numab_state->pids_active[1]; return test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids); } @@ -3316,7 +3316,7 @@ static void task_numa_work(struct callback_head *work) msecs_to_jiffies(sysctl_numa_balancing_scan_delay); /* Reset happens after 4 times scan delay of scan start */ - vma->numab_state->next_pid_reset = vma->numab_state->next_scan + + vma->numab_state->pids_active_reset = vma->numab_state->next_scan + msecs_to_jiffies(VMA_PID_RESET_PERIOD); } @@ -3337,11 +3337,11 @@ static void task_numa_work(struct callback_head *work) * vma for recent access to avoid clearing PID info before access.. */ if (mm->numa_scan_seq && - time_after(jiffies, vma->numab_state->next_pid_reset)) { - vma->numab_state->next_pid_reset = vma->numab_state->next_pid_reset + + time_after(jiffies, vma->numab_state->pids_active_reset)) { + vma->numab_state->pids_active_reset = vma->numab_state->pids_active_reset + msecs_to_jiffies(VMA_PID_RESET_PERIOD); - vma->numab_state->access_pids[0] = READ_ONCE(vma->numab_state->access_pids[1]); - vma->numab_state->access_pids[1] = 0; + vma->numab_state->pids_active[0] = READ_ONCE(vma->numab_state->pids_active[1]); + vma->numab_state->pids_active[1] = 0; } do { From 6654e54ae7e7cb2ceb3da85e72ebb7487996a9d9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:40 +0100 Subject: [PATCH 229/534] sched/numa: Trace decisions related to skipping VMAs [ Upstream commit ed2da8b725b932b1e2b2f4835bb664d47ed03031 ] NUMA balancing skips or scans VMAs for a variety of reasons. In preparation for completing scans of VMAs regardless of PID access, trace the reasons why a VMA was skipped. In a later patch, the tracing will be used to track if a VMA was forcibly scanned. Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-4-mgorman@techsingularity.net Stable-dep-of: f22cde4371f3 ("sched/numa: Fix the vma scan starving issue") Signed-off-by: Sasha Levin --- include/linux/sched/numa_balancing.h | 8 +++++ include/trace/events/sched.h | 50 ++++++++++++++++++++++++++++ kernel/sched/fair.c | 17 +++++++--- 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 3988762efe15..c127a1509e2f 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -15,6 +15,14 @@ #define TNF_FAULT_LOCAL 0x08 #define TNF_MIGRATE_FAIL 0x10 +enum numa_vmaskip_reason { + NUMAB_SKIP_UNSUITABLE, + NUMAB_SKIP_SHARED_RO, + NUMAB_SKIP_INACCESSIBLE, + NUMAB_SKIP_SCAN_DELAY, + NUMAB_SKIP_PID_INACTIVE, +}; + #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index fbb99a61f714..b0d0dbf491ea 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -664,6 +664,56 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) ); +#ifdef CONFIG_NUMA_BALANCING +#define NUMAB_SKIP_REASON \ + EM( NUMAB_SKIP_UNSUITABLE, "unsuitable" ) \ + EM( NUMAB_SKIP_SHARED_RO, "shared_ro" ) \ + EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \ + EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \ + EMe(NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) + +/* Redefine for export. */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +NUMAB_SKIP_REASON + +/* Redefine for symbolic printing. */ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + +TRACE_EVENT(sched_skip_vma_numa, + + TP_PROTO(struct mm_struct *mm, struct vm_area_struct *vma, + enum numa_vmaskip_reason reason), + + TP_ARGS(mm, vma, reason), + + TP_STRUCT__entry( + __field(unsigned long, numa_scan_offset) + __field(unsigned long, vm_start) + __field(unsigned long, vm_end) + __field(enum numa_vmaskip_reason, reason) + ), + + TP_fast_assign( + __entry->numa_scan_offset = mm->numa_scan_offset; + __entry->vm_start = vma->vm_start; + __entry->vm_end = vma->vm_end; + __entry->reason = reason; + ), + + TP_printk("numa_scan_offset=%lX vm_start=%lX vm_end=%lX reason=%s", + __entry->numa_scan_offset, + __entry->vm_start, + __entry->vm_end, + __print_symbolic(__entry->reason, NUMAB_SKIP_REASON)) +); +#endif /* CONFIG_NUMA_BALANCING */ /* * Tracepoint for waking a polling cpu without an IPI. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d07fb0a0da80..7c5f6e94b3cd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3285,6 +3285,7 @@ static void task_numa_work(struct callback_head *work) do { if (!vma_migratable(vma) || !vma_policy_mof(vma) || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_UNSUITABLE); continue; } @@ -3295,15 +3296,19 @@ static void task_numa_work(struct callback_head *work) * as migrating the pages will be of marginal benefit. */ if (!vma->vm_mm || - (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ))) + (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ))) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_SHARED_RO); continue; + } /* * Skip inaccessible VMAs to avoid any confusion between * PROT_NONE and NUMA hinting ptes */ - if (!vma_is_accessible(vma)) + if (!vma_is_accessible(vma)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_INACCESSIBLE); continue; + } /* Initialise new per-VMA NUMAB state. */ if (!vma->numab_state) { @@ -3325,12 +3330,16 @@ static void task_numa_work(struct callback_head *work) * delay the scan for new VMAs. */ if (mm->numa_scan_seq && time_before(jiffies, - vma->numab_state->next_scan)) + vma->numab_state->next_scan)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_SCAN_DELAY); continue; + } /* Do not scan the VMA if task has not accessed */ - if (!vma_is_accessed(vma)) + if (!vma_is_accessed(vma)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE); continue; + } /* * RESET access PIDs regularly for old VMAs. Resetting after checking From 7f01977665d700924f24d9113d3fe99a96d0f47b Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Tue, 10 Oct 2023 09:31:41 +0100 Subject: [PATCH 230/534] sched/numa: Move up the access pid reset logic [ Upstream commit 2e2675db1906ac04809f5399bf1f5e30d56a6f3e ] Recent NUMA hinting faulting activity is reset approximately every VMA_PID_RESET_PERIOD milliseconds. However, if the current task has not accessed a VMA then the reset check is missed and the reset is potentially deferred forever. Check if the PID activity information should be reset before checking if the current task recently trapped a NUMA hinting fault. [ mgorman@techsingularity.net: Rewrite changelog ] Suggested-by: Mel Gorman Signed-off-by: Raghavendra K T Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-5-mgorman@techsingularity.net Stable-dep-of: f22cde4371f3 ("sched/numa: Fix the vma scan starving issue") Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7c5f6e94b3cd..07363b73ccdc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3335,16 +3335,7 @@ static void task_numa_work(struct callback_head *work) continue; } - /* Do not scan the VMA if task has not accessed */ - if (!vma_is_accessed(vma)) { - trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE); - continue; - } - - /* - * RESET access PIDs regularly for old VMAs. Resetting after checking - * vma for recent access to avoid clearing PID info before access.. - */ + /* RESET access PIDs regularly for old VMAs. */ if (mm->numa_scan_seq && time_after(jiffies, vma->numab_state->pids_active_reset)) { vma->numab_state->pids_active_reset = vma->numab_state->pids_active_reset + @@ -3353,6 +3344,12 @@ static void task_numa_work(struct callback_head *work) vma->numab_state->pids_active[1] = 0; } + /* Do not scan the VMA if task has not accessed */ + if (!vma_is_accessed(vma)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE); + continue; + } + do { start = max(start, vma->vm_start); end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE); From cb7846df6b4f347fa6bd27aaff83db8c144bea74 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:42 +0100 Subject: [PATCH 231/534] sched/numa: Complete scanning of partial VMAs regardless of PID activity [ Upstream commit b7a5b537c55c088d891ae554103d1b281abef781 ] NUMA Balancing skips VMAs when the current task has not trapped a NUMA fault within the VMA. If the VMA is skipped then mm->numa_scan_offset advances and a task that is trapping faults within the VMA may never fully update PTEs within the VMA. Force tasks to update PTEs for partially scanned PTEs. The VMA will be tagged for NUMA hints by some task but this removes some of the benefit of tracking PID activity within a VMA. A follow-on patch will mitigate this problem. The test cases and machines evaluated did not trigger the corner case so the performance results are neutral with only small changes within the noise from normal test-to-test variance. However, the next patch makes the corner case easier to trigger. Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Tested-by: Raghavendra K T Link: https://lore.kernel.org/r/20231010083143.19593-6-mgorman@techsingularity.net Stable-dep-of: f22cde4371f3 ("sched/numa: Fix the vma scan starving issue") Signed-off-by: Sasha Levin --- include/linux/sched/numa_balancing.h | 1 + include/trace/events/sched.h | 3 ++- kernel/sched/fair.c | 18 +++++++++++++++--- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index c127a1509e2f..7dcc0bdfddbb 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -21,6 +21,7 @@ enum numa_vmaskip_reason { NUMAB_SKIP_INACCESSIBLE, NUMAB_SKIP_SCAN_DELAY, NUMAB_SKIP_PID_INACTIVE, + NUMAB_SKIP_IGNORE_PID, }; #ifdef CONFIG_NUMA_BALANCING diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index b0d0dbf491ea..27b51c81b106 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -670,7 +670,8 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, EM( NUMAB_SKIP_SHARED_RO, "shared_ro" ) \ EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \ EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \ - EMe(NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) + EM( NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) \ + EMe(NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) /* Redefine for export. */ #undef EM diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 07363b73ccdc..03eb1cab320d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3188,7 +3188,7 @@ static void reset_ptenuma_scan(struct task_struct *p) p->mm->numa_scan_offset = 0; } -static bool vma_is_accessed(struct vm_area_struct *vma) +static bool vma_is_accessed(struct mm_struct *mm, struct vm_area_struct *vma) { unsigned long pids; /* @@ -3201,7 +3201,19 @@ static bool vma_is_accessed(struct vm_area_struct *vma) return true; pids = vma->numab_state->pids_active[0] | vma->numab_state->pids_active[1]; - return test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids); + if (test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids)) + return true; + + /* + * Complete a scan that has already started regardless of PID access, or + * some VMAs may never be scanned in multi-threaded applications: + */ + if (mm->numa_scan_offset > vma->vm_start) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_IGNORE_PID); + return true; + } + + return false; } #define VMA_PID_RESET_PERIOD (4 * sysctl_numa_balancing_scan_delay) @@ -3345,7 +3357,7 @@ static void task_numa_work(struct callback_head *work) } /* Do not scan the VMA if task has not accessed */ - if (!vma_is_accessed(vma)) { + if (!vma_is_accessed(mm, vma)) { trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE); continue; } From e3a2d3f6c40e749ad431a1cb7fd5dd78a02ab592 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:43 +0100 Subject: [PATCH 232/534] sched/numa: Complete scanning of inactive VMAs when there is no alternative [ Upstream commit f169c62ff7cd1acf8bac8ae17bfeafa307d9e6fa ] VMAs are skipped if there is no recent fault activity but this represents a chicken-and-egg problem as there may be no fault activity if the PTEs are never updated to trap NUMA hints. There is an indirect reliance on scanning to be forced early in the lifetime of a task but this may fail to detect changes in phase behaviour. Force inactive VMAs to be scanned when all other eligible VMAs have been updated within the same scan sequence. Test results in general look good with some changes in performance, both negative and positive, depending on whether the additional scanning and faulting was beneficial or not to the workload. The autonuma benchmark workload NUMA01_THREADLOCAL was picked for closer examination. The workload creates two processes with numerous threads and thread-local storage that is zero-filled in a loop. It exercises the corner case where unrelated threads may skip VMAs that are thread-local to another thread and still has some VMAs that inactive while the workload executes. The VMA skipping activity frequency with and without the patch: 6.6.0-rc2-sched-numabtrace-v1 ============================= 649 reason=scan_delay 9,094 reason=unsuitable 48,915 reason=shared_ro 143,919 reason=inaccessible 193,050 reason=pid_inactive 6.6.0-rc2-sched-numabselective-v1 ============================= 146 reason=seq_completed 622 reason=ignore_pid_inactive 624 reason=scan_delay 6,570 reason=unsuitable 16,101 reason=shared_ro 27,608 reason=inaccessible 41,939 reason=pid_inactive Note that with the patch applied, the PID activity is ignored (ignore_pid_inactive) to ensure a VMA with some activity is completely scanned. In addition, a small number of VMAs are scanned when no other eligible VMA is available during a single scan window (seq_completed). The number of times a VMA is skipped due to no PID activity from the scanning task (pid_inactive) drops dramatically. It is expected that this will increase the number of PTEs updated for NUMA hinting faults as well as hinting faults but these represent PTEs that would otherwise have been missed. The tradeoff is scan+fault overhead versus improving locality due to migration. On a 2-socket Cascade Lake test machine, the time to complete the workload is as follows; 6.6.0-rc2 6.6.0-rc2 sched-numabtrace-v1 sched-numabselective-v1 Min elsp-NUMA01_THREADLOCAL 174.22 ( 0.00%) 117.64 ( 32.48%) Amean elsp-NUMA01_THREADLOCAL 175.68 ( 0.00%) 123.34 * 29.79%* Stddev elsp-NUMA01_THREADLOCAL 1.20 ( 0.00%) 4.06 (-238.20%) CoeffVar elsp-NUMA01_THREADLOCAL 0.68 ( 0.00%) 3.29 (-381.70%) Max elsp-NUMA01_THREADLOCAL 177.18 ( 0.00%) 128.03 ( 27.74%) The time to complete the workload is reduced by almost 30%: 6.6.0-rc2 6.6.0-rc2 sched-numabtrace-v1 sched-numabselective-v1 / Duration User 91201.80 63506.64 Duration System 2015.53 1819.78 Duration Elapsed 1234.77 868.37 In this specific case, system CPU time was not increased but it's not universally true. From vmstat, the NUMA scanning and fault activity is as follows; 6.6.0-rc2 6.6.0-rc2 sched-numabtrace-v1 sched-numabselective-v1 Ops NUMA base-page range updates 64272.00 26374386.00 Ops NUMA PTE updates 36624.00 55538.00 Ops NUMA PMD updates 54.00 51404.00 Ops NUMA hint faults 15504.00 75786.00 Ops NUMA hint local faults % 14860.00 56763.00 Ops NUMA hint local percent 95.85 74.90 Ops NUMA pages migrated 1629.00 6469222.00 Both the number of PTE updates and hint faults is dramatically increased. While this is superficially unfortunate, it represents ranges that were simply skipped without the patch. As a result of the scanning and hinting faults, many more pages were also migrated but as the time to completion is reduced, the overhead is offset by the gain. Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Tested-by: Raghavendra K T Link: https://lore.kernel.org/r/20231010083143.19593-7-mgorman@techsingularity.net Stable-dep-of: f22cde4371f3 ("sched/numa: Fix the vma scan starving issue") Signed-off-by: Sasha Levin --- include/linux/mm_types.h | 6 +++ include/linux/sched/numa_balancing.h | 1 + include/trace/events/sched.h | 3 +- kernel/sched/fair.c | 55 ++++++++++++++++++++++++++-- 4 files changed, 61 insertions(+), 4 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 80d9d1b7685c..43c19d85dfe7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -575,6 +575,12 @@ struct vma_numab_state { * every VMA_PID_RESET_PERIOD jiffies: */ unsigned long pids_active[2]; + + /* + * MM scan sequence ID when the VMA was last completely scanned. + * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq + */ + int prev_scan_seq; }; /* diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 7dcc0bdfddbb..b69afb8630db 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -22,6 +22,7 @@ enum numa_vmaskip_reason { NUMAB_SKIP_SCAN_DELAY, NUMAB_SKIP_PID_INACTIVE, NUMAB_SKIP_IGNORE_PID, + NUMAB_SKIP_SEQ_COMPLETED, }; #ifdef CONFIG_NUMA_BALANCING diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 27b51c81b106..010ba1b7cb0e 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -671,7 +671,8 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \ EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \ EM( NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) \ - EMe(NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) + EM( NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) \ + EMe(NUMAB_SKIP_SEQ_COMPLETED, "seq_completed" ) /* Redefine for export. */ #undef EM diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 03eb1cab320d..0af2be3ee849 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3233,6 +3233,8 @@ static void task_numa_work(struct callback_head *work) unsigned long nr_pte_updates = 0; long pages, virtpages; struct vma_iterator vmi; + bool vma_pids_skipped; + bool vma_pids_forced = false; SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work)); @@ -3275,7 +3277,6 @@ static void task_numa_work(struct callback_head *work) */ p->node_stamp += 2 * TICK_NSEC; - start = mm->numa_scan_offset; pages = sysctl_numa_balancing_scan_size; pages <<= 20 - PAGE_SHIFT; /* MB in pages */ virtpages = pages * 8; /* Scan up to this much virtual space */ @@ -3285,6 +3286,16 @@ static void task_numa_work(struct callback_head *work) if (!mmap_read_trylock(mm)) return; + + /* + * VMAs are skipped if the current PID has not trapped a fault within + * the VMA recently. Allow scanning to be forced if there is no + * suitable VMA remaining. + */ + vma_pids_skipped = false; + +retry_pids: + start = mm->numa_scan_offset; vma_iter_init(&vmi, mm, start); vma = vma_next(&vmi); if (!vma) { @@ -3335,6 +3346,13 @@ static void task_numa_work(struct callback_head *work) /* Reset happens after 4 times scan delay of scan start */ vma->numab_state->pids_active_reset = vma->numab_state->next_scan + msecs_to_jiffies(VMA_PID_RESET_PERIOD); + + /* + * Ensure prev_scan_seq does not match numa_scan_seq, + * to prevent VMAs being skipped prematurely on the + * first scan: + */ + vma->numab_state->prev_scan_seq = mm->numa_scan_seq - 1; } /* @@ -3356,8 +3374,19 @@ static void task_numa_work(struct callback_head *work) vma->numab_state->pids_active[1] = 0; } - /* Do not scan the VMA if task has not accessed */ - if (!vma_is_accessed(mm, vma)) { + /* Do not rescan VMAs twice within the same sequence. */ + if (vma->numab_state->prev_scan_seq == mm->numa_scan_seq) { + mm->numa_scan_offset = vma->vm_end; + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_SEQ_COMPLETED); + continue; + } + + /* + * Do not scan the VMA if task has not accessed it, unless no other + * VMA candidate exists. + */ + if (!vma_pids_forced && !vma_is_accessed(mm, vma)) { + vma_pids_skipped = true; trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE); continue; } @@ -3386,8 +3415,28 @@ static void task_numa_work(struct callback_head *work) cond_resched(); } while (end != vma->vm_end); + + /* VMA scan is complete, do not scan until next sequence. */ + vma->numab_state->prev_scan_seq = mm->numa_scan_seq; + + /* + * Only force scan within one VMA at a time, to limit the + * cost of scanning a potentially uninteresting VMA. + */ + if (vma_pids_forced) + break; } for_each_vma(vmi, vma); + /* + * If no VMAs are remaining and VMAs were skipped due to the PID + * not accessing the VMA previously, then force a scan to ensure + * forward progress: + */ + if (!vma && !vma_pids_forced && vma_pids_skipped) { + vma_pids_forced = true; + goto retry_pids; + } + out: /* * It is possible to reach the end of the VMA list but the last few From 66f3fc741177848f0b5dcb51cc93328fbafc43f0 Mon Sep 17 00:00:00 2001 From: Yujie Liu Date: Tue, 27 Aug 2024 19:29:58 +0800 Subject: [PATCH 233/534] sched/numa: Fix the vma scan starving issue [ Upstream commit f22cde4371f3c624e947a35b075c06c771442a43 ] Problem statement: Since commit fc137c0ddab2 ("sched/numa: enhance vma scanning logic"), the Numa vma scan overhead has been reduced a lot. Meanwhile, the reducing of the vma scan might create less Numa page fault information. The insufficient information makes it harder for the Numa balancer to make decision. Later, commit b7a5b537c55c08 ("sched/numa: Complete scanning of partial VMAs regardless of PID activity") and commit 84db47ca7146d7 ("sched/numa: Fix mm numa_scan_seq based unconditional scan") are found to bring back part of the performance. Recently when running SPECcpu omnetpp_r on a 320 CPUs/2 Sockets system, a long duration of remote Numa node read was observed by PMU events: A few cores having ~500MB/s remote memory access for ~20 seconds. It causes high core-to-core variance and performance penalty. After the investigation, it is found that many vmas are skipped due to the active PID check. According to the trace events, in most cases, vma_is_accessed() returns false because the history access info stored in pids_active array has been cleared. Proposal: The main idea is to adjust vma_is_accessed() to let it return true easier. Thus compare the diff between mm->numa_scan_seq and vma->numab_state->prev_scan_seq. If the diff has exceeded the threshold, scan the vma. This patch especially helps the cases where there are small number of threads, like the process-based SPECcpu. Without this patch, if the SPECcpu process access the vma at the beginning, then sleeps for a long time, the pid_active array will be cleared. A a result, if this process is woken up again, it never has a chance to set prot_none anymore. Because only the first 2 times of access is granted for vma scan: (current->mm->numa_scan_seq) - vma->numab_state->start_scan_seq) < 2 to be worse, no other threads within the task can help set the prot_none. This causes information lost. Raghavendra helped test current patch and got the positive result on the AMD platform: autonumabench NUMA01 base patched Amean syst-NUMA01 194.05 ( 0.00%) 165.11 * 14.92%* Amean elsp-NUMA01 324.86 ( 0.00%) 315.58 * 2.86%* Duration User 380345.36 368252.04 Duration System 1358.89 1156.23 Duration Elapsed 2277.45 2213.25 autonumabench NUMA02 Amean syst-NUMA02 1.12 ( 0.00%) 1.09 * 2.93%* Amean elsp-NUMA02 3.50 ( 0.00%) 3.56 * -1.84%* Duration User 1513.23 1575.48 Duration System 8.33 8.13 Duration Elapsed 28.59 29.71 kernbench Amean user-256 22935.42 ( 0.00%) 22535.19 * 1.75%* Amean syst-256 7284.16 ( 0.00%) 7608.72 * -4.46%* Amean elsp-256 159.01 ( 0.00%) 158.17 * 0.53%* Duration User 68816.41 67615.74 Duration System 21873.94 22848.08 Duration Elapsed 506.66 504.55 Intel 256 CPUs/2 Sockets: autonuma benchmark also shows improvements: v6.10-rc5 v6.10-rc5 +patch Amean syst-NUMA01 245.85 ( 0.00%) 230.84 * 6.11%* Amean syst-NUMA01_THREADLOCAL 205.27 ( 0.00%) 191.86 * 6.53%* Amean syst-NUMA02 18.57 ( 0.00%) 18.09 * 2.58%* Amean syst-NUMA02_SMT 2.63 ( 0.00%) 2.54 * 3.47%* Amean elsp-NUMA01 517.17 ( 0.00%) 526.34 * -1.77%* Amean elsp-NUMA01_THREADLOCAL 99.92 ( 0.00%) 100.59 * -0.67%* Amean elsp-NUMA02 15.81 ( 0.00%) 15.72 * 0.59%* Amean elsp-NUMA02_SMT 13.23 ( 0.00%) 12.89 * 2.53%* v6.10-rc5 v6.10-rc5 +patch Duration User 1064010.16 1075416.23 Duration System 3307.64 3104.66 Duration Elapsed 4537.54 4604.73 The SPECcpu remote node access issue disappears with the patch applied. Link: https://lkml.kernel.org/r/20240827112958.181388-1-yu.c.chen@intel.com Fixes: fc137c0ddab2 ("sched/numa: enhance vma scanning logic") Signed-off-by: Chen Yu Co-developed-by: Chen Yu Signed-off-by: Yujie Liu Reported-by: Xiaoping Zhou Reviewed-and-tested-by: Raghavendra K T Acked-by: Mel Gorman Cc: "Chen, Tim C" Cc: Ingo Molnar Cc: Juri Lelli Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Vincent Guittot Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0af2be3ee849..5eb4807bad20 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3213,6 +3213,15 @@ static bool vma_is_accessed(struct mm_struct *mm, struct vm_area_struct *vma) return true; } + /* + * This vma has not been accessed for a while, and if the number + * the threads in the same process is low, which means no other + * threads can help scan this vma, force a vma scan. + */ + if (READ_ONCE(mm->numa_scan_seq) > + (vma->numab_state->prev_scan_seq + get_nr_threads(current))) + return true; + return false; } From 21839b6fbc3c41b3e374ecbdb0cabbbb2c53cf34 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 4 Sep 2024 17:13:07 +0900 Subject: [PATCH 234/534] nilfs2: fix potential null-ptr-deref in nilfs_btree_insert() [ Upstream commit 9403001ad65ae4f4c5de368bdda3a0636b51d51a ] Patch series "nilfs2: fix potential issues with empty b-tree nodes". This series addresses three potential issues with empty b-tree nodes that can occur with corrupted filesystem images, including one recently discovered by syzbot. This patch (of 3): If a b-tree is broken on the device, and the b-tree height is greater than 2 (the level of the root node is greater than 1) even if the number of child nodes of the b-tree root is 0, a NULL pointer dereference occurs in nilfs_btree_prepare_insert(), which is called from nilfs_btree_insert(). This is because, when the number of child nodes of the b-tree root is 0, nilfs_btree_do_lookup() does not set the block buffer head in any of path[x].bp_bh, leaving it as the initial value of NULL, but if the level of the b-tree root node is greater than 1, nilfs_btree_get_nonroot_node(), which accesses the buffer memory of path[x].bp_bh, is called. Fix this issue by adding a check to nilfs_btree_root_broken(), which performs sanity checks when reading the root node from the device, to detect this inconsistency. Thanks to Lizhi Xu for trying to solve the bug and clarifying the cause early on. Link: https://lkml.kernel.org/r/20240904081401.16682-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240902084101.138971-1-lizhi.xu@windriver.com Link: https://lkml.kernel.org/r/20240904081401.16682-2-konishi.ryusuke@gmail.com Fixes: 17c76b0104e4 ("nilfs2: B-tree based block mapping") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+9bff4c7b992038a7409f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9bff4c7b992038a7409f Cc: Lizhi Xu Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/nilfs2/btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 598f05867059..fa51ed649968 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -381,7 +381,8 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || - nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || + (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { nilfs_crit(inode->i_sb, "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, level, flags, nchildren); From 0f28b3b51fc1d9ef0e768ce03d262ea88447344b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 4 Sep 2024 17:13:08 +0900 Subject: [PATCH 235/534] nilfs2: determine empty node blocks as corrupted [ Upstream commit 111b812d3662f3a1b831d19208f83aa711583fe6 ] Due to the nature of b-trees, nilfs2 itself and admin tools such as mkfs.nilfs2 will never create an intermediate b-tree node block with 0 child nodes, nor will they delete (key, pointer)-entries that would result in such a state. However, it is possible that a b-tree node block is corrupted on the backing device and is read with 0 child nodes. Because operation is not guaranteed if the number of child nodes is 0 for intermediate node blocks other than the root node, modify nilfs_btree_node_broken(), which performs sanity checks when reading a b-tree node block, so that such cases will be judged as metadata corruption. Link: https://lkml.kernel.org/r/20240904081401.16682-3-konishi.ryusuke@gmail.com Fixes: 17c76b0104e4 ("nilfs2: B-tree based block mapping") Signed-off-by: Ryusuke Konishi Cc: Lizhi Xu Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/nilfs2/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index fa51ed649968..dac090c162a7 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -350,7 +350,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || - nchildren < 0 || + nchildren <= 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { nilfs_crit(inode->i_sb, "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", From 257f9e5185eb6de83377caea686c306e22e871f2 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 4 Sep 2024 17:13:09 +0900 Subject: [PATCH 236/534] nilfs2: fix potential oob read in nilfs_btree_check_delete() [ Upstream commit f9c96351aa6718b42a9f42eaf7adce0356bdb5e8 ] The function nilfs_btree_check_delete(), which checks whether degeneration to direct mapping occurs before deleting a b-tree entry, causes memory access outside the block buffer when retrieving the maximum key if the root node has no entries. This does not usually happen because b-tree mappings with 0 child nodes are never created by mkfs.nilfs2 or nilfs2 itself. However, it can happen if the b-tree root node read from a device is configured that way, so fix this potential issue by adding a check for that case. Link: https://lkml.kernel.org/r/20240904081401.16682-4-konishi.ryusuke@gmail.com Fixes: 17c76b0104e4 ("nilfs2: B-tree based block mapping") Signed-off-by: Ryusuke Konishi Cc: Lizhi Xu Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/nilfs2/btree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index dac090c162a7..dbd27a44632f 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1659,13 +1659,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) int nchildren, ret; root = nilfs_btree_get_root(btree); + nchildren = nilfs_btree_node_get_nchildren(root); + if (unlikely(nchildren == 0)) + return 0; + switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: - nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, @@ -1674,12 +1677,12 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; + nchildren = nilfs_btree_node_get_nchildren(node); break; default: return 0; } - nchildren = nilfs_btree_node_get_nchildren(node); maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? nilfs_btree_node_get_key(node, nchildren - 2) : 0; From 81197a9b45109522d1eb2755eb65662c3b23a080 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:46 +0200 Subject: [PATCH 237/534] bpf: Fix bpf_strtol and bpf_strtoul helpers for 32bit [ Upstream commit cfe69c50b05510b24e26ccb427c7cc70beafd6c1 ] The bpf_strtol() and bpf_strtoul() helpers are currently broken on 32bit: The argument type ARG_PTR_TO_LONG is BPF-side "long", not kernel-side "long" and therefore always considered fixed 64bit no matter if 64 or 32bit underlying architecture. This contract breaks in case of the two mentioned helpers since their BPF_CALL definition for the helpers was added with {unsigned,}long *res. Meaning, the transition from BPF-side "long" (BPF program) to kernel-side "long" (BPF helper) breaks here. Both helpers call __bpf_strtoll() with "long long" correctly, but later assigning the result into 32-bit "*(long *)" on 32bit architectures. From a BPF program point of view, this means upper bits will be seen as uninitialised. Therefore, fix both BPF_CALL signatures to {s,u}64 types to fix this situation. Now, changing also uapi/bpf.h helper documentation which generates bpf_helper_defs.h for BPF programs is tricky: Changing signatures there to __{s,u}64 would trigger compiler warnings (incompatible pointer types passing 'long *' to parameter of type '__s64 *' (aka 'long long *')) for existing BPF programs. Leaving the signatures as-is would be fine as from BPF program point of view it is still BPF-side "long" and thus equivalent to __{s,u}64 on 64 or 32bit underlying architectures. Note that bpf_strtol() and bpf_strtoul() are the only helpers with this issue. Fixes: d7a4cb9b6705 ("bpf: Introduce bpf_strtol and bpf_strtoul helpers") Reported-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/481fcec8-c12c-9abb-8ecb-76c71c009959@iogearbox.net Link: https://lore.kernel.org/r/20240913191754.13290-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/helpers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 9ab6be965305..0dad29f3e3ba 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -516,7 +516,7 @@ static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, } BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, - long *, res) + s64 *, res) { long long _res; int err; @@ -541,7 +541,7 @@ const struct bpf_func_proto bpf_strtol_proto = { }; BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, - unsigned long *, res) + u64 *, res) { unsigned long long _res; bool is_negative; From a2c8dc7e21803257e762b0bf067fd13e9c995da0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:48 +0200 Subject: [PATCH 238/534] bpf: Fix helper writes to read-only maps [ Upstream commit 32556ce93bc45c730829083cb60f95a2728ea48b ] Lonial found an issue that despite user- and BPF-side frozen BPF map (like in case of .rodata), it was still possible to write into it from a BPF program side through specific helpers having ARG_PTR_TO_{LONG,INT} as arguments. In check_func_arg() when the argument is as mentioned, the meta->raw_mode is never set. Later, check_helper_mem_access(), under the case of PTR_TO_MAP_VALUE as register base type, it assumes BPF_READ for the subsequent call to check_map_access_type() and given the BPF map is read-only it succeeds. The helpers really need to be annotated as ARG_PTR_TO_{LONG,INT} | MEM_UNINIT when results are written into them as opposed to read out of them. The latter indicates that it's okay to pass a pointer to uninitialized memory as the memory is written to anyway. However, ARG_PTR_TO_{LONG,INT} is a special case of ARG_PTR_TO_FIXED_SIZE_MEM just with additional alignment requirement. So it is better to just get rid of the ARG_PTR_TO_{LONG,INT} special cases altogether and reuse the fixed size memory types. For this, add MEM_ALIGNED to additionally ensure alignment given these helpers write directly into the args via * = val. The .arg*_size has been initialized reflecting the actual sizeof(*). MEM_ALIGNED can only be used in combination with MEM_FIXED_SIZE annotated argument types, since in !MEM_FIXED_SIZE cases the verifier does not know the buffer size a priori and therefore cannot blindly write * = val. Fixes: 57c3bb725a3d ("bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types") Reported-by: Lonial Con Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20240913191754.13290-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- include/linux/bpf.h | 7 +++++-- kernel/bpf/helpers.c | 6 ++++-- kernel/bpf/syscall.c | 3 ++- kernel/bpf/verifier.c | 41 +++++----------------------------------- kernel/trace/bpf_trace.c | 6 ++++-- net/core/filter.c | 6 ++++-- 6 files changed, 24 insertions(+), 45 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e4cd28c38b82..722102518dc9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -675,6 +675,11 @@ enum bpf_type_flag { /* DYNPTR points to xdp_buff */ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), + /* Memory must be aligned on some architectures, used in combination with + * MEM_FIXED_SIZE. + */ + MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -711,8 +716,6 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 0dad29f3e3ba..10ad4bd5574a 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -537,7 +537,8 @@ const struct bpf_func_proto bpf_strtol_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_size = sizeof(s64), }; BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, @@ -565,7 +566,8 @@ const struct bpf_func_proto bpf_strtoul_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_size = sizeof(u64), }; BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 65df92f5b192..d7df95bcc848 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5667,7 +5667,8 @@ static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_size = sizeof(u64), }; static const struct bpf_func_proto * diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9d5699942273..f778a1b9e1cf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7997,16 +7997,6 @@ static bool arg_type_is_dynptr(enum bpf_arg_type type) return base_type(type) == ARG_PTR_TO_DYNPTR; } -static int int_ptr_type_to_size(enum bpf_arg_type type) -{ - if (type == ARG_PTR_TO_INT) - return sizeof(u32); - else if (type == ARG_PTR_TO_LONG) - return sizeof(u64); - - return -EINVAL; -} - static int resolve_map_arg_type(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_arg_type *arg_type) @@ -8079,16 +8069,6 @@ static const struct bpf_reg_types mem_types = { }, }; -static const struct bpf_reg_types int_ptr_types = { - .types = { - PTR_TO_STACK, - PTR_TO_PACKET, - PTR_TO_PACKET_META, - PTR_TO_MAP_KEY, - PTR_TO_MAP_VALUE, - }, -}; - static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE, @@ -8143,8 +8123,6 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, - [ARG_PTR_TO_INT] = &int_ptr_types, - [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, [ARG_PTR_TO_FUNC] = &func_ptr_types, [ARG_PTR_TO_STACK] = &stack_ptr_types, @@ -8651,9 +8629,11 @@ skip_type_check: */ meta->raw_mode = arg_type & MEM_UNINIT; if (arg_type & MEM_FIXED_SIZE) { - err = check_helper_mem_access(env, regno, - fn->arg_size[arg], false, - meta); + err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta); + if (err) + return err; + if (arg_type & MEM_ALIGNED) + err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true); } break; case ARG_CONST_SIZE: @@ -8678,17 +8658,6 @@ skip_type_check: if (err) return err; break; - case ARG_PTR_TO_INT: - case ARG_PTR_TO_LONG: - { - int size = int_ptr_type_to_size(arg_type); - - err = check_helper_mem_access(env, regno, size, false, meta); - if (err) - return err; - err = check_ptr_alignment(env, reg, 0, size, true); - break; - } case ARG_PTR_TO_CONST_STR: { struct bpf_map *map = reg->map_ptr; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cc29bf49f715..37a19598d712 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1220,7 +1220,8 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_LONG, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_size = sizeof(u64), }; BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) @@ -1236,7 +1237,8 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = { .func = get_func_ret, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_LONG, + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg2_size = sizeof(u64), }; BPF_CALL_1(get_func_arg_cnt, void *, ctx) diff --git a/net/core/filter.c b/net/core/filter.c index be313928d272..ecde9111fc95 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6306,7 +6306,8 @@ static const struct bpf_func_proto bpf_skb_check_mtu_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_INT, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_size = sizeof(u32), .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -6317,7 +6318,8 @@ static const struct bpf_func_proto bpf_xdp_check_mtu_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_INT, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_size = sizeof(u32), .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; From abf7559b4ff697714a6ca2600c16b02cfd75a2cb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:49 +0200 Subject: [PATCH 239/534] bpf: Improve check_raw_mode_ok test for MEM_UNINIT-tagged types [ Upstream commit 18752d73c1898fd001569195ba4b0b8c43255f4a ] When checking malformed helper function signatures, also take other argument types into account aside from just ARG_PTR_TO_UNINIT_MEM. This concerns (formerly) ARG_PTR_TO_{INT,LONG} given uninitialized memory can be passed there, too. The func proto sanity check goes back to commit 435faee1aae9 ("bpf, verifier: add ARG_PTR_TO_RAW_STACK type"), and its purpose was to detect wrong func protos which had more than just one MEM_UNINIT-tagged type as arguments. The reason more than one is currently not supported is as we mark stack slots with STACK_MISC in check_helper_call() in case of raw mode based on meta.access_size to allow uninitialized stack memory to be passed to helpers when they just write into the buffer. Probing for base type as well as MEM_UNINIT tagging ensures that other types do not get missed (as it used to be the case for ARG_PTR_TO_{INT,LONG}). Fixes: 57c3bb725a3d ("bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types") Reported-by: Shung-Hsi Yu Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20240913191754.13290-4-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f778a1b9e1cf..834394faf2af 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7987,6 +7987,12 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type) type == ARG_CONST_SIZE_OR_ZERO; } +static bool arg_type_is_raw_mem(enum bpf_arg_type type) +{ + return base_type(type) == ARG_PTR_TO_MEM && + type & MEM_UNINIT; +} + static bool arg_type_is_release(enum bpf_arg_type type) { return type & OBJ_RELEASE; @@ -9009,15 +9015,15 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn) { int count = 0; - if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg1_type)) count++; - if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg2_type)) count++; - if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg3_type)) count++; - if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg4_type)) count++; - if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg5_type)) count++; /* We only support one arg being in raw mode at the moment, From a634fa8e480ac2423f86311a602f6295df2c8ed0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:50 +0200 Subject: [PATCH 240/534] bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error [ Upstream commit 4b3786a6c5397dc220b1483d8e2f4867743e966f ] For all non-tracing helpers which formerly had ARG_PTR_TO_{LONG,INT} as input arguments, zero the value for the case of an error as otherwise it could leak memory. For tracing, it is not needed given CAP_PERFMON can already read all kernel memory anyway hence bpf_get_func_arg() and bpf_get_func_ret() is skipped in here. Also, the MTU helpers mtu_len pointer value is being written but also read. Technically, the MEM_UNINIT should not be there in order to always force init. Removing MEM_UNINIT needs more verifier rework though: MEM_UNINIT right now implies two things actually: i) write into memory, ii) memory does not have to be initialized. If we lift MEM_UNINIT, it then becomes: i) read into memory, ii) memory must be initialized. This means that for bpf_*_check_mtu() we're readding the issue we're trying to fix, that is, it would then be able to write back into things like .rodata BPF maps. Follow-up work will rework the MEM_UNINIT semantics such that the intent can be better expressed. For now just clear the *mtu_len on error path which can be lifted later again. Fixes: 8a67f2de9b1d ("bpf: expose bpf_strtol and bpf_strtoul to all program types") Fixes: d7a4cb9b6705 ("bpf: Introduce bpf_strtol and bpf_strtoul helpers") Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/e5edd241-59e7-5e39-0ee5-a51e31b6840a@iogearbox.net Link: https://lore.kernel.org/r/20240913191754.13290-5-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/helpers.c | 2 ++ kernel/bpf/syscall.c | 1 + net/core/filter.c | 44 +++++++++++++++++++++++--------------------- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 10ad4bd5574a..3dba5bb294d8 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -521,6 +521,7 @@ BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, long long _res; int err; + *res = 0; err = __bpf_strtoll(buf, buf_len, flags, &_res); if (err < 0) return err; @@ -548,6 +549,7 @@ BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, bool is_negative; int err; + *res = 0; err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); if (err < 0) return err; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d7df95bcc848..b1933d074f05 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5647,6 +5647,7 @@ static const struct bpf_func_proto bpf_sys_close_proto = { BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) { + *res = 0; if (flags) return -EINVAL; diff --git a/net/core/filter.c b/net/core/filter.c index ecde9111fc95..8bfd46a070c1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6222,20 +6222,25 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb, int ret = BPF_MTU_CHK_RET_FRAG_NEEDED; struct net_device *dev = skb->dev; int skb_len, dev_len; - int mtu; + int mtu = 0; - if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) - return -EINVAL; + if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) { + ret = -EINVAL; + goto out; + } - if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) - return -EINVAL; + if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) { + ret = -EINVAL; + goto out; + } dev = __dev_via_ifindex(dev, ifindex); - if (unlikely(!dev)) - return -ENODEV; + if (unlikely(!dev)) { + ret = -ENODEV; + goto out; + } mtu = READ_ONCE(dev->mtu); - dev_len = mtu + dev->hard_header_len; /* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ @@ -6253,15 +6258,12 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb, */ if (skb_is_gso(skb)) { ret = BPF_MTU_CHK_RET_SUCCESS; - if (flags & BPF_MTU_CHK_SEGS && !skb_gso_validate_network_len(skb, mtu)) ret = BPF_MTU_CHK_RET_SEGS_TOOBIG; } out: - /* BPF verifier guarantees valid pointer */ *mtu_len = mtu; - return ret; } @@ -6271,19 +6273,21 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp, struct net_device *dev = xdp->rxq->dev; int xdp_len = xdp->data_end - xdp->data; int ret = BPF_MTU_CHK_RET_SUCCESS; - int mtu, dev_len; + int mtu = 0, dev_len; /* XDP variant doesn't support multi-buffer segment check (yet) */ - if (unlikely(flags)) - return -EINVAL; + if (unlikely(flags)) { + ret = -EINVAL; + goto out; + } dev = __dev_via_ifindex(dev, ifindex); - if (unlikely(!dev)) - return -ENODEV; + if (unlikely(!dev)) { + ret = -ENODEV; + goto out; + } mtu = READ_ONCE(dev->mtu); - - /* Add L2-header as dev MTU is L3 size */ dev_len = mtu + dev->hard_header_len; /* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ @@ -6293,10 +6297,8 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp, xdp_len += len_diff; /* minus result pass check */ if (xdp_len > dev_len) ret = BPF_MTU_CHK_RET_FRAG_NEEDED; - - /* BPF verifier guarantees valid pointer */ +out: *mtu_len = mtu; - return ret; } From 1490a5dbd55f4b5bfccf6bbbcc7956c8f17e20ba Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 31 Jul 2024 16:55:01 -0700 Subject: [PATCH 241/534] perf mem: Free the allocated sort string, fixing a leak [ Upstream commit 3da209bb1177462b6fe8e3021a5527a5a49a9336 ] The get_sort_order() returns either a new string (from strdup) or NULL but it never gets freed. Signed-off-by: Namhyung Kim Fixes: 2e7f545096f954a9 ("perf mem: Factor out a function to generate sort order") Cc: Adrian Hunter Cc: Athira Rajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20240731235505.710436-3-namhyung@kernel.org [ Added Fixes tag ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 51499c20da01..865f321d729b 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -372,6 +372,7 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) rep_argv[i] = argv[j]; ret = cmd_report(i, rep_argv); + free(new_sort_order); free(rep_argv); return ret; } From ba18185bea37d4ce8d160ac824bc2890588775e2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 29 Jul 2024 15:06:20 -0700 Subject: [PATCH 242/534] perf inject: Fix leader sampling inserting additional samples [ Upstream commit 79bcd34e0f3da39fda841406ccc957405e724852 ] The processing of leader samples would turn an individual sample with a group of read values into multiple samples. 'perf inject' would pass through the additional samples increasing the output data file size: $ perf record -g -e "{instructions,cycles}:S" -o perf.orig.data true $ perf script -D -i perf.orig.data | sed -e 's/perf.orig.data/perf.data/g' > orig.txt $ perf inject -i perf.orig.data -o perf.new.data $ perf script -D -i perf.new.data | sed -e 's/perf.new.data/perf.data/g' > new.txt $ diff -u orig.txt new.txt --- orig.txt 2024-07-29 14:29:40.606576769 -0700 +++ new.txt 2024-07-29 14:30:04.142737434 -0700 ... -0xc550@perf.data [0x30]: event: 3 +0xc550@perf.data [0xd0]: event: 9 +. +. ... raw event: size 208 bytes +. 0000: 09 00 00 00 01 00 d0 00 fc 72 01 86 ff ff ff ff .........r...... +. 0010: 74 7d 2c 00 74 7d 2c 00 fb c3 79 f9 ba d5 05 00 t},.t},...y..... +. 0020: e6 cb 1a 00 00 00 00 00 01 00 00 00 00 00 00 00 ................ +. 0030: 02 00 00 00 00 00 00 00 76 01 00 00 00 00 00 00 ........v....... +. 0040: e6 cb 1a 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +. 0050: 62 18 00 00 00 00 00 00 f6 cb 1a 00 00 00 00 00 b............... +. 0060: 00 00 00 00 00 00 00 00 0c 00 00 00 00 00 00 00 ................ +. 0070: 80 ff ff ff ff ff ff ff fc 72 01 86 ff ff ff ff .........r...... +. 0080: f3 0e 6e 85 ff ff ff ff 0c cb 7f 85 ff ff ff ff ..n............. +. 0090: bc f2 87 85 ff ff ff ff 44 af 7f 85 ff ff ff ff ........D....... +. 00a0: bd be 7f 85 ff ff ff ff 26 d0 7f 85 ff ff ff ff ........&....... +. 00b0: 6d a4 ff 85 ff ff ff ff ea 00 20 86 ff ff ff ff m......... ..... +. 00c0: 00 fe ff ff ff ff ff ff 57 14 4f 43 fc 7e 00 00 ........W.OC.~.. + +1642373909693435 0xc550 [0xd0]: PERF_RECORD_SAMPLE(IP, 0x1): 2915700/2915700: 0xffffffff860172fc period: 1 addr: 0 +... FP chain: nr:12 +..... 0: ffffffffffffff80 +..... 1: ffffffff860172fc +..... 2: ffffffff856e0ef3 +..... 3: ffffffff857fcb0c +..... 4: ffffffff8587f2bc +..... 5: ffffffff857faf44 +..... 6: ffffffff857fbebd +..... 7: ffffffff857fd026 +..... 8: ffffffff85ffa46d +..... 9: ffffffff862000ea +..... 10: fffffffffffffe00 +..... 11: 00007efc434f1457 +... sample_read: +.... group nr 2 +..... id 00000000001acbe6, value 0000000000000176, lost 0 +..... id 00000000001acbf6, value 0000000000001862, lost 0 + +0xc620@perf.data [0x30]: event: 3 ... This behavior is incorrect as in the case above 'perf inject' should have done nothing. Fix this behavior by disabling separating samples for a tool that requests it. Only request this for `perf inject` so as to not affect other perf tools. With the patch and the test above there are no differences between the orig.txt and new.txt. Fixes: e4caec0d1af3d608 ("perf evsel: Add PERF_SAMPLE_READ sample related processing") Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240729220620.2957754-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-inject.c | 1 + tools/perf/util/session.c | 3 +++ tools/perf/util/tool.h | 1 + 3 files changed, 5 insertions(+) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index eb3ef5c24b66..8aba05665467 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2200,6 +2200,7 @@ int cmd_inject(int argc, const char **argv) .finished_init = perf_event__repipe_op2_synth, .compressed = perf_event__repipe_op4_synth, .auxtrace = perf_event__repipe_auxtrace, + .dont_split_sample_group = true, }, .input_name = "-", .samples = LIST_HEAD_INIT(inject.samples), diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index c6afba7ab1a5..277b2cbd5186 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1500,6 +1500,9 @@ static int deliver_sample_group(struct evlist *evlist, int ret = -EINVAL; struct sample_read_value *v = sample->read.group.values; + if (tool->dont_split_sample_group) + return deliver_sample_value(evlist, tool, event, sample, v, machine); + sample_read_group__for_each(v, sample->read.group.nr, read_format) { ret = deliver_sample_value(evlist, tool, event, sample, v, machine); diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index c957fb849ac6..62bbc9cec151 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -85,6 +85,7 @@ struct perf_tool { bool namespace_events; bool cgroup_events; bool no_warn; + bool dont_split_sample_group; enum show_feature_header show_feat_hdr; }; From 4ef032d89995365efd87420e172d8777358f4869 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Nov 2023 12:19:04 -0700 Subject: [PATCH 243/534] perf annotate: Split branch stack cycles info from 'struct annotation' [ Upstream commit b7f87e32590bf48eca84f729d3422be7b8dc22d3 ] The cycles info is only meaningful when sample has branch stacks. To save the memory for normal cases, move those fields to a new 'struct annotated_branch' and dynamically allocate it when needed. Also move cycles_hist from annotated_source as it's related here. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Christophe JAILLET Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231103191907.54531-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: 3ef44458071a ("perf report: Fix --total-cycles --stdio output error") Signed-off-by: Sasha Levin --- tools/perf/util/annotate.c | 101 +++++++++++++++++++---------------- tools/perf/util/annotate.h | 19 ++++--- tools/perf/util/block-info.c | 4 +- tools/perf/util/sort.c | 14 ++--- 4 files changed, 75 insertions(+), 63 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 83da2bceb595..e594be2b7134 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -813,7 +813,6 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src if (src == NULL) return; zfree(&src->histograms); - zfree(&src->cycles_hist); free(src); } @@ -848,18 +847,6 @@ static int annotated_source__alloc_histograms(struct annotated_source *src, return src->histograms ? 0 : -1; } -/* The cycles histogram is lazily allocated. */ -static int symbol__alloc_hist_cycles(struct symbol *sym) -{ - struct annotation *notes = symbol__annotation(sym); - const size_t size = symbol__size(sym); - - notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); - if (notes->src->cycles_hist == NULL) - return -1; - return 0; -} - void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); @@ -868,9 +855,10 @@ void symbol__annotate_zero_histograms(struct symbol *sym) if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); - if (notes->src->cycles_hist) - memset(notes->src->cycles_hist, 0, - symbol__size(sym) * sizeof(struct cyc_hist)); + } + if (notes->branch && notes->branch->cycles_hist) { + memset(notes->branch->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); } annotation__unlock(notes); } @@ -961,23 +949,33 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, return 0; } +static struct annotated_branch *annotation__get_branch(struct annotation *notes) +{ + if (notes == NULL) + return NULL; + + if (notes->branch == NULL) + notes->branch = zalloc(sizeof(*notes->branch)); + + return notes->branch; +} + static struct cyc_hist *symbol__cycles_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); + struct annotated_branch *branch; - if (notes->src == NULL) { - notes->src = annotated_source__new(); - if (notes->src == NULL) - return NULL; - goto alloc_cycles_hist; + branch = annotation__get_branch(notes); + if (branch == NULL) + return NULL; + + if (branch->cycles_hist == NULL) { + const size_t size = symbol__size(sym); + + branch->cycles_hist = calloc(size, sizeof(struct cyc_hist)); } - if (!notes->src->cycles_hist) { -alloc_cycles_hist: - symbol__alloc_hist_cycles(sym); - } - - return notes->src->cycles_hist; + return branch->cycles_hist; } struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) @@ -1086,6 +1084,14 @@ static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 return n_insn; } +static void annotated_branch__delete(struct annotated_branch *branch) +{ + if (branch) { + zfree(&branch->cycles_hist); + free(branch); + } +} + static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch) { unsigned n_insn; @@ -1094,6 +1100,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 n_insn = annotation__count_insn(notes, start, end); if (n_insn && ch->num && ch->cycles) { + struct annotated_branch *branch; float ipc = n_insn / ((double)ch->cycles / (double)ch->num); /* Hide data when there are too many overlaps. */ @@ -1109,10 +1116,11 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 } } - if (cover_insn) { - notes->hit_cycles += ch->cycles; - notes->hit_insn += n_insn * ch->num; - notes->cover_insn += cover_insn; + branch = annotation__get_branch(notes); + if (cover_insn && branch) { + branch->hit_cycles += ch->cycles; + branch->hit_insn += n_insn * ch->num; + branch->cover_insn += cover_insn; } } } @@ -1122,19 +1130,19 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size) int err = 0; s64 offset; - if (!notes->src || !notes->src->cycles_hist) + if (!notes->branch || !notes->branch->cycles_hist) return 0; - notes->total_insn = annotation__count_insn(notes, 0, size - 1); - notes->hit_cycles = 0; - notes->hit_insn = 0; - notes->cover_insn = 0; + notes->branch->total_insn = annotation__count_insn(notes, 0, size - 1); + notes->branch->hit_cycles = 0; + notes->branch->hit_insn = 0; + notes->branch->cover_insn = 0; annotation__lock(notes); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; - ch = ¬es->src->cycles_hist[offset]; + ch = ¬es->branch->cycles_hist[offset]; if (ch && ch->cycles) { struct annotation_line *al; @@ -1153,13 +1161,12 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size) al->cycles->max = ch->cycles_max; al->cycles->min = ch->cycles_min; } - notes->have_cycles = true; } } if (err) { while (++offset < (s64)size) { - struct cyc_hist *ch = ¬es->src->cycles_hist[offset]; + struct cyc_hist *ch = ¬es->branch->cycles_hist[offset]; if (ch && ch->cycles) { struct annotation_line *al = notes->offsets[offset]; @@ -1325,6 +1332,7 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r void annotation__exit(struct annotation *notes) { annotated_source__delete(notes->src); + annotated_branch__delete(notes->branch); } static struct sharded_mutex *sharded_mutex; @@ -3074,13 +3082,14 @@ call_like: static void ipc_coverage_string(char *bf, int size, struct annotation *notes) { double ipc = 0.0, coverage = 0.0; + struct annotated_branch *branch = annotation__get_branch(notes); - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)", @@ -3105,7 +3114,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati int printed; if (first_line && (al->offset == -1 || percent_max == 0.0)) { - if (notes->have_cycles && al->cycles) { + if (notes->branch && al->cycles) { if (al->cycles->ipc == 0.0 && al->cycles->avg == 0) show_title = true; } else @@ -3142,7 +3151,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati } } - if (notes->have_cycles) { + if (notes->branch) { if (al->cycles && al->cycles->ipc) obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->cycles->ipc); else if (!show_title) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0fa72eb559ac..dd612f9f04c9 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -272,17 +272,20 @@ struct annotated_source { struct list_head source; int nr_histograms; size_t sizeof_sym_hist; - struct cyc_hist *cycles_hist; struct sym_hist *histograms; }; +struct annotated_branch { + u64 hit_cycles; + u64 hit_insn; + unsigned int total_insn; + unsigned int cover_insn; + struct cyc_hist *cycles_hist; +}; + struct LOCKABLE annotation { u64 max_coverage; u64 start; - u64 hit_cycles; - u64 hit_insn; - unsigned int total_insn; - unsigned int cover_insn; struct annotation_options *options; struct annotation_line **offsets; int nr_events; @@ -298,8 +301,8 @@ struct LOCKABLE annotation { u8 max_addr; u8 max_ins_name; } widths; - bool have_cycles; struct annotated_source *src; + struct annotated_branch *branch; }; static inline void annotation__init(struct annotation *notes __maybe_unused) @@ -313,10 +316,10 @@ bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(tr static inline int annotation__cycles_width(struct annotation *notes) { - if (notes->have_cycles && notes->options->show_minmax_cycle) + if (notes->branch && notes->options->show_minmax_cycle) return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; - return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; + return notes->branch ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; } static inline int annotation__pcnt_width(struct annotation *notes) diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 591fc1edd385..08f82c1f166c 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -129,9 +129,9 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, al.sym = he->ms.sym; notes = symbol__annotation(he->ms.sym); - if (!notes || !notes->src || !notes->src->cycles_hist) + if (!notes || !notes->branch || !notes->branch->cycles_hist) return 0; - ch = notes->src->cycles_hist; + ch = notes->branch->cycles_hist; for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { if (ch[i].num_aggr) { struct block_info *bi; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6ab8147a3f87..b80349ca2199 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -583,21 +583,21 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, { struct symbol *sym = he->ms.sym; - struct annotation *notes; + struct annotated_branch *branch; double ipc = 0.0, coverage = 0.0; char tmp[64]; if (!sym) return repsep_snprintf(bf, size, "%-*s", width, "-"); - notes = symbol__annotation(sym); + branch = symbol__annotation(sym)->branch; - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage); From 4c857dcf34a5e642e70dd64b4254d15248ad2096 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Nov 2023 12:19:06 -0700 Subject: [PATCH 244/534] perf annotate: Move some source code related fields from 'struct annotation' to 'struct annotated_source' [ Upstream commit 0aae4c99c5f8f748c6cb5ca03bb3b3ae8cfb10df ] Some fields in the 'struct annotation' are only used with 'struct annotated_source' so better to be moved there in order to reduce memory consumption for other symbols. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Christophe JAILLET Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231103191907.54531-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: 3ef44458071a ("perf report: Fix --total-cycles --stdio output error") Signed-off-by: Sasha Levin --- tools/perf/ui/browsers/annotate.c | 12 ++++++------ tools/perf/util/annotate.c | 17 +++++++++-------- tools/perf/util/annotate.h | 14 +++++++------- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d9f9fa254a71..1fb7118ef507 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -384,7 +384,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) if (al->idx_asm < offset) offset = al->idx; - browser->b.nr_entries = notes->nr_entries; + browser->b.nr_entries = notes->src->nr_entries; notes->options->hide_src_code = false; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx - offset; @@ -402,7 +402,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) if (al->idx_asm < offset) offset = al->idx_asm; - browser->b.nr_entries = notes->nr_asm_entries; + browser->b.nr_entries = notes->src->nr_asm_entries; notes->options->hide_src_code = true; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx_asm - offset; @@ -435,7 +435,7 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser) { struct annotation *notes = browser__annotation(browser); ui_browser__reset_index(browser); - browser->nr_entries = notes->nr_asm_entries; + browser->nr_entries = notes->src->nr_asm_entries; } static int sym_title(struct symbol *sym, struct map *map, char *title, @@ -860,7 +860,7 @@ show_help: browser->b.height, browser->b.index, browser->b.top_idx, - notes->nr_asm_entries); + notes->src->nr_asm_entries); } continue; case K_ENTER: @@ -991,8 +991,8 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, ui_helpline__push("Press ESC to exit"); - browser.b.width = notes->max_line_len; - browser.b.nr_entries = notes->nr_entries; + browser.b.width = notes->src->max_line_len; + browser.b.nr_entries = notes->src->nr_entries; browser.b.entries = ¬es->src->source, browser.b.width += 18; /* Percentage */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e594be2b7134..6dfe11cbf30e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2825,19 +2825,20 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) void annotation__set_offsets(struct annotation *notes, s64 size) { struct annotation_line *al; + struct annotated_source *src = notes->src; - notes->max_line_len = 0; - notes->nr_entries = 0; - notes->nr_asm_entries = 0; + src->max_line_len = 0; + src->nr_entries = 0; + src->nr_asm_entries = 0; - list_for_each_entry(al, ¬es->src->source, node) { + list_for_each_entry(al, &src->source, node) { size_t line_len = strlen(al->line); - if (notes->max_line_len < line_len) - notes->max_line_len = line_len; - al->idx = notes->nr_entries++; + if (src->max_line_len < line_len) + src->max_line_len = line_len; + al->idx = src->nr_entries++; if (al->offset != -1) { - al->idx_asm = notes->nr_asm_entries++; + al->idx_asm = src->nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly * routines that comes with labels in the same column diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index dd612f9f04c9..238e7ef289a8 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -269,10 +269,13 @@ struct cyc_hist { * returns. */ struct annotated_source { - struct list_head source; - int nr_histograms; - size_t sizeof_sym_hist; - struct sym_hist *histograms; + struct list_head source; + size_t sizeof_sym_hist; + struct sym_hist *histograms; + int nr_histograms; + int nr_entries; + int nr_asm_entries; + u16 max_line_len; }; struct annotated_branch { @@ -290,9 +293,6 @@ struct LOCKABLE annotation { struct annotation_line **offsets; int nr_events; int max_jump_sources; - int nr_entries; - int nr_asm_entries; - u16 max_line_len; struct { u8 addr; u8 jumps; From 297871cb5115780fd7b4ddff502002938b8e4666 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Nov 2023 09:54:38 -0800 Subject: [PATCH 245/534] perf ui/browser/annotate: Use global annotation_options [ Upstream commit 22197fb296913f83c7182befd2a8b23bf042f279 ] Now it can use the global options and no need save local browser options separately. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231128175441.721579-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: 3ef44458071a ("perf report: Fix --total-cycles --stdio output error") Signed-off-by: Sasha Levin --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 8 ++-- tools/perf/builtin-top.c | 3 +- tools/perf/ui/browsers/annotate.c | 65 ++++++++++++++----------------- tools/perf/ui/browsers/hists.c | 34 ++++++---------- tools/perf/ui/browsers/hists.h | 2 - tools/perf/util/annotate.h | 6 +-- tools/perf/util/block-info.c | 6 +-- tools/perf/util/block-info.h | 3 +- tools/perf/util/hist.h | 25 ++++-------- 10 files changed, 59 insertions(+), 95 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 92973420c0a5..0f1e5787b4ed 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -378,7 +378,7 @@ find_next: /* skip missing symbols */ nd = rb_next(nd); } else if (use_browser == 1) { - key = hist_entry__tui_annotate(he, evsel, NULL, &annotate_opts); + key = hist_entry__tui_annotate(he, evsel, NULL); switch (key) { case -1: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2a8889c6d7f9..212760e4dd16 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -540,8 +540,7 @@ static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report * evlist__for_each_entry(evlist, pos) { ret = report__browse_block_hists(&rep->block_reports[i++].hist, rep->min_percent, pos, - &rep->session->header.env, - &annotate_opts); + &rep->session->header.env); if (ret != 0) return ret; } @@ -573,8 +572,7 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c if (rep->total_cycles_mode) { report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos, - NULL, NULL); + rep->min_percent, pos, NULL); continue; } @@ -669,7 +667,7 @@ static int report__browse_hists(struct report *rep) } ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, - &session->header.env, true, &annotate_opts); + &session->header.env, true); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6ac17763de0e..1c1ec444d501 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -646,8 +646,7 @@ repeat: } ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, - &top->session->header.env, !top->record_opts.overwrite, - &annotate_opts); + &top->session->header.env, !top->record_opts.overwrite); if (ret == K_RELOAD) { top->zero = true; goto repeat; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 1fb7118ef507..20f24d104da8 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -27,7 +27,6 @@ struct annotate_browser { struct rb_node *curr_hot; struct annotation_line *selection; struct arch *arch; - struct annotation_options *opts; bool searching_backwards; char search_bf[128]; }; @@ -97,7 +96,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int struct annotation_write_ops ops = { .first_line = row == 0, .current_entry = is_current_entry, - .change_color = (!notes->options->hide_src_code && + .change_color = (!annotate_opts.hide_src_code && (!is_current_entry || (browser->use_navkeypressed && !browser->navkeypressed))), @@ -128,7 +127,7 @@ static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor) while (pos && pos->al.offset == -1) { pos = list_prev_entry(pos, al.node); - if (!ab->opts->hide_src_code) + if (!annotate_opts.hide_src_code) diff++; } @@ -195,7 +194,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) return; } - if (notes->options->hide_src_code) { + if (annotate_opts.hide_src_code) { from = cursor->al.idx_asm; to = target->idx_asm; } else { @@ -224,7 +223,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) int ret = ui_browser__list_head_refresh(browser); int pcnt_width = annotation__pcnt_width(notes); - if (notes->options->jump_arrows) + if (annotate_opts.jump_arrows) annotate_browser__draw_current_jump(browser); ui_browser__set_color(browser, HE_COLORSET_NORMAL); @@ -258,7 +257,7 @@ static void disasm_rb_tree__insert(struct annotate_browser *browser, parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l, browser->opts->percent_type) < 0) + if (disasm__cmp(al, l, annotate_opts.percent_type) < 0) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -294,11 +293,10 @@ static void annotate_browser__set_top(struct annotate_browser *browser, static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { - struct annotation *notes = browser__annotation(&browser->b); struct annotation_line * pos = rb_entry(nd, struct annotation_line, rb_node); u32 idx = pos->idx; - if (notes->options->hide_src_code) + if (annotate_opts.hide_src_code) idx = pos->idx_asm; annotate_browser__set_top(browser, pos, idx); browser->curr_hot = nd; @@ -331,7 +329,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, double percent; percent = annotation_data__percent(&pos->al.data[i], - browser->opts->percent_type); + annotate_opts.percent_type); if (max_percent < percent) max_percent = percent; @@ -380,12 +378,12 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) browser->b.seek(&browser->b, offset, SEEK_CUR); al = list_entry(browser->b.top, struct annotation_line, node); - if (notes->options->hide_src_code) { + if (annotate_opts.hide_src_code) { if (al->idx_asm < offset) offset = al->idx; browser->b.nr_entries = notes->src->nr_entries; - notes->options->hide_src_code = false; + annotate_opts.hide_src_code = false; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx - offset; browser->b.index = al->idx; @@ -403,7 +401,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) offset = al->idx_asm; browser->b.nr_entries = notes->src->nr_asm_entries; - notes->options->hide_src_code = true; + annotate_opts.hide_src_code = true; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx_asm - offset; browser->b.index = al->idx_asm; @@ -483,8 +481,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser, target_ms.map = ms->map; target_ms.sym = dl->ops.target.sym; annotation__unlock(notes); - symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts); - sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type); + symbol__tui_annotate(&target_ms, evsel, hbt); + sym_title(ms->sym, ms->map, title, sizeof(title), annotate_opts.percent_type); ui_browser__show_title(&browser->b, title); return true; } @@ -659,7 +657,6 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser, static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help) { - struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct map_symbol *ms = browser->priv; struct symbol *sym = ms->sym; char symbol_dso[SYM_TITLE_MAX_SIZE]; @@ -667,7 +664,7 @@ static int annotate_browser__show(struct ui_browser *browser, char *title, const if (ui_browser__show(browser, title, help) < 0) return -1; - sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), ab->opts->percent_type); + sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), annotate_opts.percent_type); ui_browser__gotorc_title(browser, 0, 0); ui_browser__set_color(browser, HE_COLORSET_ROOT); @@ -809,7 +806,7 @@ static int annotate_browser__run(struct annotate_browser *browser, annotate_browser__show(&browser->b, title, help); continue; case 'k': - notes->options->show_linenr = !notes->options->show_linenr; + annotate_opts.show_linenr = !annotate_opts.show_linenr; continue; case 'l': annotate_browser__show_full_location (&browser->b); @@ -822,18 +819,18 @@ static int annotate_browser__run(struct annotate_browser *browser, ui_helpline__puts(help); continue; case 'o': - notes->options->use_offset = !notes->options->use_offset; + annotate_opts.use_offset = !annotate_opts.use_offset; annotation__update_column_widths(notes); continue; case 'O': - if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) - notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + if (++annotate_opts.offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + annotate_opts.offset_level = ANNOTATION__MIN_OFFSET_LEVEL; continue; case 'j': - notes->options->jump_arrows = !notes->options->jump_arrows; + annotate_opts.jump_arrows = !annotate_opts.jump_arrows; continue; case 'J': - notes->options->show_nr_jumps = !notes->options->show_nr_jumps; + annotate_opts.show_nr_jumps = !annotate_opts.show_nr_jumps; annotation__update_column_widths(notes); continue; case '/': @@ -897,15 +894,15 @@ show_sup_ins: annotation__update_column_widths(notes); continue; case 'c': - if (notes->options->show_minmax_cycle) - notes->options->show_minmax_cycle = false; + if (annotate_opts.show_minmax_cycle) + annotate_opts.show_minmax_cycle = false; else - notes->options->show_minmax_cycle = true; + annotate_opts.show_minmax_cycle = true; annotation__update_column_widths(notes); continue; case 'p': case 'b': - switch_percent_type(browser->opts, key == 'b'); + switch_percent_type(&annotate_opts, key == 'b'); hists__scnprintf_title(hists, title, sizeof(title)); annotate_browser__show(&browser->b, title, help); continue; @@ -932,26 +929,23 @@ out: } int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { - return symbol__tui_annotate(ms, evsel, hbt, opts); + return symbol__tui_annotate(ms, evsel, hbt); } int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); - return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts); + return map_symbol__tui_annotate(&he->ms, evsel, hbt); } int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); @@ -965,7 +959,6 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, .priv = ms, .use_navkeypressed = true, }, - .opts = opts, }; struct dso *dso; int ret = -1, err; @@ -996,7 +989,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, browser.b.entries = ¬es->src->source, browser.b.width += 18; /* Percentage */ - if (notes->options->hide_src_code) + if (annotate_opts.hide_src_code) ui_browser__init_asm_mode(&browser.b); ret = annotate_browser__run(&browser, evsel, hbt); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 70db5a717905..bb59d27642cc 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2250,8 +2250,7 @@ struct hist_browser *hist_browser__new(struct hists *hists) static struct hist_browser * perf_evsel_browser__new(struct evsel *evsel, struct hist_browser_timer *hbt, - struct perf_env *env, - struct annotation_options *annotation_opts) + struct perf_env *env) { struct hist_browser *browser = hist_browser__new(evsel__hists(evsel)); @@ -2259,7 +2258,6 @@ perf_evsel_browser__new(struct evsel *evsel, browser->hbt = hbt; browser->env = env; browser->title = hists_browser__scnprintf_title; - browser->annotation_opts = annotation_opts; } return browser; } @@ -2432,8 +2430,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) struct hist_entry *he; int err; - if (!browser->annotation_opts->objdump_path && - perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path)) + if (!annotate_opts.objdump_path && + perf_env__lookup_objdump(browser->env, &annotate_opts.objdump_path)) return 0; notes = symbol__annotation(act->ms.sym); @@ -2445,8 +2443,7 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) else evsel = hists_to_evsel(browser->hists); - err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt, - browser->annotation_opts); + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt); he = hist_browser__selected_entry(browser); /* * offer option to annotate the other branch source or target @@ -2943,11 +2940,10 @@ next: static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline, bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_opts) + struct perf_env *env, bool warn_lost_event) { struct hists *hists = evsel__hists(evsel); - struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); + struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); struct branch_info *bi = NULL; #define MAX_OPTIONS 16 char *options[MAX_OPTIONS]; @@ -3398,7 +3394,6 @@ out: struct evsel_menu { struct ui_browser b; struct evsel *selection; - struct annotation_options *annotation_opts; bool lost_events, lost_events_warned; float min_pcnt; struct perf_env *env; @@ -3499,8 +3494,7 @@ browse_hists: hbt->timer(hbt->arg); key = evsel__hists_browse(pos, nr_events, help, true, hbt, menu->min_pcnt, menu->env, - warn_lost_event, - menu->annotation_opts); + warn_lost_event); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3557,7 +3551,7 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused, static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event, struct annotation_options *annotation_opts) + bool warn_lost_event) { struct evsel *pos; struct evsel_menu menu = { @@ -3572,7 +3566,6 @@ static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, con }, .min_pcnt = min_pcnt, .env = env, - .annotation_opts = annotation_opts, }; ui_helpline__push("Press ESC to exit"); @@ -3607,8 +3600,7 @@ static bool evlist__single_entry(struct evlist *evlist) } int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_opts) + float min_pcnt, struct perf_env *env, bool warn_lost_event) { int nr_entries = evlist->core.nr_entries; @@ -3617,7 +3609,7 @@ single_entry: { struct evsel *first = evlist__first(evlist); return evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, - env, warn_lost_event, annotation_opts); + env, warn_lost_event); } } @@ -3635,7 +3627,7 @@ single_entry: { } return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env, - warn_lost_event, annotation_opts); + warn_lost_event); } static int block_hists_browser__title(struct hist_browser *browser, char *bf, @@ -3654,8 +3646,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf, } int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts) + float min_percent, struct perf_env *env) { struct hists *hists = &bh->block_hists; struct hist_browser *browser; @@ -3672,7 +3663,6 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, browser->title = block_hists_browser__title; browser->min_pcnt = min_percent; browser->env = env; - browser->annotation_opts = annotation_opts; /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index 1e938d9ffa5e..de46f6c56b0e 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -4,7 +4,6 @@ #include "ui/browser.h" -struct annotation_options; struct evsel; struct hist_browser { @@ -15,7 +14,6 @@ struct hist_browser { struct hist_browser_timer *hbt; struct pstack *pstack; struct perf_env *env; - struct annotation_options *annotation_opts; struct evsel *block_evsel; int print_seq; bool show_dso; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 238e7ef289a8..b79614c44a24 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -412,13 +412,11 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel); #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts); + struct hist_browser_timer *hbt); #else static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 08f82c1f166c..dec910989701 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -464,8 +464,7 @@ void block_info__free_report(struct block_report *reps, int nr_reps) } int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts) + struct evsel *evsel, struct perf_env *env) { int ret; @@ -477,8 +476,7 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, return 0; case 1: symbol_conf.report_individual_block = true; - ret = block_hists_tui_browse(bh, evsel, min_percent, - env, annotation_opts); + ret = block_hists_tui_browse(bh, evsel, min_percent, env); return ret; default: return -1; diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index 42e9dcc4cf0a..96f53e89795e 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -78,8 +78,7 @@ struct block_report *block_info__create_report(struct evlist *evlist, void block_info__free_report(struct block_report *reps, int nr_reps); int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts); + struct evsel *evsel, struct perf_env *env); float block_info__total_cycles_percent(struct hist_entry *he); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index afc9f1c7f4dc..5d0db96609df 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -457,7 +457,6 @@ struct hist_browser_timer { int refresh; }; -struct annotation_options; struct res_sample; enum rstype { @@ -473,16 +472,13 @@ struct block_hist; void attr_to_script(char *buf, struct perf_event_attr *attr); int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_options); + float min_pcnt, struct perf_env *env, bool warn_lost_event); int script_browse(const char *script_opt, struct evsel *evsel); @@ -492,8 +488,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res, void res_sample_init(void); int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts); + float min_percent, struct perf_env *env); #else static inline int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, @@ -501,23 +496,20 @@ int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + bool warn_lost_event __maybe_unused) { return 0; } static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } @@ -541,8 +533,7 @@ static inline void res_sample_init(void) {} static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, struct evsel *evsel __maybe_unused, float min_percent __maybe_unused, - struct perf_env *env __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct perf_env *env __maybe_unused) { return 0; } From 88c4b5dd2107b1678866f3fb38093caa81df9985 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 13 Aug 2024 09:02:00 -0700 Subject: [PATCH 246/534] perf report: Fix --total-cycles --stdio output error [ Upstream commit 3ef44458071a19e5b5832cdfe6f75273aa521b6e ] The --total-cycles may output wrong information with the --stdio. For example: # perf record -e "{cycles,instructions}",cache-misses -b sleep 1 # perf report --total-cycles --stdio The total cycles output of {cycles,instructions} and cache-misses are almost the same. # Samples: 938 of events 'anon group { cycles, instructions }' # Event count (approx.): 938 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] # ............... .............. ........... .......... ..................................................> # 11.19% 2.6K 0.10% 21 [perf_iterate_ctx+48 -> > 5.79% 1.4K 0.45% 97 [__intel_pmu_enable_all.constprop.0+80 -> __intel_> 5.11% 1.2K 0.33% 71 [native_write_msr+0 ->> # Samples: 293 of event 'cache-misses' # Event count (approx.): 293 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] # ............... .............. ........... .......... ..................................................> # 11.19% 2.6K 0.13% 21 [perf_iterate_ctx+48 -> > 5.79% 1.4K 0.59% 97 [__intel_pmu_enable_all.constprop.0+80 -> __intel_> 5.11% 1.2K 0.43% 71 [native_write_msr+0 ->> With the symbol_conf.event_group, the 'perf report' should only report the block information of the leader event in a group. However, the current implementation retrieves the next event's block information, rather than the next group leader's block information. Make sure the index is updated even if the event is skipped. With the patch, # Samples: 293 of event 'cache-misses' # Event count (approx.): 293 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] # ............... .............. ........... .......... ..................................................> # 37.98% 9.0K 4.05% 299 [perf_event_addr_filters_exec+0 -> perf_event_a> 11.19% 2.6K 0.28% 21 [perf_iterate_ctx+48 -> > 5.79% 1.4K 1.32% 97 [__intel_pmu_enable_all.constprop.0+80 -> __intel_> Fixes: 6f7164fa231a5f36 ("perf report: Sort by sampled cycles percent per block for stdio") Reviewed-by: Andi Kleen Signed-off-by: Kan Liang Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jin Yao Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20240813160208.2493643-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-report.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 212760e4dd16..cd2f3f1a7563 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -562,6 +562,7 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c struct hists *hists = evsel__hists(pos); const char *evname = evsel__name(pos); + i++; if (symbol_conf.event_group && !evsel__is_group_leader(pos)) continue; @@ -571,7 +572,7 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c hists__fprintf_nr_sample_events(hists, rep, evname, stdout); if (rep->total_cycles_mode) { - report__browse_block_hists(&rep->block_reports[i++].hist, + report__browse_block_hists(&rep->block_reports[i - 1].hist, rep->min_percent, pos, NULL); continue; } From 505ec05002c75eef4647a747566cbe294e6dec00 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 6 Aug 2024 10:35:33 +0800 Subject: [PATCH 247/534] perf sched timehist: Fix missing free of session in perf_sched__timehist() [ Upstream commit 6bdf5168b6fb19541b0c1862bdaa596d116c7bfb ] When perf_time__parse_str() fails in perf_sched__timehist(), need to free session that was previously created, fix it. Fixes: 853b74071110bed3 ("perf sched timehist: Add option to specify time window of interest") Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240806023533.1316348-1-yangjihong@bytedance.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-sched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index f21a655dd7f9..489a5ef2a25d 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3068,7 +3068,8 @@ static int perf_sched__timehist(struct perf_sched *sched) if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) { pr_err("Invalid time string\n"); - return -EINVAL; + err = -EINVAL; + goto out; } if (timehist_check_attr(sched, evlist) != 0) From fa0720b32afa89496fc0cf5c96705a908311cf40 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 2 Aug 2024 14:58:00 +0800 Subject: [PATCH 248/534] perf stat: Display iostat headers correctly [ Upstream commit 2615639352420e6e3115952c5b8f46846e1c6d0e ] Currently we'll only print metric headers for metric leader in aggregration mode. This will make `perf iostat` header not shown since it'll aggregrated globally but don't have metric events: root@ubuntu204:/home/yang/linux/tools/perf# ./perf stat --iostat --timeout 1000 Performance counter stats for 'system wide': port 0000:00 0 0 0 0 0000:80 0 0 0 0 [...] Fix this by excluding the iostat in the check of printing metric headers. Then we can see the headers: root@ubuntu204:/home/yang/linux/tools/perf# ./perf stat --iostat --timeout 1000 Performance counter stats for 'system wide': port Inbound Read(MB) Inbound Write(MB) Outbound Read(MB) Outbound Write(MB) 0000:00 0 0 0 0 0000:80 0 0 0 0 [...] Fixes: 193a9e30207f5477 ("perf stat: Don't display metric header for non-leader uncore events") Signed-off-by: Yicong Yang Acked-by: Namhyung Kim Cc: Ian Rogers Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Junhao He Cc: linuxarm@huawei.com Cc: Mark Rutland Cc: Peter Zijlstra Cc: Shameerali Kolothum Thodi Cc: Zeng Tao Link: https://lore.kernel.org/r/20240802065800.48774-1-yangyicong@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/stat-display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 0abe35388ab1..f98ade7f9fba 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1207,7 +1207,8 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { - if (config->aggr_mode != AGGR_NONE && counter->metric_leader != counter) + if (!config->iostat_run && + config->aggr_mode != AGGR_NONE && counter->metric_leader != counter) continue; os.evsel = counter; From 022f9328ef179643541256783044f3a713f075db Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Mon, 19 Aug 2024 10:47:20 +0800 Subject: [PATCH 249/534] perf sched timehist: Fixed timestamp error when unable to confirm event sched_in time [ Upstream commit 39c243411bdb8fb35777adf49ee32549633c4e12 ] If sched_in event for current task is not recorded, sched_in timestamp will be set to end_time of time window interest, causing an error in timestamp show. In this case, we choose to ignore this event. Test scenario: perf[1229608] does not record the first sched_in event, run time and sch delay are both 0 # perf sched timehist Samples of sched_switch event do not have callchains. time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) --------------- ------ ------------------------------ --------- --------- --------- 2090450.763231 [0000] perf[1229608] 0.000 0.000 0.000 2090450.763235 [0000] migration/0[15] 0.000 0.001 0.003 2090450.763263 [0001] perf[1229608] 0.000 0.000 0.000 2090450.763268 [0001] migration/1[21] 0.000 0.001 0.004 2090450.763302 [0002] perf[1229608] 0.000 0.000 0.000 2090450.763309 [0002] migration/2[27] 0.000 0.001 0.007 2090450.763338 [0003] perf[1229608] 0.000 0.000 0.000 2090450.763343 [0003] migration/3[33] 0.000 0.001 0.004 Before: arbitrarily specify a time window of interest, timestamp will be set to an incorrect value # perf sched timehist --time 100,200 Samples of sched_switch event do not have callchains. time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) --------------- ------ ------------------------------ --------- --------- --------- 200.000000 [0000] perf[1229608] 0.000 0.000 0.000 200.000000 [0001] perf[1229608] 0.000 0.000 0.000 200.000000 [0002] perf[1229608] 0.000 0.000 0.000 200.000000 [0003] perf[1229608] 0.000 0.000 0.000 200.000000 [0004] perf[1229608] 0.000 0.000 0.000 200.000000 [0005] perf[1229608] 0.000 0.000 0.000 200.000000 [0006] perf[1229608] 0.000 0.000 0.000 200.000000 [0007] perf[1229608] 0.000 0.000 0.000 After: # perf sched timehist --time 100,200 Samples of sched_switch event do not have callchains. time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) --------------- ------ ------------------------------ --------- --------- --------- Fixes: 853b74071110bed3 ("perf sched timehist: Add option to specify time window of interest") Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240819024720.2405244-1-yangjihong@bytedance.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-sched.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 489a5ef2a25d..42185da0f000 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2633,9 +2633,12 @@ static int timehist_sched_change_event(struct perf_tool *tool, * - previous sched event is out of window - we are done * - sample time is beyond window user cares about - reset it * to close out stats for time window interest + * - If tprev is 0, that is, sched_in event for current task is + * not recorded, cannot determine whether sched_in event is + * within time window interest - ignore it */ if (ptime->end) { - if (tprev > ptime->end) + if (!tprev || tprev > ptime->end) goto out; if (t > ptime->end) From 3ba5a2e91c706bd55a34ccc1a0db4b0e4d0d2054 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 31 Aug 2024 00:04:11 -0700 Subject: [PATCH 250/534] perf time-utils: Fix 32-bit nsec parsing [ Upstream commit 38e2648a81204c9fc5b4c87a8ffce93a6ed91b65 ] The "time utils" test fails in 32-bit builds: ... parse_nsec_time("18446744073.709551615") Failed. ptime 4294967295709551615 expected 18446744073709551615 ... Switch strtoul to strtoull as an unsigned long in 32-bit build isn't 64-bits. Fixes: c284d669a20d408b ("perf tools: Move parse_nsec_time to time-utils.c") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Chaitanya S Prakash Cc: Colin Ian King Cc: David Ahern Cc: Dominique Martinet Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Junhao He Cc: Kan Liang Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yang Jihong Link: https://lore.kernel.org/r/20240831070415.506194-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/time-utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 302443921681..1b91ccd4d523 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -20,7 +20,7 @@ int parse_nsec_time(const char *str, u64 *ptime) u64 time_sec, time_nsec; char *end; - time_sec = strtoul(str, &end, 10); + time_sec = strtoull(str, &end, 10); if (*end != '.' && *end != '\0') return -1; @@ -38,7 +38,7 @@ int parse_nsec_time(const char *str, u64 *ptime) for (i = strlen(nsec_buf); i < 9; i++) nsec_buf[i] = '0'; - time_nsec = strtoul(nsec_buf, &end, 10); + time_nsec = strtoull(nsec_buf, &end, 10); if (*end != '\0') return -1; } else From bd553be1cfb230ebf919435f8290a00baa75a870 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 14 Jun 2024 15:42:03 +0800 Subject: [PATCH 251/534] clk: imx: clk-audiomix: Correct parent clock for earc_phy and audpll [ Upstream commit d40371a1c963db688b37826adaf5ffdafb0862a1 ] According to Reference Manual of i.MX8MP The parent clock of "earc_phy" is "sai_pll_out_div2", The parent clock of "audpll" is "osc_24m". Add CLK_GATE_PARENT() macro for usage of specifying parent clock. Fixes: 6cd95f7b151c ("clk: imx: imx8mp: Add audiomix block control") Signed-off-by: Shengjiu Wang Reviewed-by: Peng Fan Link: https://lore.kernel.org/r/1718350923-21392-6-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx8mp-audiomix.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-imx8mp-audiomix.c b/drivers/clk/imx/clk-imx8mp-audiomix.c index 55ed211a5e0b..ab2a028b3027 100644 --- a/drivers/clk/imx/clk-imx8mp-audiomix.c +++ b/drivers/clk/imx/clk-imx8mp-audiomix.c @@ -146,6 +146,15 @@ static const struct clk_parent_data clk_imx8mp_audiomix_pll_bypass_sels[] = { PDM_SEL, 2, 0 \ } +#define CLK_GATE_PARENT(gname, cname, pname) \ + { \ + gname"_cg", \ + IMX8MP_CLK_AUDIOMIX_##cname, \ + { .fw_name = pname, .name = pname }, NULL, 1, \ + CLKEN0 + 4 * !!(IMX8MP_CLK_AUDIOMIX_##cname / 32), \ + 1, IMX8MP_CLK_AUDIOMIX_##cname % 32 \ + } + struct clk_imx8mp_audiomix_sel { const char *name; int clkid; @@ -163,14 +172,14 @@ static struct clk_imx8mp_audiomix_sel sels[] = { CLK_GATE("earc", EARC_IPG), CLK_GATE("ocrama", OCRAMA_IPG), CLK_GATE("aud2htx", AUD2HTX_IPG), - CLK_GATE("earc_phy", EARC_PHY), + CLK_GATE_PARENT("earc_phy", EARC_PHY, "sai_pll_out_div2"), CLK_GATE("sdma2", SDMA2_ROOT), CLK_GATE("sdma3", SDMA3_ROOT), CLK_GATE("spba2", SPBA2_ROOT), CLK_GATE("dsp", DSP_ROOT), CLK_GATE("dspdbg", DSPDBG_ROOT), CLK_GATE("edma", EDMA_ROOT), - CLK_GATE("audpll", AUDPLL_ROOT), + CLK_GATE_PARENT("audpll", AUDPLL_ROOT, "osc_24m"), CLK_GATE("mu2", MU2_ROOT), CLK_GATE("mu3", MU3_ROOT), CLK_PDM, From c2ee6de22dac61bbf406019b9f83455222413af3 Mon Sep 17 00:00:00 2001 From: Sebastien Laveze Date: Tue, 28 May 2024 17:14:33 +0200 Subject: [PATCH 252/534] clk: imx: imx6ul: fix default parent for enet*_ref_sel [ Upstream commit e52fd71333b4ed78fd5bb43094bdc46476614d25 ] The clk_set_parent for "enet1_ref_sel" and "enet2_ref_sel" are incorrect, therefore the original requirements to have "enet_clk_ref" as output sourced by iMX ENET PLL as a default config is not met. Only "enet[1,2]_ref_125m" "enet[1,2]_ref_pad" are possible parents for "enet1_ref_sel" and "enet2_ref_sel". This was observed as a regression using a custom device tree which was expecting this default config. This can be fixed at the device tree level but having a default config matching the original behavior (before refclock mux) will avoid breaking existing configs. Fixes: 4e197ee880c2 ("clk: imx6ul: add ethernet refclock mux support") Link: https://lore.kernel.org/lkml/20230306020226.GC143566@dragon/T/ Signed-off-by: Sebastien Laveze Reviewed-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240528151434.227602-1-slaveze@smartandconnective.com Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx6ul.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c index f9394e94f69d..05c7a82b751f 100644 --- a/drivers/clk/imx/clk-imx6ul.c +++ b/drivers/clk/imx/clk-imx6ul.c @@ -542,8 +542,8 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node) clk_set_parent(hws[IMX6UL_CLK_ENFC_SEL]->clk, hws[IMX6UL_CLK_PLL2_PFD2]->clk); - clk_set_parent(hws[IMX6UL_CLK_ENET1_REF_SEL]->clk, hws[IMX6UL_CLK_ENET_REF]->clk); - clk_set_parent(hws[IMX6UL_CLK_ENET2_REF_SEL]->clk, hws[IMX6UL_CLK_ENET2_REF]->clk); + clk_set_parent(hws[IMX6UL_CLK_ENET1_REF_SEL]->clk, hws[IMX6UL_CLK_ENET1_REF_125M]->clk); + clk_set_parent(hws[IMX6UL_CLK_ENET2_REF_SEL]->clk, hws[IMX6UL_CLK_ENET2_REF_125M]->clk); imx_register_uart_clocks(); } From 554c590d229d8c789df4b2eaec487539a1894fd2 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 22 Dec 2023 16:48:24 +0100 Subject: [PATCH 253/534] clk: imx: composite-8m: Less function calls in __imx8m_clk_hw_composite() after error detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fed6bf52c86df27ad4f39a72cdad8c27da9a50ba ] The function “kfree” was called in up to three cases by the function “__imx8m_clk_hw_composite” during error handling even if the passed variables contained a null pointer. Adjust jump targets according to the Linux coding style convention. Signed-off-by: Markus Elfring Reviewed-by: Peng Fan Link: https://lore.kernel.org/r/147ca1e6-69f3-4586-b5b3-b69f9574a862@web.de Signed-off-by: Abel Vesa Stable-dep-of: 8f32e9dd0916 ("clk: imx: composite-8m: Enable gate clk with mcore_booted") Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-composite-8m.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/clk/imx/clk-composite-8m.c b/drivers/clk/imx/clk-composite-8m.c index 27a08c50ac1d..eb5392f7a257 100644 --- a/drivers/clk/imx/clk-composite-8m.c +++ b/drivers/clk/imx/clk-composite-8m.c @@ -220,7 +220,7 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name, mux = kzalloc(sizeof(*mux), GFP_KERNEL); if (!mux) - goto fail; + return ERR_CAST(hw); mux_hw = &mux->hw; mux->reg = reg; @@ -230,7 +230,7 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name, div = kzalloc(sizeof(*div), GFP_KERNEL); if (!div) - goto fail; + goto free_mux; div_hw = &div->hw; div->reg = reg; @@ -260,7 +260,7 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name, if (!mcore_booted) { gate = kzalloc(sizeof(*gate), GFP_KERNEL); if (!gate) - goto fail; + goto free_div; gate_hw = &gate->hw; gate->reg = reg; @@ -272,13 +272,15 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name, mux_hw, mux_ops, div_hw, divider_ops, gate_hw, &clk_gate_ops, flags); if (IS_ERR(hw)) - goto fail; + goto free_gate; return hw; -fail: +free_gate: kfree(gate); +free_div: kfree(div); +free_mux: kfree(mux); return ERR_CAST(hw); } From 73034d130b0a865abc65029b2ea9f99af1aa00a8 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 7 Jun 2024 21:33:33 +0800 Subject: [PATCH 254/534] clk: imx: composite-8m: Enable gate clk with mcore_booted [ Upstream commit 8f32e9dd0916eb3fd4bcf550ed1d04542a65cb9e ] Bootloader might disable some CCM ROOT Slices. So if mcore_booted set with display CCM ROOT disabled by Bootloader, kernel display BLK CTRL driver imx8m_blk_ctrl_driver_init may hang the system because the BUS clk is disabled. Add back gate ops, but with disable doing nothing, then the CCM ROOT will be enabled when used. Fixes: bb7e897b002a ("clk: imx8m: check mcore_booted before register clk") Reviewed-by: Ye Li Reviewed-by: Jacky Bai Signed-off-by: Peng Fan Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240607133347.3291040-2-peng.fan@oss.nxp.com Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-composite-8m.c | 51 ++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/drivers/clk/imx/clk-composite-8m.c b/drivers/clk/imx/clk-composite-8m.c index eb5392f7a257..ac5e9d60acb8 100644 --- a/drivers/clk/imx/clk-composite-8m.c +++ b/drivers/clk/imx/clk-composite-8m.c @@ -204,6 +204,34 @@ static const struct clk_ops imx8m_clk_composite_mux_ops = { .determine_rate = imx8m_clk_composite_mux_determine_rate, }; +static int imx8m_clk_composite_gate_enable(struct clk_hw *hw) +{ + struct clk_gate *gate = to_clk_gate(hw); + unsigned long flags; + u32 val; + + spin_lock_irqsave(gate->lock, flags); + + val = readl(gate->reg); + val |= BIT(gate->bit_idx); + writel(val, gate->reg); + + spin_unlock_irqrestore(gate->lock, flags); + + return 0; +} + +static void imx8m_clk_composite_gate_disable(struct clk_hw *hw) +{ + /* composite clk requires the disable hook */ +} + +static const struct clk_ops imx8m_clk_composite_gate_ops = { + .enable = imx8m_clk_composite_gate_enable, + .disable = imx8m_clk_composite_gate_disable, + .is_enabled = clk_gate_is_enabled, +}; + struct clk_hw *__imx8m_clk_hw_composite(const char *name, const char * const *parent_names, int num_parents, void __iomem *reg, @@ -217,6 +245,7 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name, struct clk_mux *mux = NULL; const struct clk_ops *divider_ops; const struct clk_ops *mux_ops; + const struct clk_ops *gate_ops; mux = kzalloc(sizeof(*mux), GFP_KERNEL); if (!mux) @@ -257,20 +286,22 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name, div->flags = CLK_DIVIDER_ROUND_CLOSEST; /* skip registering the gate ops if M4 is enabled */ - if (!mcore_booted) { - gate = kzalloc(sizeof(*gate), GFP_KERNEL); - if (!gate) - goto free_div; + gate = kzalloc(sizeof(*gate), GFP_KERNEL); + if (!gate) + goto free_div; - gate_hw = &gate->hw; - gate->reg = reg; - gate->bit_idx = PCG_CGC_SHIFT; - gate->lock = &imx_ccm_lock; - } + gate_hw = &gate->hw; + gate->reg = reg; + gate->bit_idx = PCG_CGC_SHIFT; + gate->lock = &imx_ccm_lock; + if (!mcore_booted) + gate_ops = &clk_gate_ops; + else + gate_ops = &imx8m_clk_composite_gate_ops; hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, mux_hw, mux_ops, div_hw, - divider_ops, gate_hw, &clk_gate_ops, flags); + divider_ops, gate_hw, gate_ops, flags); if (IS_ERR(hw)) goto free_gate; From c1eb71fd985d01b0f1a285f31747183f70ba16f4 Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Fri, 7 Jun 2024 21:33:34 +0800 Subject: [PATCH 255/534] clk: imx: composite-93: keep root clock on when mcore enabled [ Upstream commit d342df11726bfac9c3a9d2037afa508ac0e9e44e ] Previously we assumed that the root clock slice is enabled by default when kernel boot up. But the bootloader may disable the clocks before jump into kernel. The gate ops should be registered rather than NULL to make sure the disabled clock can be enabled when kernel boot up. Refine the code to skip disable the clock if mcore booted. Fixes: a740d7350ff7 ("clk: imx: imx93: add mcore_booted module paratemter") Signed-off-by: Jacky Bai Reviewed-by: Peng Fan Tested-by: Chancel Liu Signed-off-by: Peng Fan Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240607133347.3291040-3-peng.fan@oss.nxp.com Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-composite-93.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/clk/imx/clk-composite-93.c b/drivers/clk/imx/clk-composite-93.c index 81164bdcd6cc..6c6c5a30f328 100644 --- a/drivers/clk/imx/clk-composite-93.c +++ b/drivers/clk/imx/clk-composite-93.c @@ -76,6 +76,13 @@ static int imx93_clk_composite_gate_enable(struct clk_hw *hw) static void imx93_clk_composite_gate_disable(struct clk_hw *hw) { + /* + * Skip disable the root clock gate if mcore enabled. + * The root clock may be used by the mcore. + */ + if (mcore_booted) + return; + imx93_clk_composite_gate_endisable(hw, 0); } @@ -222,7 +229,7 @@ struct clk_hw *imx93_clk_composite_flags(const char *name, const char * const *p hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, mux_hw, &clk_mux_ro_ops, div_hw, &clk_divider_ro_ops, NULL, NULL, flags); - } else if (!mcore_booted) { + } else { gate = kzalloc(sizeof(*gate), GFP_KERNEL); if (!gate) goto fail; @@ -238,12 +245,6 @@ struct clk_hw *imx93_clk_composite_flags(const char *name, const char * const *p &imx93_clk_composite_divider_ops, gate_hw, &imx93_clk_composite_gate_ops, flags | CLK_SET_RATE_NO_REPARENT); - } else { - hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, - mux_hw, &imx93_clk_composite_mux_ops, div_hw, - &imx93_clk_composite_divider_ops, NULL, - &imx93_clk_composite_gate_ops, - flags | CLK_SET_RATE_NO_REPARENT); } if (IS_ERR(hw)) From ed323659a011de41a2a58a86f0a521117faa5c10 Mon Sep 17 00:00:00 2001 From: Ye Li Date: Fri, 7 Jun 2024 21:33:35 +0800 Subject: [PATCH 256/534] clk: imx: composite-7ulp: Check the PCC present bit [ Upstream commit 4717ccadb51e2630790dddd222830702de17f090 ] When some module is disabled by fuse, its PCC PR bit is default 0 and PCC is not operational. Any write to this PCC will cause SError. Fixes: b40ba8065347 ("clk: imx: Update the compsite driver to support imx8ulp") Reviewed-by: Peng Fan Signed-off-by: Ye Li Signed-off-by: Peng Fan Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240607133347.3291040-4-peng.fan@oss.nxp.com Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-composite-7ulp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/clk/imx/clk-composite-7ulp.c b/drivers/clk/imx/clk-composite-7ulp.c index e208ddc51133..db7f40b07d1a 100644 --- a/drivers/clk/imx/clk-composite-7ulp.c +++ b/drivers/clk/imx/clk-composite-7ulp.c @@ -14,6 +14,7 @@ #include "../clk-fractional-divider.h" #include "clk.h" +#define PCG_PR_MASK BIT(31) #define PCG_PCS_SHIFT 24 #define PCG_PCS_MASK 0x7 #define PCG_CGC_SHIFT 30 @@ -78,6 +79,12 @@ static struct clk_hw *imx_ulp_clk_hw_composite(const char *name, struct clk_hw *hw; u32 val; + val = readl(reg); + if (!(val & PCG_PR_MASK)) { + pr_info("PCC PR is 0 for clk:%s, bypass\n", name); + return 0; + } + if (mux_present) { mux = kzalloc(sizeof(*mux), GFP_KERNEL); if (!mux) From 908165b5d369e386ee191a028a5551e22f23fe36 Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Fri, 7 Jun 2024 21:33:36 +0800 Subject: [PATCH 257/534] clk: imx: fracn-gppll: fix fractional part of PLL getting lost [ Upstream commit 7622f888fca125ae46f695edf918798ebc0506c5 ] Fractional part of PLL gets lost after re-enabling the PLL. the MFN can NOT be automatically loaded when doing frac PLL enable/disable, So when re-enable PLL, configure mfn explicitly. Fixes: 1b26cb8a77a4 ("clk: imx: support fracn gppll") Signed-off-by: Pengfei Li Reviewed-by: Jacky Bai Signed-off-by: Peng Fan Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240607133347.3291040-5-peng.fan@oss.nxp.com Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-fracn-gppll.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/imx/clk-fracn-gppll.c b/drivers/clk/imx/clk-fracn-gppll.c index 44462ab50e51..1becba2b62d0 100644 --- a/drivers/clk/imx/clk-fracn-gppll.c +++ b/drivers/clk/imx/clk-fracn-gppll.c @@ -291,6 +291,10 @@ static int clk_fracn_gppll_prepare(struct clk_hw *hw) if (val & POWERUP_MASK) return 0; + if (pll->flags & CLK_FRACN_GPPLL_FRACN) + writel_relaxed(readl_relaxed(pll->base + PLL_NUMERATOR), + pll->base + PLL_NUMERATOR); + val |= CLKMUX_BYPASS; writel_relaxed(val, pll->base + PLL_CTRL); From 5b44298953f35fcd2b480db9de70ee8ec4ee7041 Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Fri, 7 Jun 2024 21:33:38 +0800 Subject: [PATCH 258/534] clk: imx: imx8mp: fix clock tree update of TF-A managed clocks [ Upstream commit 3d29036853b9cb07ac49e8261fca82a940be5c41 ] On the i.MX8M*, the TF-A exposes a SiP (Silicon Provider) service for DDR frequency scaling. The imx8m-ddrc-devfreq driver calls the SiP and then does clk_set_parent on the DDR muxes to synchronize the clock tree. since commit 936c383673b9 ("clk: imx: fix composite peripheral flags"), these TF-A managed muxes have SET_PARENT_GATE set, which results in imx8m-ddrc-devfreq's clk_set_parent after SiP failing with -EBUSY: clk_set_parent(dram_apb_src, sys1_pll_40m);(busfreq-imx8mq.c) commit 926bf91248dd ("clk: imx8m: fix clock tree update of TF-A managed clocks") adds this method and enables 8mm, 8mn and 8mq. i.MX8MP also needs it. This is safe to do, because updating the Linux clock tree to reflect reality will always be glitch-free. Another reason to this patch is that powersave image BT music requires dram to be 400MTS, so clk_set_parent(dram_alt_src, sys1_pll_800m); is required. Without this patch, it will not succeed. Fixes: 936c383673b9 ("clk: imx: fix composite peripheral flags") Signed-off-by: Zhipeng Wang Reviewed-by: Ahmad Fatoum Signed-off-by: Peng Fan Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240607133347.3291040-7-peng.fan@oss.nxp.com Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx8mp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c index 670aa2bab301..e561ff7b135f 100644 --- a/drivers/clk/imx/clk-imx8mp.c +++ b/drivers/clk/imx/clk-imx8mp.c @@ -551,8 +551,8 @@ static int imx8mp_clocks_probe(struct platform_device *pdev) hws[IMX8MP_CLK_IPG_ROOT] = imx_clk_hw_divider2("ipg_root", "ahb_root", ccm_base + 0x9080, 0, 1); - hws[IMX8MP_CLK_DRAM_ALT] = imx8m_clk_hw_composite("dram_alt", imx8mp_dram_alt_sels, ccm_base + 0xa000); - hws[IMX8MP_CLK_DRAM_APB] = imx8m_clk_hw_composite_critical("dram_apb", imx8mp_dram_apb_sels, ccm_base + 0xa080); + hws[IMX8MP_CLK_DRAM_ALT] = imx8m_clk_hw_fw_managed_composite("dram_alt", imx8mp_dram_alt_sels, ccm_base + 0xa000); + hws[IMX8MP_CLK_DRAM_APB] = imx8m_clk_hw_fw_managed_composite_critical("dram_apb", imx8mp_dram_apb_sels, ccm_base + 0xa080); hws[IMX8MP_CLK_VPU_G1] = imx8m_clk_hw_composite("vpu_g1", imx8mp_vpu_g1_sels, ccm_base + 0xa100); hws[IMX8MP_CLK_VPU_G2] = imx8m_clk_hw_composite("vpu_g2", imx8mp_vpu_g2_sels, ccm_base + 0xa180); hws[IMX8MP_CLK_CAN1] = imx8m_clk_hw_composite("can1", imx8mp_can1_sels, ccm_base + 0xa200); From d64513b2dab17470a2c68de9e149d8a3ac356e66 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 7 Jun 2024 21:33:45 +0800 Subject: [PATCH 259/534] clk: imx: imx8qxp: Register dc0_bypass0_clk before disp clk [ Upstream commit e61352d5ecdc0da2e7253121c15d9a3e040f78a1 ] The initialization order of SCU clocks affects the sequence of SCU clock resume. If there are no other effects, the earlier the initialization, the earlier the resume. During SCU clock resume, the clock rate is restored. As SCFW guidelines, configure the parent clock rate before configuring the child rate. Fixes: 91e916771de0 ("clk: imx: scu: remove legacy scu clock binding support") Signed-off-by: Peng Fan Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240607133347.3291040-14-peng.fan@oss.nxp.com Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx8qxp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index 245761e01897..bc75e37d818d 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c @@ -199,11 +199,11 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) imx_clk_scu("usb3_lpm_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_MISC); /* Display controller SS */ - imx_clk_scu2("dc0_disp0_clk", dc0_sels, ARRAY_SIZE(dc0_sels), IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC0); - imx_clk_scu2("dc0_disp1_clk", dc0_sels, ARRAY_SIZE(dc0_sels), IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC1); imx_clk_scu("dc0_pll0_clk", IMX_SC_R_DC_0_PLL_0, IMX_SC_PM_CLK_PLL); imx_clk_scu("dc0_pll1_clk", IMX_SC_R_DC_0_PLL_1, IMX_SC_PM_CLK_PLL); imx_clk_scu("dc0_bypass0_clk", IMX_SC_R_DC_0_VIDEO0, IMX_SC_PM_CLK_BYPASS); + imx_clk_scu2("dc0_disp0_clk", dc0_sels, ARRAY_SIZE(dc0_sels), IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC0); + imx_clk_scu2("dc0_disp1_clk", dc0_sels, ARRAY_SIZE(dc0_sels), IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC1); imx_clk_scu("dc0_bypass1_clk", IMX_SC_R_DC_0_VIDEO1, IMX_SC_PM_CLK_BYPASS); imx_clk_scu2("dc1_disp0_clk", dc1_sels, ARRAY_SIZE(dc1_sels), IMX_SC_R_DC_1, IMX_SC_PM_CLK_MISC0); From af70d9395d7c4a3895f1d8d96552f5673b297a09 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 7 Jun 2024 21:33:46 +0800 Subject: [PATCH 260/534] clk: imx: imx8qxp: Parent should be initialized earlier than the clock [ Upstream commit 766c386c16c9899461b83573a06380d364c6e261 ] The initialization order of SCU clocks affects the sequence of SCU clock resume. If there are no other effects, the earlier the initialization, the earlier the resume. During SCU clock resume, the clock rate is restored. As SCFW guidelines, configure the parent clock rate before configuring the child rate. Fixes: babfaa9556d7 ("clk: imx: scu: add more scu clocks") Signed-off-by: Peng Fan Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240607133347.3291040-15-peng.fan@oss.nxp.com Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx8qxp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index bc75e37d818d..6d458995f388 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c @@ -165,8 +165,8 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) imx_clk_scu("pwm_clk", IMX_SC_R_LCD_0_PWM_0, IMX_SC_PM_CLK_PER); imx_clk_scu("elcdif_pll", IMX_SC_R_ELCDIF_PLL, IMX_SC_PM_CLK_PLL); imx_clk_scu2("lcd_clk", lcd_sels, ARRAY_SIZE(lcd_sels), IMX_SC_R_LCD_0, IMX_SC_PM_CLK_PER); - imx_clk_scu2("lcd_pxl_clk", lcd_pxl_sels, ARRAY_SIZE(lcd_pxl_sels), IMX_SC_R_LCD_0, IMX_SC_PM_CLK_MISC0); imx_clk_scu("lcd_pxl_bypass_div_clk", IMX_SC_R_LCD_0, IMX_SC_PM_CLK_BYPASS); + imx_clk_scu2("lcd_pxl_clk", lcd_pxl_sels, ARRAY_SIZE(lcd_pxl_sels), IMX_SC_R_LCD_0, IMX_SC_PM_CLK_MISC0); /* Audio SS */ imx_clk_scu("audio_pll0_clk", IMX_SC_R_AUDIO_PLL_0, IMX_SC_PM_CLK_PLL); @@ -206,11 +206,11 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) imx_clk_scu2("dc0_disp1_clk", dc0_sels, ARRAY_SIZE(dc0_sels), IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC1); imx_clk_scu("dc0_bypass1_clk", IMX_SC_R_DC_0_VIDEO1, IMX_SC_PM_CLK_BYPASS); - imx_clk_scu2("dc1_disp0_clk", dc1_sels, ARRAY_SIZE(dc1_sels), IMX_SC_R_DC_1, IMX_SC_PM_CLK_MISC0); - imx_clk_scu2("dc1_disp1_clk", dc1_sels, ARRAY_SIZE(dc1_sels), IMX_SC_R_DC_1, IMX_SC_PM_CLK_MISC1); imx_clk_scu("dc1_pll0_clk", IMX_SC_R_DC_1_PLL_0, IMX_SC_PM_CLK_PLL); imx_clk_scu("dc1_pll1_clk", IMX_SC_R_DC_1_PLL_1, IMX_SC_PM_CLK_PLL); imx_clk_scu("dc1_bypass0_clk", IMX_SC_R_DC_1_VIDEO0, IMX_SC_PM_CLK_BYPASS); + imx_clk_scu2("dc1_disp0_clk", dc1_sels, ARRAY_SIZE(dc1_sels), IMX_SC_R_DC_1, IMX_SC_PM_CLK_MISC0); + imx_clk_scu2("dc1_disp1_clk", dc1_sels, ARRAY_SIZE(dc1_sels), IMX_SC_R_DC_1, IMX_SC_PM_CLK_MISC1); imx_clk_scu("dc1_bypass1_clk", IMX_SC_R_DC_1_VIDEO1, IMX_SC_PM_CLK_BYPASS); /* MIPI-LVDS SS */ From 2941577c764bc3576fa1fe415beeee81eb5f76ef Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 19 Jul 2024 16:36:11 +0800 Subject: [PATCH 261/534] remoteproc: imx_rproc: Correct ddr alias for i.MX8M [ Upstream commit c901f817792822eda9cec23814a4621fa3e66695 ] The DDR Alias address should be 0x40000000 according to RM, so correct it. Fixes: 4ab8f9607aad ("remoteproc: imx_rproc: support i.MX8MQ/M") Reported-by: Terry Lv Reviewed-by: Iuliana Prodan Signed-off-by: Peng Fan Reviewed-by: Daniel Baluta Link: https://lore.kernel.org/r/20240719-imx_rproc-v2-1-10d0268c7eb1@nxp.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/imx_rproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index cfee164dd645..ac32b46b4e0f 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -213,7 +213,7 @@ static const struct imx_rproc_att imx_rproc_att_imx8mq[] = { /* QSPI Code - alias */ { 0x08000000, 0x08000000, 0x08000000, 0 }, /* DDR (Code) - alias */ - { 0x10000000, 0x80000000, 0x0FFE0000, 0 }, + { 0x10000000, 0x40000000, 0x0FFE0000, 0 }, /* TCML */ { 0x1FFE0000, 0x007E0000, 0x00020000, ATT_OWN | ATT_IOMEM}, /* TCMU */ From d271e66f74b5f5c9b3a1bece514beb21e6c418c1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 19 Jul 2024 16:36:13 +0800 Subject: [PATCH 262/534] remoteproc: imx_rproc: Initialize workqueue earlier [ Upstream commit 858e57c1d3dd7b92cc0fa692ba130a0a5d57e49d ] Initialize workqueue before requesting mailbox channel, otherwise if mailbox interrupt comes before workqueue ready, the imx_rproc_rx_callback will trigger issue. Fixes: 2df7062002d0 ("remoteproc: imx_proc: enable virtio/mailbox") Signed-off-by: Peng Fan Reviewed-by: Daniel Baluta Link: https://lore.kernel.org/r/20240719-imx_rproc-v2-3-10d0268c7eb1@nxp.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/imx_rproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index ac32b46b4e0f..d68d4b22f528 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -1134,6 +1134,8 @@ static int imx_rproc_probe(struct platform_device *pdev) goto err_put_rproc; } + INIT_WORK(&priv->rproc_work, imx_rproc_vq_work); + ret = imx_rproc_xtr_mbox_init(rproc); if (ret) goto err_put_wkq; @@ -1152,8 +1154,6 @@ static int imx_rproc_probe(struct platform_device *pdev) if (ret) goto err_put_scu; - INIT_WORK(&priv->rproc_work, imx_rproc_vq_work); - if (rproc->state != RPROC_DETACHED) rproc->auto_boot = of_property_read_bool(np, "fsl,auto-boot"); From 77c859e8b8a9bdf42b21ef567ee47c45872edbf0 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sat, 15 Jun 2024 17:03:53 +0000 Subject: [PATCH 263/534] clk: rockchip: Set parent rate for DCLK_VOP clock on RK3228 [ Upstream commit 1d34b9757523c1ad547bd6d040381f62d74a3189 ] Similar to DCLK_LCDC on RK3328, the DCLK_VOP on RK3228 is typically parented by the hdmiphy clk and it is expected that the DCLK_VOP and hdmiphy clk rate are kept in sync. Use CLK_SET_RATE_PARENT and CLK_SET_RATE_NO_REPARENT flags, same as used on RK3328, to make full use of all possible supported display modes. Fixes: 0a9d4ac08ebc ("clk: rockchip: set the clock ids for RK3228 VOP") Fixes: 307a2e9ac524 ("clk: rockchip: add clock controller for rk3228") Signed-off-by: Jonas Karlman Link: https://lore.kernel.org/r/20240615170417.3134517-3-jonas@kwiboo.se Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- drivers/clk/rockchip/clk-rk3228.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3228.c b/drivers/clk/rockchip/clk-rk3228.c index a24a35553e13..7343d2d7676b 100644 --- a/drivers/clk/rockchip/clk-rk3228.c +++ b/drivers/clk/rockchip/clk-rk3228.c @@ -409,7 +409,7 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = { RK2928_CLKSEL_CON(29), 0, 3, DFLAGS), DIV(0, "sclk_vop_pre", "sclk_vop_src", 0, RK2928_CLKSEL_CON(27), 8, 8, DFLAGS), - MUX(DCLK_VOP, "dclk_vop", mux_dclk_vop_p, 0, + MUX(DCLK_VOP, "dclk_vop", mux_dclk_vop_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, RK2928_CLKSEL_CON(27), 1, 1, MFLAGS), FACTOR(0, "xin12m", "xin24m", 0, 1, 2), From 59938d4f05f4f6abdde2e7e6d3f809edf15e01bd Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 17 Jul 2024 13:04:28 +0300 Subject: [PATCH 264/534] clk: qcom: dispcc-sm8550: fix several supposed typos [ Upstream commit 7b6a4b907297b28727933493b9e0c95494504634 ] Fix seveal odd-looking places in SM8550's dispcc driver: - duplicate entries in disp_cc_parent_map_4 and disp_cc_parent_map_5 - using &disp_cc_mdss_dptx0_link_div_clk_src as a source for disp_cc_mdss_dptx1_usb_router_link_intf_clk The SM8650 driver has been used as a reference. Fixes: 90114ca11476 ("clk: qcom: add SM8550 DISPCC driver") Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240717-dispcc-sm8550-fixes-v2-1-5c4a3128c40b@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/dispcc-sm8550.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/dispcc-sm8550.c b/drivers/clk/qcom/dispcc-sm8550.c index b9edeb2a221d..e868c5c42609 100644 --- a/drivers/clk/qcom/dispcc-sm8550.c +++ b/drivers/clk/qcom/dispcc-sm8550.c @@ -196,7 +196,7 @@ static const struct clk_parent_data disp_cc_parent_data_3[] = { static const struct parent_map disp_cc_parent_map_4[] = { { P_BI_TCXO, 0 }, { P_DP0_PHY_PLL_LINK_CLK, 1 }, - { P_DP1_PHY_PLL_VCO_DIV_CLK, 2 }, + { P_DP0_PHY_PLL_VCO_DIV_CLK, 2 }, { P_DP3_PHY_PLL_VCO_DIV_CLK, 3 }, { P_DP1_PHY_PLL_VCO_DIV_CLK, 4 }, { P_DP2_PHY_PLL_VCO_DIV_CLK, 6 }, @@ -213,7 +213,7 @@ static const struct clk_parent_data disp_cc_parent_data_4[] = { static const struct parent_map disp_cc_parent_map_5[] = { { P_BI_TCXO, 0 }, - { P_DSI0_PHY_PLL_OUT_BYTECLK, 4 }, + { P_DSI0_PHY_PLL_OUT_BYTECLK, 2 }, { P_DSI1_PHY_PLL_OUT_BYTECLK, 4 }, }; From 65a25e42a4917e025b460ff1049c30e9c508edb3 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 17 Jul 2024 13:04:29 +0300 Subject: [PATCH 265/534] clk: qcom: dispcc-sm8550: use rcg2_ops for mdss_dptx1_aux_clk_src [ Upstream commit cb4c00698f2f27d99a69adcce659370ca286cf2a ] clk_dp_ops should only be used for DisplayPort pixel clocks. Use clk_rcg2_ops for disp_cc_mdss_dptx1_aux_clk_src. Fixes: 90114ca11476 ("clk: qcom: add SM8550 DISPCC driver") Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240717-dispcc-sm8550-fixes-v2-2-5c4a3128c40b@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/dispcc-sm8550.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/dispcc-sm8550.c b/drivers/clk/qcom/dispcc-sm8550.c index e868c5c42609..72558ef25e9e 100644 --- a/drivers/clk/qcom/dispcc-sm8550.c +++ b/drivers/clk/qcom/dispcc-sm8550.c @@ -400,7 +400,7 @@ static struct clk_rcg2 disp_cc_mdss_dptx1_aux_clk_src = { .parent_data = disp_cc_parent_data_0, .num_parents = ARRAY_SIZE(disp_cc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_dp_ops, + .ops = &clk_rcg2_ops, }, }; From ffb0ae195b28b2db756552565c63c875263e9cdc Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 17 Jul 2024 13:04:31 +0300 Subject: [PATCH 266/534] clk: qcom: dispcc-sm8650: Update the GDSC flags [ Upstream commit 7de10ddbdb9d03651cff5fbdc8cf01837c698526 ] Add missing POLL_CFG_GDSCR to the MDSS GDSC flags. Fixes: 90114ca11476 ("clk: qcom: add SM8550 DISPCC driver") Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240717-dispcc-sm8550-fixes-v2-4-5c4a3128c40b@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/dispcc-sm8550.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/dispcc-sm8550.c b/drivers/clk/qcom/dispcc-sm8550.c index 72558ef25e9e..9b63d62057ed 100644 --- a/drivers/clk/qcom/dispcc-sm8550.c +++ b/drivers/clk/qcom/dispcc-sm8550.c @@ -1611,7 +1611,7 @@ static struct gdsc mdss_gdsc = { .name = "mdss_gdsc", }, .pwrsts = PWRSTS_OFF_ON, - .flags = HW_CTRL | RETAIN_FF_ENABLE, + .flags = POLL_CFG_GDSCR | HW_CTRL | RETAIN_FF_ENABLE, }; static struct gdsc mdss_int2_gdsc = { @@ -1620,7 +1620,7 @@ static struct gdsc mdss_int2_gdsc = { .name = "mdss_int2_gdsc", }, .pwrsts = PWRSTS_OFF_ON, - .flags = HW_CTRL | RETAIN_FF_ENABLE, + .flags = POLL_CFG_GDSCR | HW_CTRL | RETAIN_FF_ENABLE, }; static struct clk_regmap *disp_cc_sm8550_clocks[] = { From 676bf8fcf387c3780aa3e7b3eda17ec19a6b1a34 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 17 Jul 2024 13:04:32 +0300 Subject: [PATCH 267/534] clk: qcom: dispcc-sm8550: use rcg2_shared_ops for ESC RCGs [ Upstream commit c8bee3ff6c9220092b646ff929f9c832c1adab6d ] Follow the recommendations and park disp_cc_mdss_esc[01]_clk_src to the XO instead of disabling the clocks by using the clk_rcg2_shared_ops. Fixes: 90114ca11476 ("clk: qcom: add SM8550 DISPCC driver") Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240717-dispcc-sm8550-fixes-v2-5-5c4a3128c40b@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/dispcc-sm8550.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/dispcc-sm8550.c b/drivers/clk/qcom/dispcc-sm8550.c index 9b63d62057ed..95b4c0548f50 100644 --- a/drivers/clk/qcom/dispcc-sm8550.c +++ b/drivers/clk/qcom/dispcc-sm8550.c @@ -562,7 +562,7 @@ static struct clk_rcg2 disp_cc_mdss_esc0_clk_src = { .parent_data = disp_cc_parent_data_5, .num_parents = ARRAY_SIZE(disp_cc_parent_data_5), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -577,7 +577,7 @@ static struct clk_rcg2 disp_cc_mdss_esc1_clk_src = { .parent_data = disp_cc_parent_data_5, .num_parents = ARRAY_SIZE(disp_cc_parent_data_5), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; From bbf297b4cdc105e264da6a4ea2d9c1904d06576d Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sun, 21 Jul 2024 17:19:03 +0200 Subject: [PATCH 268/534] leds: bd2606mvv: Fix device child node usage in bd2606mvv_probe() [ Upstream commit ffbf1fcb421429916a861cfc25dfe0c6387dda75 ] The current implementation accesses the `child` fwnode handle outside of fwnode_for_each_available_child_node() without incrementing its refcount. Add the missing call to `fwnode_handle_get(child)`. The cleanup process where `child` is accessed is not right either because a single call to `fwnode_handle_put()` is carried out in case of an error, ignoring unasigned nodes at the point when the error happens. Keep `child` inside of the first loop, and use the helper pointer that receives references via `fwnode_handle_get()` to handle the child nodes within the second loop. Moreover, the iterated nodes are direct children of the device node, and the `device_for_each_child_node()` macro accounts for child node availability. By restricting `child` to live within that loop, the scoped version of it can be used to simplify the error handling. `fwnode_for_each_available_child_node()` is meant to access the child nodes of an fwnode, and therefore not direct child nodes of the device node. Use `device_for_each_child_node_scoped()` to indicate device's direct child nodes. Fixes: 8325642d2757 ("leds: bd2606mvv: Driver for the Rohm 6 Channel i2c LED driver") Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20240721-device_for_each_child_node-available-v2-3-f33748fd8b2d@gmail.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/leds/leds-bd2606mvv.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/leds/leds-bd2606mvv.c b/drivers/leds/leds-bd2606mvv.c index 3fda712d2f80..c1181a35d0f7 100644 --- a/drivers/leds/leds-bd2606mvv.c +++ b/drivers/leds/leds-bd2606mvv.c @@ -69,16 +69,14 @@ static const struct regmap_config bd2606mvv_regmap = { static int bd2606mvv_probe(struct i2c_client *client) { - struct fwnode_handle *np, *child; struct device *dev = &client->dev; struct bd2606mvv_priv *priv; struct fwnode_handle *led_fwnodes[BD2606_MAX_LEDS] = { 0 }; int active_pairs[BD2606_MAX_LEDS / 2] = { 0 }; int err, reg; - int i; + int i, j; - np = dev_fwnode(dev); - if (!np) + if (!dev_fwnode(dev)) return -ENODEV; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -94,20 +92,18 @@ static int bd2606mvv_probe(struct i2c_client *client) i2c_set_clientdata(client, priv); - fwnode_for_each_available_child_node(np, child) { + device_for_each_child_node_scoped(dev, child) { struct bd2606mvv_led *led; err = fwnode_property_read_u32(child, "reg", ®); - if (err) { - fwnode_handle_put(child); + if (err) return err; - } - if (reg < 0 || reg >= BD2606_MAX_LEDS || led_fwnodes[reg]) { - fwnode_handle_put(child); + + if (reg < 0 || reg >= BD2606_MAX_LEDS || led_fwnodes[reg]) return -EINVAL; - } + led = &priv->leds[reg]; - led_fwnodes[reg] = child; + led_fwnodes[reg] = fwnode_handle_get(child); active_pairs[reg / 2]++; led->priv = priv; led->led_no = reg; @@ -130,7 +126,8 @@ static int bd2606mvv_probe(struct i2c_client *client) &priv->leds[i].ldev, &init_data); if (err < 0) { - fwnode_handle_put(child); + for (j = i; j < BD2606_MAX_LEDS; j++) + fwnode_handle_put(led_fwnodes[j]); return dev_err_probe(dev, err, "couldn't register LED %s\n", priv->leds[i].ldev.name); From a12e8a92909e498931a3c4a5e54e2f45139124b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 9 Oct 2023 10:38:56 +0200 Subject: [PATCH 269/534] pinctrl: ti: ti-iodelay: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 93650550dff9d1a3b88c553f8adb81dc89778977 ] The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20231009083856.222030-21-u.kleine-koenig@pengutronix.de Signed-off-by: Linus Walleij Stable-dep-of: a9f2b249adee ("pinctrl: ti: ti-iodelay: Fix some error handling paths") Signed-off-by: Sasha Levin --- drivers/pinctrl/ti/pinctrl-ti-iodelay.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c index 5370bbdf2e1a..81ae6737928a 100644 --- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c +++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c @@ -900,23 +900,19 @@ exit_out: /** * ti_iodelay_remove() - standard remove * @pdev: platform device - * - * Return: 0 if all went fine, else appropriate error value. */ -static int ti_iodelay_remove(struct platform_device *pdev) +static void ti_iodelay_remove(struct platform_device *pdev) { struct ti_iodelay_device *iod = platform_get_drvdata(pdev); ti_iodelay_pinconf_deinit_dev(iod); /* Expect other allocations to be freed by devm */ - - return 0; } static struct platform_driver ti_iodelay_driver = { .probe = ti_iodelay_probe, - .remove = ti_iodelay_remove, + .remove_new = ti_iodelay_remove, .driver = { .name = DRIVER_NAME, .of_match_table = ti_iodelay_of_match, From ccc7cdf4963439eac47f3cb678f64083f916bfc3 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Oct 2023 12:29:13 -0500 Subject: [PATCH 270/534] pinctrl: Use device_get_match_data() [ Upstream commit 63bffc2d3a99eaabc786c513eea71be3f597f175 ] Use preferred device_get_match_data() instead of of_match_device() to get the driver match data. With this, adjust the includes to explicitly include the correct headers. Signed-off-by: Rob Herring Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231009172923.2457844-18-robh@kernel.org Signed-off-by: Linus Walleij Stable-dep-of: a9f2b249adee ("pinctrl: ti: ti-iodelay: Fix some error handling paths") Signed-off-by: Sasha Levin --- drivers/pinctrl/bcm/pinctrl-ns.c | 8 ++------ drivers/pinctrl/berlin/berlin-bg2.c | 8 +++----- drivers/pinctrl/berlin/berlin-bg2cd.c | 8 +++----- drivers/pinctrl/berlin/berlin-bg2q.c | 8 +++----- drivers/pinctrl/berlin/berlin-bg4ct.c | 9 +++++---- drivers/pinctrl/berlin/pinctrl-as370.c | 9 +++++---- drivers/pinctrl/mvebu/pinctrl-armada-38x.c | 9 ++------- drivers/pinctrl/mvebu/pinctrl-armada-39x.c | 9 ++------- drivers/pinctrl/mvebu/pinctrl-armada-ap806.c | 5 +---- drivers/pinctrl/mvebu/pinctrl-armada-cp110.c | 6 ++---- drivers/pinctrl/mvebu/pinctrl-armada-xp.c | 9 ++------- drivers/pinctrl/mvebu/pinctrl-dove.c | 6 ++---- drivers/pinctrl/mvebu/pinctrl-kirkwood.c | 7 ++----- drivers/pinctrl/mvebu/pinctrl-orion.c | 7 ++----- drivers/pinctrl/nomadik/pinctrl-abx500.c | 9 ++------- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 10 ++++------ drivers/pinctrl/pinctrl-at91.c | 11 +++++------ drivers/pinctrl/pinctrl-xway.c | 11 ++++------- drivers/pinctrl/ti/pinctrl-ti-iodelay.c | 18 ++++++++---------- 19 files changed, 59 insertions(+), 108 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-ns.c b/drivers/pinctrl/bcm/pinctrl-ns.c index f80630a74d34..d099a7f25f64 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns.c +++ b/drivers/pinctrl/bcm/pinctrl-ns.c @@ -7,11 +7,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include "../core.h" @@ -208,7 +208,6 @@ static const struct of_device_id ns_pinctrl_of_match_table[] = { static int ns_pinctrl_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - const struct of_device_id *of_id; struct ns_pinctrl *ns_pinctrl; struct pinctrl_desc *pctldesc; struct pinctrl_pin_desc *pin; @@ -225,10 +224,7 @@ static int ns_pinctrl_probe(struct platform_device *pdev) ns_pinctrl->dev = dev; - of_id = of_match_device(ns_pinctrl_of_match_table, dev); - if (!of_id) - return -EINVAL; - ns_pinctrl->chipset_flag = (uintptr_t)of_id->data; + ns_pinctrl->chipset_flag = (uintptr_t)device_get_match_data(dev); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cru_gpio_control"); diff --git a/drivers/pinctrl/berlin/berlin-bg2.c b/drivers/pinctrl/berlin/berlin-bg2.c index acbd413340e8..15aed4467627 100644 --- a/drivers/pinctrl/berlin/berlin-bg2.c +++ b/drivers/pinctrl/berlin/berlin-bg2.c @@ -8,8 +8,9 @@ */ #include -#include +#include #include +#include #include #include "berlin.h" @@ -227,10 +228,7 @@ static const struct of_device_id berlin2_pinctrl_match[] = { static int berlin2_pinctrl_probe(struct platform_device *pdev) { - const struct of_device_id *match = - of_match_device(berlin2_pinctrl_match, &pdev->dev); - - return berlin_pinctrl_probe(pdev, match->data); + return berlin_pinctrl_probe(pdev, device_get_match_data(&pdev->dev)); } static struct platform_driver berlin2_pinctrl_driver = { diff --git a/drivers/pinctrl/berlin/berlin-bg2cd.c b/drivers/pinctrl/berlin/berlin-bg2cd.c index c0f5d86d5d01..73a1d8c23088 100644 --- a/drivers/pinctrl/berlin/berlin-bg2cd.c +++ b/drivers/pinctrl/berlin/berlin-bg2cd.c @@ -8,8 +8,9 @@ */ #include -#include +#include #include +#include #include #include "berlin.h" @@ -172,10 +173,7 @@ static const struct of_device_id berlin2cd_pinctrl_match[] = { static int berlin2cd_pinctrl_probe(struct platform_device *pdev) { - const struct of_device_id *match = - of_match_device(berlin2cd_pinctrl_match, &pdev->dev); - - return berlin_pinctrl_probe(pdev, match->data); + return berlin_pinctrl_probe(pdev, device_get_match_data(&pdev->dev)); } static struct platform_driver berlin2cd_pinctrl_driver = { diff --git a/drivers/pinctrl/berlin/berlin-bg2q.c b/drivers/pinctrl/berlin/berlin-bg2q.c index 20a3216ede07..a5dbc8f279e7 100644 --- a/drivers/pinctrl/berlin/berlin-bg2q.c +++ b/drivers/pinctrl/berlin/berlin-bg2q.c @@ -8,8 +8,9 @@ */ #include -#include +#include #include +#include #include #include "berlin.h" @@ -389,10 +390,7 @@ static const struct of_device_id berlin2q_pinctrl_match[] = { static int berlin2q_pinctrl_probe(struct platform_device *pdev) { - const struct of_device_id *match = - of_match_device(berlin2q_pinctrl_match, &pdev->dev); - - return berlin_pinctrl_probe(pdev, match->data); + return berlin_pinctrl_probe(pdev, device_get_match_data(&pdev->dev)); } static struct platform_driver berlin2q_pinctrl_driver = { diff --git a/drivers/pinctrl/berlin/berlin-bg4ct.c b/drivers/pinctrl/berlin/berlin-bg4ct.c index 3026a3b3da2d..9bf0a54f2798 100644 --- a/drivers/pinctrl/berlin/berlin-bg4ct.c +++ b/drivers/pinctrl/berlin/berlin-bg4ct.c @@ -8,8 +8,9 @@ */ #include -#include +#include #include +#include #include #include "berlin.h" @@ -449,8 +450,8 @@ static const struct of_device_id berlin4ct_pinctrl_match[] = { static int berlin4ct_pinctrl_probe(struct platform_device *pdev) { - const struct of_device_id *match = - of_match_device(berlin4ct_pinctrl_match, &pdev->dev); + const struct berlin_pinctrl_desc *desc = + device_get_match_data(&pdev->dev); struct regmap_config *rmconfig; struct regmap *regmap; struct resource *res; @@ -473,7 +474,7 @@ static int berlin4ct_pinctrl_probe(struct platform_device *pdev) if (IS_ERR(regmap)) return PTR_ERR(regmap); - return berlin_pinctrl_probe_regmap(pdev, match->data, regmap); + return berlin_pinctrl_probe_regmap(pdev, desc, regmap); } static struct platform_driver berlin4ct_pinctrl_driver = { diff --git a/drivers/pinctrl/berlin/pinctrl-as370.c b/drivers/pinctrl/berlin/pinctrl-as370.c index b631c14813a7..fc0daec94e10 100644 --- a/drivers/pinctrl/berlin/pinctrl-as370.c +++ b/drivers/pinctrl/berlin/pinctrl-as370.c @@ -8,8 +8,9 @@ */ #include -#include +#include #include +#include #include #include "berlin.h" @@ -330,8 +331,8 @@ static const struct of_device_id as370_pinctrl_match[] = { static int as370_pinctrl_probe(struct platform_device *pdev) { - const struct of_device_id *match = - of_match_device(as370_pinctrl_match, &pdev->dev); + const struct berlin_pinctrl_desc *desc = + device_get_match_data(&pdev->dev); struct regmap_config *rmconfig; struct regmap *regmap; struct resource *res; @@ -354,7 +355,7 @@ static int as370_pinctrl_probe(struct platform_device *pdev) if (IS_ERR(regmap)) return PTR_ERR(regmap); - return berlin_pinctrl_probe_regmap(pdev, match->data, regmap); + return berlin_pinctrl_probe_regmap(pdev, desc, regmap); } static struct platform_driver as370_pinctrl_driver = { diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c index 040e418dbfc1..162dfc213669 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c @@ -12,8 +12,8 @@ #include #include #include -#include #include +#include #include "pinctrl-mvebu.h" @@ -404,13 +404,8 @@ static struct pinctrl_gpio_range armada_38x_mpp_gpio_ranges[] = { static int armada_38x_pinctrl_probe(struct platform_device *pdev) { struct mvebu_pinctrl_soc_info *soc = &armada_38x_pinctrl_info; - const struct of_device_id *match = - of_match_device(armada_38x_pinctrl_of_match, &pdev->dev); - if (!match) - return -ENODEV; - - soc->variant = (unsigned) match->data & 0xff; + soc->variant = (unsigned)device_get_match_data(&pdev->dev) & 0xff; soc->controls = armada_38x_mpp_controls; soc->ncontrols = ARRAY_SIZE(armada_38x_mpp_controls); soc->gpioranges = armada_38x_mpp_gpio_ranges; diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-39x.c b/drivers/pinctrl/mvebu/pinctrl-armada-39x.c index c33f1cbaf661..d9c98faa7b0e 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-39x.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-39x.c @@ -12,8 +12,8 @@ #include #include #include -#include #include +#include #include "pinctrl-mvebu.h" @@ -386,13 +386,8 @@ static struct pinctrl_gpio_range armada_39x_mpp_gpio_ranges[] = { static int armada_39x_pinctrl_probe(struct platform_device *pdev) { struct mvebu_pinctrl_soc_info *soc = &armada_39x_pinctrl_info; - const struct of_device_id *match = - of_match_device(armada_39x_pinctrl_of_match, &pdev->dev); - if (!match) - return -ENODEV; - - soc->variant = (unsigned) match->data & 0xff; + soc->variant = (unsigned)device_get_match_data(&pdev->dev) & 0xff; soc->controls = armada_39x_mpp_controls; soc->ncontrols = ARRAY_SIZE(armada_39x_mpp_controls); soc->gpioranges = armada_39x_mpp_gpio_ranges; diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-ap806.c b/drivers/pinctrl/mvebu/pinctrl-armada-ap806.c index 89bab536717d..7becf2781a0b 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-ap806.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-ap806.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include "pinctrl-mvebu.h" @@ -106,10 +105,8 @@ static struct pinctrl_gpio_range armada_ap806_mpp_gpio_ranges[] = { static int armada_ap806_pinctrl_probe(struct platform_device *pdev) { struct mvebu_pinctrl_soc_info *soc = &armada_ap806_pinctrl_info; - const struct of_device_id *match = - of_match_device(armada_ap806_pinctrl_of_match, &pdev->dev); - if (!match || !pdev->dev.parent) + if (!pdev->dev.parent) return -ENODEV; soc->variant = 0; /* no variants for Armada AP806 */ diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-cp110.c b/drivers/pinctrl/mvebu/pinctrl-armada-cp110.c index 8ba8f3e9121f..9a250c491f33 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-cp110.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-cp110.c @@ -12,9 +12,9 @@ #include #include #include -#include #include #include +#include #include "pinctrl-mvebu.h" @@ -638,8 +638,6 @@ static void mvebu_pinctrl_assign_variant(struct mvebu_mpp_mode *m, static int armada_cp110_pinctrl_probe(struct platform_device *pdev) { struct mvebu_pinctrl_soc_info *soc; - const struct of_device_id *match = - of_match_device(armada_cp110_pinctrl_of_match, &pdev->dev); int i; if (!pdev->dev.parent) @@ -650,7 +648,7 @@ static int armada_cp110_pinctrl_probe(struct platform_device *pdev) if (!soc) return -ENOMEM; - soc->variant = (unsigned long) match->data & 0xff; + soc->variant = (unsigned long)device_get_match_data(&pdev->dev) & 0xff; soc->controls = armada_cp110_mpp_controls; soc->ncontrols = ARRAY_SIZE(armada_cp110_mpp_controls); soc->modes = armada_cp110_mpp_modes; diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c index 48e2a6c56a83..487825bfd125 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c @@ -19,8 +19,8 @@ #include #include #include -#include #include +#include #include #include "pinctrl-mvebu.h" @@ -568,14 +568,9 @@ static int armada_xp_pinctrl_resume(struct platform_device *pdev) static int armada_xp_pinctrl_probe(struct platform_device *pdev) { struct mvebu_pinctrl_soc_info *soc = &armada_xp_pinctrl_info; - const struct of_device_id *match = - of_match_device(armada_xp_pinctrl_of_match, &pdev->dev); int nregs; - if (!match) - return -ENODEV; - - soc->variant = (unsigned) match->data & 0xff; + soc->variant = (unsigned)device_get_match_data(&pdev->dev) & 0xff; switch (soc->variant) { case V_MV78230: diff --git a/drivers/pinctrl/mvebu/pinctrl-dove.c b/drivers/pinctrl/mvebu/pinctrl-dove.c index bd74daa9ed66..1947da73e512 100644 --- a/drivers/pinctrl/mvebu/pinctrl-dove.c +++ b/drivers/pinctrl/mvebu/pinctrl-dove.c @@ -12,9 +12,9 @@ #include #include #include -#include #include #include +#include #include #include "pinctrl-mvebu.h" @@ -765,13 +765,11 @@ static int dove_pinctrl_probe(struct platform_device *pdev) { struct resource *res, *mpp_res; struct resource fb_res; - const struct of_device_id *match = - of_match_device(dove_pinctrl_of_match, &pdev->dev); struct mvebu_mpp_ctrl_data *mpp_data; void __iomem *base; int i; - pdev->dev.platform_data = (void *)match->data; + pdev->dev.platform_data = (void *)device_get_match_data(&pdev->dev); /* * General MPP Configuration Register is part of pdma registers. diff --git a/drivers/pinctrl/mvebu/pinctrl-kirkwood.c b/drivers/pinctrl/mvebu/pinctrl-kirkwood.c index d45c31f281c8..4789d7442f78 100644 --- a/drivers/pinctrl/mvebu/pinctrl-kirkwood.c +++ b/drivers/pinctrl/mvebu/pinctrl-kirkwood.c @@ -11,8 +11,8 @@ #include #include #include -#include #include +#include #include "pinctrl-mvebu.h" @@ -470,10 +470,7 @@ static const struct of_device_id kirkwood_pinctrl_of_match[] = { static int kirkwood_pinctrl_probe(struct platform_device *pdev) { - const struct of_device_id *match = - of_match_device(kirkwood_pinctrl_of_match, &pdev->dev); - - pdev->dev.platform_data = (void *)match->data; + pdev->dev.platform_data = (void *)device_get_match_data(&pdev->dev); return mvebu_pinctrl_simple_mmio_probe(pdev); } diff --git a/drivers/pinctrl/mvebu/pinctrl-orion.c b/drivers/pinctrl/mvebu/pinctrl-orion.c index cc97d270be61..2b6ab7f2afc7 100644 --- a/drivers/pinctrl/mvebu/pinctrl-orion.c +++ b/drivers/pinctrl/mvebu/pinctrl-orion.c @@ -19,8 +19,8 @@ #include #include #include -#include #include +#include #include "pinctrl-mvebu.h" @@ -218,10 +218,7 @@ static const struct of_device_id orion_pinctrl_of_match[] = { static int orion_pinctrl_probe(struct platform_device *pdev) { - const struct of_device_id *match = - of_match_device(orion_pinctrl_of_match, &pdev->dev); - - pdev->dev.platform_data = (void*)match->data; + pdev->dev.platform_data = (void*)device_get_match_data(&pdev->dev); mpp_base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(mpp_base)) diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c index 6b90051af206..0cfa74365733 100644 --- a/drivers/pinctrl/nomadik/pinctrl-abx500.c +++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -985,7 +986,6 @@ static const struct of_device_id abx500_gpio_match[] = { static int abx500_gpio_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; - const struct of_device_id *match; struct abx500_pinctrl *pct; unsigned int id = -1; int ret; @@ -1006,12 +1006,7 @@ static int abx500_gpio_probe(struct platform_device *pdev) pct->chip.parent = &pdev->dev; pct->chip.base = -1; /* Dynamic allocation */ - match = of_match_device(abx500_gpio_match, &pdev->dev); - if (!match) { - dev_err(&pdev->dev, "gpio dt not matching\n"); - return -ENODEV; - } - id = (unsigned long)match->data; + id = (unsigned long)device_get_match_data(&pdev->dev); /* Poke in other ASIC variants here */ switch (id) { diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index e7d33093994b..445c61a4a7e5 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -16,9 +16,11 @@ #include #include #include +#include #include -#include +#include #include +#include #include #include #include @@ -1840,7 +1842,6 @@ static int nmk_pinctrl_resume(struct device *dev) static int nmk_pinctrl_probe(struct platform_device *pdev) { - const struct of_device_id *match; struct device_node *np = pdev->dev.of_node; struct device_node *prcm_np; struct nmk_pinctrl *npct; @@ -1851,10 +1852,7 @@ static int nmk_pinctrl_probe(struct platform_device *pdev) if (!npct) return -ENOMEM; - match = of_match_device(nmk_pinctrl_match, &pdev->dev); - if (!match) - return -ENODEV; - version = (unsigned int) match->data; + version = (unsigned int)device_get_match_data(&pdev->dev); /* Poke in other ASIC variants here */ if (version == PINCTRL_NMK_STN8815) diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index ad30fd47a4bb..d7b66928a4e5 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -12,10 +12,9 @@ #include #include #include -#include -#include -#include +#include #include +#include #include #include #include @@ -1302,8 +1301,8 @@ static int at91_pinctrl_probe_dt(struct platform_device *pdev, if (!np) return -ENODEV; - info->dev = dev; - info->ops = of_device_get_match_data(dev); + info->dev = &pdev->dev; + info->ops = device_get_match_data(&pdev->dev); at91_pinctrl_child_count(info, np); /* @@ -1848,7 +1847,7 @@ static int at91_gpio_probe(struct platform_device *pdev) if (IS_ERR(at91_chip->regbase)) return PTR_ERR(at91_chip->regbase); - at91_chip->ops = of_device_get_match_data(dev); + at91_chip->ops = device_get_match_data(dev); at91_chip->pioc_virq = irq; at91_chip->clock = devm_clk_get_enabled(dev, NULL); diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c index cf0383f575d9..f4256a918165 100644 --- a/drivers/pinctrl/pinctrl-xway.c +++ b/drivers/pinctrl/pinctrl-xway.c @@ -11,12 +11,12 @@ #include #include #include -#include -#include +#include #include #include #include #include +#include #include "pinctrl-lantiq.h" @@ -1451,7 +1451,6 @@ MODULE_DEVICE_TABLE(of, xway_match); static int pinmux_xway_probe(struct platform_device *pdev) { - const struct of_device_id *match; const struct pinctrl_xway_soc *xway_soc; int ret, i; @@ -1460,10 +1459,8 @@ static int pinmux_xway_probe(struct platform_device *pdev) if (IS_ERR(xway_info.membase[0])) return PTR_ERR(xway_info.membase[0]); - match = of_match_device(xway_match, &pdev->dev); - if (match) - xway_soc = (const struct pinctrl_xway_soc *) match->data; - else + xway_soc = device_get_match_data(&pdev->dev); + if (!xway_soc) xway_soc = &danube_pinctrl; /* find out how many pads we have */ diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c index 81ae6737928a..ef9758638501 100644 --- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c +++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c @@ -14,7 +14,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -822,7 +823,6 @@ static int ti_iodelay_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = of_node_get(dev->of_node); - const struct of_device_id *match; struct resource *res; struct ti_iodelay_device *iod; int ret = 0; @@ -833,20 +833,18 @@ static int ti_iodelay_probe(struct platform_device *pdev) goto exit_out; } - match = of_match_device(ti_iodelay_of_match, dev); - if (!match) { - ret = -EINVAL; - dev_err(dev, "No DATA match\n"); - goto exit_out; - } - iod = devm_kzalloc(dev, sizeof(*iod), GFP_KERNEL); if (!iod) { ret = -ENOMEM; goto exit_out; } iod->dev = dev; - iod->reg_data = match->data; + iod->reg_data = device_get_match_data(dev); + if (!iod->reg_data) { + ret = -EINVAL; + dev_err(dev, "No DATA match\n"); + goto exit_out; + } /* So far We can assume there is only 1 bank of registers */ iod->reg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); From 85427d5109c24a79db9239a7665896a4458edbeb Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 27 Jun 2024 21:17:19 +0800 Subject: [PATCH 271/534] pinctrl: ti: iodelay: Use scope based of_node_put() cleanups [ Upstream commit 791a8bb202a85f707c20ef04a471519e35f089dc ] Use scope based of_node_put() cleanup to simplify code. Signed-off-by: Peng Fan Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/20240627131721.678727-2-peng.fan@oss.nxp.com Signed-off-by: Linus Walleij Stable-dep-of: a9f2b249adee ("pinctrl: ti: ti-iodelay: Fix some error handling paths") Signed-off-by: Sasha Levin --- drivers/pinctrl/ti/pinctrl-ti-iodelay.c | 43 +++++++++---------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c index ef9758638501..f5e5a23d2226 100644 --- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c +++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c @@ -822,53 +822,48 @@ MODULE_DEVICE_TABLE(of, ti_iodelay_of_match); static int ti_iodelay_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = of_node_get(dev->of_node); + struct device_node *np __free(device_node) = of_node_get(dev->of_node); struct resource *res; struct ti_iodelay_device *iod; - int ret = 0; + int ret; if (!np) { - ret = -EINVAL; dev_err(dev, "No OF node\n"); - goto exit_out; + return -EINVAL; } iod = devm_kzalloc(dev, sizeof(*iod), GFP_KERNEL); - if (!iod) { - ret = -ENOMEM; - goto exit_out; - } + if (!iod) + return -ENOMEM; + iod->dev = dev; iod->reg_data = device_get_match_data(dev); if (!iod->reg_data) { - ret = -EINVAL; dev_err(dev, "No DATA match\n"); - goto exit_out; + return -EINVAL; } /* So far We can assume there is only 1 bank of registers */ iod->reg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); - if (IS_ERR(iod->reg_base)) { - ret = PTR_ERR(iod->reg_base); - goto exit_out; - } + if (IS_ERR(iod->reg_base)) + return PTR_ERR(iod->reg_base); + iod->phys_base = res->start; iod->regmap = devm_regmap_init_mmio(dev, iod->reg_base, iod->reg_data->regmap_config); if (IS_ERR(iod->regmap)) { dev_err(dev, "Regmap MMIO init failed.\n"); - ret = PTR_ERR(iod->regmap); - goto exit_out; + return PTR_ERR(iod->regmap); } ret = ti_iodelay_pinconf_init_dev(iod); if (ret) - goto exit_out; + return ret; ret = ti_iodelay_alloc_pins(dev, iod, res->start); if (ret) - goto exit_out; + return ret; iod->desc.pctlops = &ti_iodelay_pinctrl_ops; /* no pinmux ops - we are pinconf */ @@ -879,20 +874,12 @@ static int ti_iodelay_probe(struct platform_device *pdev) ret = devm_pinctrl_register_and_init(dev, &iod->desc, iod, &iod->pctl); if (ret) { dev_err(dev, "Failed to register pinctrl\n"); - goto exit_out; + return ret; } platform_set_drvdata(pdev, iod); - ret = pinctrl_enable(iod->pctl); - if (ret) - goto exit_out; - - return 0; - -exit_out: - of_node_put(np); - return ret; + return pinctrl_enable(iod->pctl); } /** From a3552e2f7d30d27ec95b6a009526934f5a37e0b9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 9 Jul 2024 22:37:43 +0200 Subject: [PATCH 272/534] pinctrl: ti: ti-iodelay: Fix some error handling paths [ Upstream commit a9f2b249adeef2b9744a884355fa8f5e581d507f ] In the probe, if an error occurs after the ti_iodelay_pinconf_init_dev() call, it is likely that ti_iodelay_pinconf_deinit_dev() should be called, as already done in the remove function. Also in ti_iodelay_pinconf_init_dev(), if an error occurs after the first regmap_update_bits() call, it is also likely that the deinit() function should be called. The easier way to fix it is to add a devm_add_action_or_reset() at the rigtht place to have ti_iodelay_pinconf_deinit_dev() called when needed. Doing so, the .remove() function can be removed, and the associated platform_set_drvdata() call in the probe as well. Fixes: 003910ebc83b ("pinctrl: Introduce TI IOdelay configuration driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/0220fa5b925bd08e361be8206a5438f6229deaac.1720556038.git.christophe.jaillet@wanadoo.fr Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/ti/pinctrl-ti-iodelay.c | 52 ++++++++++--------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c index f5e5a23d2226..451801acdc40 100644 --- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c +++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c @@ -273,6 +273,22 @@ static int ti_iodelay_pinconf_set(struct ti_iodelay_device *iod, return r; } +/** + * ti_iodelay_pinconf_deinit_dev() - deinit the iodelay device + * @data: IODelay device + * + * Deinitialize the IODelay device (basically just lock the region back up. + */ +static void ti_iodelay_pinconf_deinit_dev(void *data) +{ + struct ti_iodelay_device *iod = data; + const struct ti_iodelay_reg_data *reg = iod->reg_data; + + /* lock the iodelay region back again */ + regmap_update_bits(iod->regmap, reg->reg_global_lock_offset, + reg->global_lock_mask, reg->global_lock_val); +} + /** * ti_iodelay_pinconf_init_dev() - Initialize IODelay device * @iod: iodelay device @@ -295,6 +311,11 @@ static int ti_iodelay_pinconf_init_dev(struct ti_iodelay_device *iod) if (r) return r; + r = devm_add_action_or_reset(iod->dev, ti_iodelay_pinconf_deinit_dev, + iod); + if (r) + return r; + /* Read up Recalibration sequence done by bootloader */ r = regmap_read(iod->regmap, reg->reg_refclk_offset, &val); if (r) @@ -353,21 +374,6 @@ static int ti_iodelay_pinconf_init_dev(struct ti_iodelay_device *iod) return 0; } -/** - * ti_iodelay_pinconf_deinit_dev() - deinit the iodelay device - * @iod: IODelay device - * - * Deinitialize the IODelay device (basically just lock the region back up. - */ -static void ti_iodelay_pinconf_deinit_dev(struct ti_iodelay_device *iod) -{ - const struct ti_iodelay_reg_data *reg = iod->reg_data; - - /* lock the iodelay region back again */ - regmap_update_bits(iod->regmap, reg->reg_global_lock_offset, - reg->global_lock_mask, reg->global_lock_val); -} - /** * ti_iodelay_get_pingroup() - Find the group mapped by a group selector * @iod: iodelay device @@ -877,27 +883,11 @@ static int ti_iodelay_probe(struct platform_device *pdev) return ret; } - platform_set_drvdata(pdev, iod); - return pinctrl_enable(iod->pctl); } -/** - * ti_iodelay_remove() - standard remove - * @pdev: platform device - */ -static void ti_iodelay_remove(struct platform_device *pdev) -{ - struct ti_iodelay_device *iod = platform_get_drvdata(pdev); - - ti_iodelay_pinconf_deinit_dev(iod); - - /* Expect other allocations to be freed by devm */ -} - static struct platform_driver ti_iodelay_driver = { .probe = ti_iodelay_probe, - .remove_new = ti_iodelay_remove, .driver = { .name = DRIVER_NAME, .of_match_table = ti_iodelay_of_match, From 831886bf1a5abc8b435772301cba74d0496d6211 Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Mon, 5 Aug 2024 10:55:10 +0200 Subject: [PATCH 273/534] Input: ilitek_ts_i2c - avoid wrong input subsystem sync [ Upstream commit 7d0b18cd5dc7429917812963611d961fd93cb44d ] For different reasons i2c transaction may fail or report id in the message may be wrong. Avoid closing the frame in this case as it will result in all contacts being dropped, indicating that nothing is touching the screen anymore, while usually it is not the case. Fixes: 42370681bd46 ("Input: Add support for ILITEK Lego Series") Signed-off-by: Emanuele Ghidoli Signed-off-by: Francesco Dolcini Link: https://lore.kernel.org/r/20240805085511.43955-2-francesco@dolcini.it Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/ilitek_ts_i2c.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/input/touchscreen/ilitek_ts_i2c.c b/drivers/input/touchscreen/ilitek_ts_i2c.c index 2f872e95fbba..f70af5a173b3 100644 --- a/drivers/input/touchscreen/ilitek_ts_i2c.c +++ b/drivers/input/touchscreen/ilitek_ts_i2c.c @@ -160,15 +160,14 @@ static int ilitek_process_and_report_v6(struct ilitek_ts_data *ts) error = ilitek_i2c_write_and_read(ts, NULL, 0, 0, buf, 64); if (error) { dev_err(dev, "get touch info failed, err:%d\n", error); - goto err_sync_frame; + return error; } report_max_point = buf[REPORT_COUNT_ADDRESS]; if (report_max_point > ts->max_tp) { dev_err(dev, "FW report max point:%d > panel info. max:%d\n", report_max_point, ts->max_tp); - error = -EINVAL; - goto err_sync_frame; + return -EINVAL; } count = DIV_ROUND_UP(report_max_point, packet_max_point); @@ -178,7 +177,7 @@ static int ilitek_process_and_report_v6(struct ilitek_ts_data *ts) if (error) { dev_err(dev, "get touch info. failed, cnt:%d, err:%d\n", count, error); - goto err_sync_frame; + return error; } } @@ -203,10 +202,10 @@ static int ilitek_process_and_report_v6(struct ilitek_ts_data *ts) ilitek_touch_down(ts, id, x, y); } -err_sync_frame: input_mt_sync_frame(input); input_sync(input); - return error; + + return 0; } /* APIs of cmds for ILITEK Touch IC */ From 075a0ce1fa28482bba6b327781771e97d4c336e2 Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Mon, 5 Aug 2024 10:55:11 +0200 Subject: [PATCH 274/534] Input: ilitek_ts_i2c - add report id message validation [ Upstream commit 208989744a6f01bed86968473312d4e650e600b3 ] Ensure that the touchscreen response has correct "report id" byte before processing the touch data and discard other messages. Fixes: 42370681bd46 ("Input: Add support for ILITEK Lego Series") Signed-off-by: Emanuele Ghidoli Signed-off-by: Francesco Dolcini Link: https://lore.kernel.org/r/20240805085511.43955-3-francesco@dolcini.it Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/ilitek_ts_i2c.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/touchscreen/ilitek_ts_i2c.c b/drivers/input/touchscreen/ilitek_ts_i2c.c index f70af5a173b3..e719f5da68bf 100644 --- a/drivers/input/touchscreen/ilitek_ts_i2c.c +++ b/drivers/input/touchscreen/ilitek_ts_i2c.c @@ -37,6 +37,8 @@ #define ILITEK_TP_CMD_GET_MCU_VER 0x61 #define ILITEK_TP_CMD_GET_IC_MODE 0xC0 +#define ILITEK_TP_I2C_REPORT_ID 0x48 + #define REPORT_COUNT_ADDRESS 61 #define ILITEK_SUPPORT_MAX_POINT 40 @@ -163,6 +165,11 @@ static int ilitek_process_and_report_v6(struct ilitek_ts_data *ts) return error; } + if (buf[0] != ILITEK_TP_I2C_REPORT_ID) { + dev_err(dev, "get touch info failed. Wrong id: 0x%02X\n", buf[0]); + return -EINVAL; + } + report_max_point = buf[REPORT_COUNT_ADDRESS]; if (report_max_point > ts->max_tp) { dev_err(dev, "FW report max point:%d > panel info. max:%d\n", From 527ab3eb3b0b4a6ee00e183c1de6a730239e2835 Mon Sep 17 00:00:00 2001 From: Junlin Li Date: Tue, 2 Jul 2024 21:24:13 +0800 Subject: [PATCH 275/534] drivers: media: dvb-frontends/rtl2832: fix an out-of-bounds write error [ Upstream commit 8ae06f360cfaca2b88b98ca89144548b3186aab1 ] Ensure index in rtl2832_pid_filter does not exceed 31 to prevent out-of-bounds access. dev->filters is a 32-bit value, so set_bit and clear_bit functions should only operate on indices from 0 to 31. If index is 32, it will attempt to access a non-existent 33rd bit, leading to out-of-bounds access. Change the boundary check from index > 32 to index >= 32 to resolve this issue. Signed-off-by: Junlin Li Signed-off-by: Hans Verkuil Fixes: 4b01e01a81b6 ("[media] rtl2832: implement PID filter") [hverkuil: added fixes tag, rtl2830_pid_filter -> rtl2832_pid_filter in logmsg] Signed-off-by: Sasha Levin --- drivers/media/dvb-frontends/rtl2832.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/rtl2832.c b/drivers/media/dvb-frontends/rtl2832.c index 601cf45c3935..e6a7877a9854 100644 --- a/drivers/media/dvb-frontends/rtl2832.c +++ b/drivers/media/dvb-frontends/rtl2832.c @@ -983,7 +983,7 @@ static int rtl2832_pid_filter(struct dvb_frontend *fe, u8 index, u16 pid, index, pid, onoff, dev->slave_ts); /* skip invalid PIDs (0x2000) */ - if (pid > 0x1fff || index > 32) + if (pid > 0x1fff || index >= 32) return 0; if (onoff) From 58f31be7dfbc0c84a6497ad51924949cf64b86a2 Mon Sep 17 00:00:00 2001 From: Junlin Li Date: Wed, 3 Jul 2024 01:50:23 +0800 Subject: [PATCH 276/534] drivers: media: dvb-frontends/rtl2830: fix an out-of-bounds write error [ Upstream commit 46d7ebfe6a75a454a5fa28604f0ef1491f9d8d14 ] Ensure index in rtl2830_pid_filter does not exceed 31 to prevent out-of-bounds access. dev->filters is a 32-bit value, so set_bit and clear_bit functions should only operate on indices from 0 to 31. If index is 32, it will attempt to access a non-existent 33rd bit, leading to out-of-bounds access. Change the boundary check from index > 32 to index >= 32 to resolve this issue. Fixes: df70ddad81b4 ("[media] rtl2830: implement PID filter") Signed-off-by: Junlin Li Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/dvb-frontends/rtl2830.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/rtl2830.c b/drivers/media/dvb-frontends/rtl2830.c index 35c969fd2cb5..4e59734ec53e 100644 --- a/drivers/media/dvb-frontends/rtl2830.c +++ b/drivers/media/dvb-frontends/rtl2830.c @@ -609,7 +609,7 @@ static int rtl2830_pid_filter(struct dvb_frontend *fe, u8 index, u16 pid, int on index, pid, onoff); /* skip invalid PIDs (0x2000) */ - if (pid > 0x1fff || index > 32) + if (pid > 0x1fff || index >= 32) return 0; if (onoff) From 390de4d01bc14ade9354b53a5720d78aef93d3c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 8 Aug 2024 15:17:07 +0300 Subject: [PATCH 277/534] PCI: Wait for Link before restoring Downstream Buses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3e40aa29d47e231a54640addf6a09c1f64c5b63f ] __pci_reset_bus() calls pci_bridge_secondary_bus_reset() to perform the reset and also waits for the Secondary Bus to become again accessible. __pci_reset_bus() then calls pci_bus_restore_locked() that restores the PCI devices connected to the bus, and if necessary, recursively restores also the subordinate buses and their devices. The logic in pci_bus_restore_locked() does not take into account that after restoring a device on one level, there might be another Link Downstream that can only start to come up after restore has been performed for its Downstream Port device. That is, the Link may require additional wait until it becomes accessible. Similarly, pci_slot_restore_locked() lacks wait. Amend pci_bus_restore_locked() and pci_slot_restore_locked() to wait for the Secondary Bus before recursively performing the restore of that bus. Fixes: 090a3c5322e9 ("PCI: Add pci_reset_slot() and pci_reset_bus()") Link: https://lore.kernel.org/r/20240808121708.2523-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 53e9e9788bd5..da52f98d8f7f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5875,8 +5875,10 @@ static void pci_bus_restore_locked(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { pci_dev_restore(dev); - if (dev->subordinate) + if (dev->subordinate) { + pci_bridge_wait_for_secondary_bus(dev, "bus reset"); pci_bus_restore_locked(dev->subordinate); + } } } @@ -5910,8 +5912,10 @@ static void pci_slot_restore_locked(struct pci_slot *slot) if (!dev->slot || dev->slot != slot) continue; pci_dev_restore(dev); - if (dev->subordinate) + if (dev->subordinate) { + pci_bridge_wait_for_secondary_bus(dev, "slot reset"); pci_bus_restore_locked(dev->subordinate); + } } } From 8e152448d0a099b71d64d3e52fcfc8175fbd123b Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 10 Aug 2024 16:04:03 +0900 Subject: [PATCH 278/534] firewire: core: correct range of block for case of switch statement [ Upstream commit ebb9d3ca8f7efc1b6a2f1750d1058eda444883d0 ] A commit d8527cab6c31 ("firewire: cdev: implement new event to notify response subaction with time stamp") adds an additional case, FW_CDEV_EVENT_RESPONSE2, into switch statement in complete_transaction(). However, the range of block is beyond to the case label and reaches neibour default label. This commit corrects the range of block. Fortunately, it has few impacts in practice since the local variable in the scope under the label is not used in codes under default label. Fixes: d8527cab6c31 ("firewire: cdev: implement new event to notify response subaction with time stamp") Link: https://lore.kernel.org/r/20240810070403.36801-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto Signed-off-by: Sasha Levin --- drivers/firewire/core-cdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 6274b86eb943..73cc2f2dcbf9 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -598,11 +598,11 @@ static void complete_transaction(struct fw_card *card, int rcode, u32 request_ts queue_event(client, &e->event, rsp, sizeof(*rsp) + rsp->length, NULL, 0); break; + } default: WARN_ON(1); break; } - } /* Drop the idr's reference */ client_put(client); From e85ab507882db165c10a858d7f685a0a38f0312e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 Jul 2024 18:53:26 -0500 Subject: [PATCH 279/534] PCI: keystone: Fix if-statement expression in ks_pcie_quirk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6188a1c762eb9bbd444f47696eda77a5eae6207a ] This code accidentally uses && where || was intended. It potentially results in a NULL dereference. Thus, fix the if-statement expression to use the correct condition. Fixes: 86f271f22bbb ("PCI: keystone: Add workaround for Errata #i2037 (AM65x SR 1.0)") Link: https://lore.kernel.org/linux-pci/1b762a93-e1b2-4af3-8c04-c8843905c279@stanley.mountain Signed-off-by: Dan Carpenter [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam Reviewed-by: Siddharth Vadapalli Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pci-keystone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index c1dedc83759c..c5475830c835 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -579,7 +579,7 @@ static void ks_pcie_quirk(struct pci_dev *dev) */ if (pci_match_id(am6_pci_devids, bridge)) { bridge_dev = pci_get_host_bridge_device(dev); - if (!bridge_dev && !bridge_dev->parent) + if (!bridge_dev || !bridge_dev->parent) return; ks_pcie = dev_get_drvdata(bridge_dev->parent); From 4ddb580089e3b067c21212ea8153d6b7a0d5853e Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Tue, 30 Jul 2024 11:18:15 +0530 Subject: [PATCH 280/534] clk: qcom: ipq5332: Register gcc_qdss_tsctr_clk_src [ Upstream commit 0e1ac23dfa3f635e486fdeb08206b981cb0a2a6b ] gcc_qdss_tsctr_clk_src (enabled in the boot loaders and dependent on gpll4_main) was not registered as one of the ipq5332 clocks. Hence clk_disable_unused() disabled 'gpll4_main' assuming there were no consumers for 'gpll4_main' resulting in system freeze or reboots. Reviewed-by: Dmitry Baryshkov Fixes: 3d89d52970fd ("clk: qcom: add Global Clock controller (GCC) driver for IPQ5332 SoC") Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20240730054817.1915652-4-quic_varada@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-ipq5332.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/qcom/gcc-ipq5332.c b/drivers/clk/qcom/gcc-ipq5332.c index f98591148a97..6a4877d88829 100644 --- a/drivers/clk/qcom/gcc-ipq5332.c +++ b/drivers/clk/qcom/gcc-ipq5332.c @@ -3388,6 +3388,7 @@ static struct clk_regmap *gcc_ipq5332_clocks[] = { [GCC_QDSS_DAP_DIV_CLK_SRC] = &gcc_qdss_dap_div_clk_src.clkr, [GCC_QDSS_ETR_USB_CLK] = &gcc_qdss_etr_usb_clk.clkr, [GCC_QDSS_EUD_AT_CLK] = &gcc_qdss_eud_at_clk.clkr, + [GCC_QDSS_TSCTR_CLK_SRC] = &gcc_qdss_tsctr_clk_src.clkr, [GCC_QPIC_AHB_CLK] = &gcc_qpic_ahb_clk.clkr, [GCC_QPIC_CLK] = &gcc_qpic_clk.clkr, [GCC_QPIC_IO_MACRO_CLK] = &gcc_qpic_io_macro_clk.clkr, From 583314ebaae7b63af5cc08d15b7ffe01bf3f6c44 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 4 Aug 2024 08:40:06 +0300 Subject: [PATCH 281/534] clk: qcom: dispcc-sm8250: use special function for Lucid 5LPE PLL [ Upstream commit 362be5cbaec2a663eb86b7105313368b4a71fc1e ] According to msm-5.10 the lucid 5lpe PLLs have require slightly different configuration that trion / lucid PLLs, it doesn't set PLL_UPDATE_BYPASS bit. Add corresponding function and use it for the display clock controller on Qualcomm SM8350 platform. Fixes: 205737fe3345 ("clk: qcom: add support for SM8350 DISPCC") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240804-sm8350-fixes-v1-2-1149dd8399fe@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/clk-alpha-pll.c | 52 ++++++++++++++++++++++++++++++++ drivers/clk/qcom/clk-alpha-pll.h | 2 ++ drivers/clk/qcom/dispcc-sm8250.c | 9 ++++-- 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index 1701cce74df7..7464cf64803f 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -1757,6 +1757,58 @@ const struct clk_ops clk_alpha_pll_agera_ops = { }; EXPORT_SYMBOL_GPL(clk_alpha_pll_agera_ops); +/** + * clk_lucid_5lpe_pll_configure - configure the lucid 5lpe pll + * + * @pll: clk alpha pll + * @regmap: register map + * @config: configuration to apply for pll + */ +void clk_lucid_5lpe_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, + const struct alpha_pll_config *config) +{ + /* + * If the bootloader left the PLL enabled it's likely that there are + * RCGs that will lock up if we disable the PLL below. + */ + if (trion_pll_is_enabled(pll, regmap)) { + pr_debug("Lucid 5LPE PLL is already enabled, skipping configuration\n"); + return; + } + + clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l); + regmap_write(regmap, PLL_CAL_L_VAL(pll), TRION_PLL_CAL_VAL); + clk_alpha_pll_write_config(regmap, PLL_ALPHA_VAL(pll), config->alpha); + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL(pll), + config->config_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U(pll), + config->config_ctl_hi_val); + clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U1(pll), + config->config_ctl_hi1_val); + clk_alpha_pll_write_config(regmap, PLL_USER_CTL(pll), + config->user_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_USER_CTL_U(pll), + config->user_ctl_hi_val); + clk_alpha_pll_write_config(regmap, PLL_USER_CTL_U1(pll), + config->user_ctl_hi1_val); + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL(pll), + config->test_ctl_val); + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U(pll), + config->test_ctl_hi_val); + clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U1(pll), + config->test_ctl_hi1_val); + + /* Disable PLL output */ + regmap_update_bits(regmap, PLL_MODE(pll), PLL_OUTCTRL, 0); + + /* Set operation mode to OFF */ + regmap_write(regmap, PLL_OPMODE(pll), PLL_STANDBY); + + /* Place the PLL in STANDBY mode */ + regmap_update_bits(regmap, PLL_MODE(pll), PLL_RESET_N, PLL_RESET_N); +} +EXPORT_SYMBOL_GPL(clk_lucid_5lpe_pll_configure); + static int alpha_pll_lucid_5lpe_enable(struct clk_hw *hw) { struct clk_alpha_pll *pll = to_clk_alpha_pll(hw); diff --git a/drivers/clk/qcom/clk-alpha-pll.h b/drivers/clk/qcom/clk-alpha-pll.h index 903fbab9b58e..3fd0ef41c72c 100644 --- a/drivers/clk/qcom/clk-alpha-pll.h +++ b/drivers/clk/qcom/clk-alpha-pll.h @@ -198,6 +198,8 @@ void clk_agera_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, void clk_zonda_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); +void clk_lucid_5lpe_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, + const struct alpha_pll_config *config); void clk_lucid_evo_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); void clk_rivian_evo_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, diff --git a/drivers/clk/qcom/dispcc-sm8250.c b/drivers/clk/qcom/dispcc-sm8250.c index e17bb8b543b5..9a9e0852c91f 100644 --- a/drivers/clk/qcom/dispcc-sm8250.c +++ b/drivers/clk/qcom/dispcc-sm8250.c @@ -1359,8 +1359,13 @@ static int disp_cc_sm8250_probe(struct platform_device *pdev) disp_cc_sm8250_clocks[DISP_CC_MDSS_EDP_GTC_CLK_SRC] = NULL; } - clk_lucid_pll_configure(&disp_cc_pll0, regmap, &disp_cc_pll0_config); - clk_lucid_pll_configure(&disp_cc_pll1, regmap, &disp_cc_pll1_config); + if (of_device_is_compatible(pdev->dev.of_node, "qcom,sm8350-dispcc")) { + clk_lucid_5lpe_pll_configure(&disp_cc_pll0, regmap, &disp_cc_pll0_config); + clk_lucid_5lpe_pll_configure(&disp_cc_pll1, regmap, &disp_cc_pll1_config); + } else { + clk_lucid_pll_configure(&disp_cc_pll0, regmap, &disp_cc_pll0_config); + clk_lucid_pll_configure(&disp_cc_pll1, regmap, &disp_cc_pll1_config); + } /* Enable clock gating for MDP clocks */ regmap_update_bits(regmap, 0x8000, 0x10, 0x10); From dbba3fce3e2f63f63750d28218dffbcd1dd73c61 Mon Sep 17 00:00:00 2001 From: Pieterjan Camerlynck Date: Thu, 11 Jul 2024 14:49:51 +0200 Subject: [PATCH 282/534] leds: leds-pca995x: Add support for NXP PCA9956B [ Upstream commit 68d6520d2e76998cdea58f6dd8782de5ab5b28af ] Add support for PCA9956B chip, which belongs to the same family. This chip features 24 instead of 16 outputs, so add a chipdef struct to deal with the different register layouts. Reviewed-by: Marek Vasut Signed-off-by: Pieterjan Camerlynck Link: https://lore.kernel.org/r/20240711-pca995x-v4-2-702a67148065@gmail.com Signed-off-by: Lee Jones Stable-dep-of: 82c5ada1f9d0 ("leds: pca995x: Fix device child node usage in pca995x_probe()") Signed-off-by: Sasha Levin --- drivers/leds/leds-pca995x.c | 59 ++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/drivers/leds/leds-pca995x.c b/drivers/leds/leds-pca995x.c index 78215dff1499..686b77772cce 100644 --- a/drivers/leds/leds-pca995x.c +++ b/drivers/leds/leds-pca995x.c @@ -19,10 +19,6 @@ #define PCA995X_MODE1 0x00 #define PCA995X_MODE2 0x01 #define PCA995X_LEDOUT0 0x02 -#define PCA9955B_PWM0 0x08 -#define PCA9952_PWM0 0x0A -#define PCA9952_IREFALL 0x43 -#define PCA9955B_IREFALL 0x45 /* Auto-increment disabled. Normal mode */ #define PCA995X_MODE1_CFG 0x00 @@ -34,17 +30,38 @@ #define PCA995X_LDRX_MASK 0x3 #define PCA995X_LDRX_BITS 2 -#define PCA995X_MAX_OUTPUTS 16 +#define PCA995X_MAX_OUTPUTS 24 #define PCA995X_OUTPUTS_PER_REG 4 #define PCA995X_IREFALL_FULL_CFG 0xFF #define PCA995X_IREFALL_HALF_CFG (PCA995X_IREFALL_FULL_CFG / 2) -#define PCA995X_TYPE_NON_B 0 -#define PCA995X_TYPE_B 1 - #define ldev_to_led(c) container_of(c, struct pca995x_led, ldev) +struct pca995x_chipdef { + unsigned int num_leds; + u8 pwm_base; + u8 irefall; +}; + +static const struct pca995x_chipdef pca9952_chipdef = { + .num_leds = 16, + .pwm_base = 0x0a, + .irefall = 0x43, +}; + +static const struct pca995x_chipdef pca9955b_chipdef = { + .num_leds = 16, + .pwm_base = 0x08, + .irefall = 0x45, +}; + +static const struct pca995x_chipdef pca9956b_chipdef = { + .num_leds = 24, + .pwm_base = 0x0a, + .irefall = 0x40, +}; + struct pca995x_led { unsigned int led_no; struct led_classdev ldev; @@ -54,7 +71,7 @@ struct pca995x_led { struct pca995x_chip { struct regmap *regmap; struct pca995x_led leds[PCA995X_MAX_OUTPUTS]; - int btype; + const struct pca995x_chipdef *chipdef; }; static int pca995x_brightness_set(struct led_classdev *led_cdev, @@ -62,10 +79,11 @@ static int pca995x_brightness_set(struct led_classdev *led_cdev, { struct pca995x_led *led = ldev_to_led(led_cdev); struct pca995x_chip *chip = led->chip; + const struct pca995x_chipdef *chipdef = chip->chipdef; u8 ledout_addr, pwmout_addr; int shift, ret; - pwmout_addr = (chip->btype ? PCA9955B_PWM0 : PCA9952_PWM0) + led->led_no; + pwmout_addr = chipdef->pwm_base + led->led_no; ledout_addr = PCA995X_LEDOUT0 + (led->led_no / PCA995X_OUTPUTS_PER_REG); shift = PCA995X_LDRX_BITS * (led->led_no % PCA995X_OUTPUTS_PER_REG); @@ -104,11 +122,12 @@ static int pca995x_probe(struct i2c_client *client) struct fwnode_handle *led_fwnodes[PCA995X_MAX_OUTPUTS] = { 0 }; struct fwnode_handle *np, *child; struct device *dev = &client->dev; + const struct pca995x_chipdef *chipdef; struct pca995x_chip *chip; struct pca995x_led *led; - int i, btype, reg, ret; + int i, reg, ret; - btype = (unsigned long)device_get_match_data(&client->dev); + chipdef = device_get_match_data(&client->dev); np = dev_fwnode(dev); if (!np) @@ -118,7 +137,7 @@ static int pca995x_probe(struct i2c_client *client) if (!chip) return -ENOMEM; - chip->btype = btype; + chip->chipdef = chipdef; chip->regmap = devm_regmap_init_i2c(client, &pca995x_regmap); if (IS_ERR(chip->regmap)) return PTR_ERR(chip->regmap); @@ -170,21 +189,21 @@ static int pca995x_probe(struct i2c_client *client) return ret; /* IREF Output current value for all LEDn outputs */ - return regmap_write(chip->regmap, - btype ? PCA9955B_IREFALL : PCA9952_IREFALL, - PCA995X_IREFALL_HALF_CFG); + return regmap_write(chip->regmap, chipdef->irefall, PCA995X_IREFALL_HALF_CFG); } static const struct i2c_device_id pca995x_id[] = { - { "pca9952", .driver_data = (kernel_ulong_t)PCA995X_TYPE_NON_B }, - { "pca9955b", .driver_data = (kernel_ulong_t)PCA995X_TYPE_B }, + { "pca9952", .driver_data = (kernel_ulong_t)&pca9952_chipdef }, + { "pca9955b", .driver_data = (kernel_ulong_t)&pca9955b_chipdef }, + { "pca9956b", .driver_data = (kernel_ulong_t)&pca9956b_chipdef }, {} }; MODULE_DEVICE_TABLE(i2c, pca995x_id); static const struct of_device_id pca995x_of_match[] = { - { .compatible = "nxp,pca9952", .data = (void *)PCA995X_TYPE_NON_B }, - { .compatible = "nxp,pca9955b", .data = (void *)PCA995X_TYPE_B }, + { .compatible = "nxp,pca9952", .data = &pca9952_chipdef }, + { .compatible = "nxp,pca9955b", . data = &pca9955b_chipdef }, + { .compatible = "nxp,pca9956b", .data = &pca9956b_chipdef }, {}, }; MODULE_DEVICE_TABLE(of, pca995x_of_match); From d14451d91a112eb2f2742208d755e806f9ffa821 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 5 Aug 2024 16:49:45 +0200 Subject: [PATCH 283/534] leds: pca995x: Use device_for_each_child_node() to access device child nodes [ Upstream commit 6eefd65ba6ae29ab801f6461e59c10f93dd496f8 ] The iterated nodes are direct children of the device node, and the `device_for_each_child_node()` macro accounts for child node availability. `fwnode_for_each_available_child_node()` is meant to access the child nodes of an fwnode, and therefore not direct child nodes of the device node. Use `device_for_each_child_node()` to indicate device's direct child nodes. Signed-off-by: Javier Carrasco Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240805-device_for_each_child_node-available-v3-2-48243a4aa5c0@gmail.com Signed-off-by: Lee Jones Stable-dep-of: 82c5ada1f9d0 ("leds: pca995x: Fix device child node usage in pca995x_probe()") Signed-off-by: Sasha Levin --- drivers/leds/leds-pca995x.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/leds/leds-pca995x.c b/drivers/leds/leds-pca995x.c index 686b77772cce..83bc9669544c 100644 --- a/drivers/leds/leds-pca995x.c +++ b/drivers/leds/leds-pca995x.c @@ -120,7 +120,7 @@ static const struct regmap_config pca995x_regmap = { static int pca995x_probe(struct i2c_client *client) { struct fwnode_handle *led_fwnodes[PCA995X_MAX_OUTPUTS] = { 0 }; - struct fwnode_handle *np, *child; + struct fwnode_handle *child; struct device *dev = &client->dev; const struct pca995x_chipdef *chipdef; struct pca995x_chip *chip; @@ -129,8 +129,7 @@ static int pca995x_probe(struct i2c_client *client) chipdef = device_get_match_data(&client->dev); - np = dev_fwnode(dev); - if (!np) + if (!dev_fwnode(dev)) return -ENODEV; chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); @@ -144,17 +143,13 @@ static int pca995x_probe(struct i2c_client *client) i2c_set_clientdata(client, chip); - fwnode_for_each_available_child_node(np, child) { + device_for_each_child_node(dev, child) { ret = fwnode_property_read_u32(child, "reg", ®); - if (ret) { - fwnode_handle_put(child); + if (ret) return ret; - } - if (reg < 0 || reg >= PCA995X_MAX_OUTPUTS || led_fwnodes[reg]) { - fwnode_handle_put(child); + if (reg < 0 || reg >= PCA995X_MAX_OUTPUTS || led_fwnodes[reg]) return -EINVAL; - } led = &chip->leds[reg]; led_fwnodes[reg] = child; From 597c72f4d1624848fdc8b58c1ef6cc467ab652cb Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 7 Aug 2024 15:37:03 +0200 Subject: [PATCH 284/534] leds: pca995x: Fix device child node usage in pca995x_probe() [ Upstream commit 82c5ada1f9d05902a4ccb926c7ce34e2fe699283 ] The current implementation accesses the `child` fwnode handle outside of device_for_each_child_node() without incrementing its refcount. Add the missing call to `fwnode_handle_get(child)`. The cleanup process where `child` is accessed is not right either because a single call to `fwnode_handle_put()` is carried out in case of an error, ignoring unasigned nodes at the point when the error happens. Keep `child` inside of the first loop, and use the helper pointer that receives references via `fwnode_handle_get()` to handle the child nodes within the second loop. Keeping `child` inside the first node has also the advantage that the scoped version of the loop can be used. Fixes: ee4e80b2962e ("leds: pca995x: Add support for PCA995X chips") Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20240807-leds-pca995x-fix-fwnode-usage-v1-1-8057c84dc583@gmail.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/leds/leds-pca995x.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/leds/leds-pca995x.c b/drivers/leds/leds-pca995x.c index 83bc9669544c..11c7bb69573e 100644 --- a/drivers/leds/leds-pca995x.c +++ b/drivers/leds/leds-pca995x.c @@ -120,12 +120,11 @@ static const struct regmap_config pca995x_regmap = { static int pca995x_probe(struct i2c_client *client) { struct fwnode_handle *led_fwnodes[PCA995X_MAX_OUTPUTS] = { 0 }; - struct fwnode_handle *child; struct device *dev = &client->dev; const struct pca995x_chipdef *chipdef; struct pca995x_chip *chip; struct pca995x_led *led; - int i, reg, ret; + int i, j, reg, ret; chipdef = device_get_match_data(&client->dev); @@ -143,7 +142,7 @@ static int pca995x_probe(struct i2c_client *client) i2c_set_clientdata(client, chip); - device_for_each_child_node(dev, child) { + device_for_each_child_node_scoped(dev, child) { ret = fwnode_property_read_u32(child, "reg", ®); if (ret) return ret; @@ -152,7 +151,7 @@ static int pca995x_probe(struct i2c_client *client) return -EINVAL; led = &chip->leds[reg]; - led_fwnodes[reg] = child; + led_fwnodes[reg] = fwnode_handle_get(child); led->chip = chip; led->led_no = reg; led->ldev.brightness_set_blocking = pca995x_brightness_set; @@ -171,7 +170,8 @@ static int pca995x_probe(struct i2c_client *client) &chip->leds[i].ldev, &init_data); if (ret < 0) { - fwnode_handle_put(child); + for (j = i; j < PCA995X_MAX_OUTPUTS; j++) + fwnode_handle_put(led_fwnodes[j]); return dev_err_probe(dev, ret, "Could not register LED %s\n", chip->leds[i].ldev.name); From e39cc0c37d7cb9d6d0aa28d9b7c8f5506a92982a Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Mon, 12 Aug 2024 13:26:59 -0700 Subject: [PATCH 285/534] x86/PCI: Check pcie_find_root_port() return for NULL [ Upstream commit dbc3171194403d0d40e4bdeae666f6e76e428b53 ] If pcie_find_root_port() is unable to locate a Root Port, it will return NULL. Check the pointer for NULL before dereferencing it. This particular case is in a quirk for devices that are always below a Root Port, so this won't avoid a problem and doesn't need to be backported, but check as a matter of style and to prevent copy/paste mistakes. Link: https://lore.kernel.org/r/20240812202659.1649121-1-samasth.norway.ananda@oracle.com Signed-off-by: Samasth Norway Ananda [bhelgaas: drop Fixes: and explain why there's no problem in this case] Signed-off-by: Bjorn Helgaas Reviewed-by: Mario Limonciello Signed-off-by: Sasha Levin --- arch/x86/pci/fixup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b33afb240601..98a9bb92d75c 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -980,7 +980,7 @@ static void amd_rp_pme_suspend(struct pci_dev *dev) return; rp = pcie_find_root_port(dev); - if (!rp->pm_cap) + if (!rp || !rp->pm_cap) return; rp->pme_support &= ~((PCI_PM_CAP_PME_D3hot|PCI_PM_CAP_PME_D3cold) >> @@ -994,7 +994,7 @@ static void amd_rp_pme_resume(struct pci_dev *dev) u16 pmc; rp = pcie_find_root_port(dev); - if (!rp->pm_cap) + if (!rp || !rp->pm_cap) return; pci_read_config_word(rp, rp->pm_cap + PCI_PM_PMC, &pmc); From 18a672c62d735744c6340eb3f5e58934a5d34cf2 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Mon, 19 Aug 2024 14:20:44 +0800 Subject: [PATCH 286/534] nvdimm: Fix devs leaks in scan_labels() [ Upstream commit 62c2aa6b1f565d2fc1ec11a6e9e8336ce37a6426 ] scan_labels() leaks memory when label scanning fails and it falls back to just creating a default "seed" namespace for userspace to configure. Root can force the kernel to leak memory. Allocate the minimum resources unconditionally and release them when unneeded to avoid the memory leak. A kmemleak reports: unreferenced object 0xffff88800dda1980 (size 16): comm "kworker/u10:5", pid 69, jiffies 4294671781 hex dump (first 16 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): [<00000000c5dea560>] __kmalloc+0x32c/0x470 [<000000009ed43c83>] nd_region_register_namespaces+0x6fb/0x1120 [libnvdimm] [<000000000e07a65c>] nd_region_probe+0xfe/0x210 [libnvdimm] [<000000007b79ce5f>] nvdimm_bus_probe+0x7a/0x1e0 [libnvdimm] [<00000000a5f3da2e>] really_probe+0xc6/0x390 [<00000000129e2a69>] __driver_probe_device+0x78/0x150 [<000000002dfed28b>] driver_probe_device+0x1e/0x90 [<00000000e7048de2>] __device_attach_driver+0x85/0x110 [<0000000032dca295>] bus_for_each_drv+0x85/0xe0 [<00000000391c5a7d>] __device_attach+0xbe/0x1e0 [<0000000026dabec0>] bus_probe_device+0x94/0xb0 [<00000000c590d936>] device_add+0x656/0x870 [<000000003d69bfaa>] nd_async_device_register+0xe/0x50 [libnvdimm] [<000000003f4c52a4>] async_run_entry_fn+0x2e/0x110 [<00000000e201f4b0>] process_one_work+0x1ee/0x600 [<000000006d90d5a9>] worker_thread+0x183/0x350 Cc: Dave Jiang Cc: Ira Weiny Fixes: 1b40e09a1232 ("libnvdimm: blk labels and namespace instantiation") Suggested-by: Dan Williams Signed-off-by: Li Zhijian Reviewed-by: Dan Williams Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20240819062045.1481298-1-lizhijian@fujitsu.com Signed-off-by: Ira Weiny Signed-off-by: Sasha Levin --- drivers/nvdimm/namespace_devs.c | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 07177eadc56e..1ea8c27e8874 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1927,12 +1927,16 @@ static int cmp_dpa(const void *a, const void *b) static struct device **scan_labels(struct nd_region *nd_region) { int i, count = 0; - struct device *dev, **devs = NULL; + struct device *dev, **devs; struct nd_label_ent *label_ent, *e; struct nd_mapping *nd_mapping = &nd_region->mapping[0]; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); resource_size_t map_end = nd_mapping->start + nd_mapping->size - 1; + devs = kcalloc(2, sizeof(dev), GFP_KERNEL); + if (!devs) + return NULL; + /* "safe" because create_namespace_pmem() might list_move() label_ent */ list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) { struct nd_namespace_label *nd_label = label_ent->label; @@ -1951,12 +1955,14 @@ static struct device **scan_labels(struct nd_region *nd_region) goto err; if (i < count) continue; - __devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL); - if (!__devs) - goto err; - memcpy(__devs, devs, sizeof(dev) * count); - kfree(devs); - devs = __devs; + if (count) { + __devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL); + if (!__devs) + goto err; + memcpy(__devs, devs, sizeof(dev) * count); + kfree(devs); + devs = __devs; + } dev = create_namespace_pmem(nd_region, nd_mapping, nd_label); if (IS_ERR(dev)) { @@ -1983,11 +1989,6 @@ static struct device **scan_labels(struct nd_region *nd_region) /* Publish a zero-sized namespace for userspace to configure. */ nd_mapping_free_labels(nd_mapping); - - devs = kcalloc(2, sizeof(dev), GFP_KERNEL); - if (!devs) - goto err; - nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); if (!nspm) goto err; @@ -2026,11 +2027,10 @@ static struct device **scan_labels(struct nd_region *nd_region) return devs; err: - if (devs) { - for (i = 0; devs[i]; i++) - namespace_pmem_release(devs[i]); - kfree(devs); - } + for (i = 0; devs[i]; i++) + namespace_pmem_release(devs[i]); + kfree(devs); + return NULL; } From f1058b0780b4e2790b0a84f17c8f117288bd3ade Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 31 May 2024 12:13:33 -0400 Subject: [PATCH 287/534] PCI: xilinx-nwl: Fix register misspelling [ Upstream commit a437027ae1730b8dc379c75fa0dd7d3036917400 ] MSIC -> MISC Fixes: c2a7ff18edcd ("PCI: xilinx-nwl: Expand error logging") Link: https://lore.kernel.org/r/20240531161337.864994-4-sean.anderson@linux.dev Signed-off-by: Sean Anderson Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/controller/pcie-xilinx-nwl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 176686bdb15c..7cf05431d54b 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -80,8 +80,8 @@ #define MSGF_MISC_SR_NON_FATAL_DEV BIT(22) #define MSGF_MISC_SR_FATAL_DEV BIT(23) #define MSGF_MISC_SR_LINK_DOWN BIT(24) -#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH BIT(25) -#define MSGF_MSIC_SR_LINK_BWIDTH BIT(26) +#define MSGF_MISC_SR_LINK_AUTO_BWIDTH BIT(25) +#define MSGF_MISC_SR_LINK_BWIDTH BIT(26) #define MSGF_MISC_SR_MASKALL (MSGF_MISC_SR_RXMSG_AVAIL | \ MSGF_MISC_SR_RXMSG_OVER | \ @@ -96,8 +96,8 @@ MSGF_MISC_SR_NON_FATAL_DEV | \ MSGF_MISC_SR_FATAL_DEV | \ MSGF_MISC_SR_LINK_DOWN | \ - MSGF_MSIC_SR_LINK_AUTO_BWIDTH | \ - MSGF_MSIC_SR_LINK_BWIDTH) + MSGF_MISC_SR_LINK_AUTO_BWIDTH | \ + MSGF_MISC_SR_LINK_BWIDTH) /* Legacy interrupt status mask bits */ #define MSGF_LEG_SR_INTA BIT(0) @@ -301,10 +301,10 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void *data) if (misc_stat & MSGF_MISC_SR_FATAL_DEV) dev_err(dev, "Fatal Error Detected\n"); - if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH) + if (misc_stat & MSGF_MISC_SR_LINK_AUTO_BWIDTH) dev_info(dev, "Link Autonomous Bandwidth Management Status bit set\n"); - if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH) + if (misc_stat & MSGF_MISC_SR_LINK_BWIDTH) dev_info(dev, "Link Bandwidth Management Status bit set\n"); /* Clear misc interrupt status */ From 4f201a94ac52a1ed9cd04dc0c0f73ef3e9ab53a5 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 31 May 2024 12:13:35 -0400 Subject: [PATCH 288/534] PCI: xilinx-nwl: Clean up clock on probe failure/removal [ Upstream commit cfd67903977b13f63340a4eb5a1cc890994f2c62 ] Make sure we turn off the clock on probe failure and device removal. Fixes: de0a01f52966 ("PCI: xilinx-nwl: Enable the clock through CCF") Link: https://lore.kernel.org/r/20240531161337.864994-6-sean.anderson@linux.dev Signed-off-by: Sean Anderson Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/controller/pcie-xilinx-nwl.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 7cf05431d54b..60afbb51511c 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -790,6 +790,7 @@ static int nwl_pcie_probe(struct platform_device *pdev) return -ENODEV; pcie = pci_host_bridge_priv(bridge); + platform_set_drvdata(pdev, pcie); pcie->dev = dev; pcie->ecam_value = NWL_ECAM_VALUE_DEFAULT; @@ -813,13 +814,13 @@ static int nwl_pcie_probe(struct platform_device *pdev) err = nwl_pcie_bridge_init(pcie); if (err) { dev_err(dev, "HW Initialization failed\n"); - return err; + goto err_clk; } err = nwl_pcie_init_irq_domain(pcie); if (err) { dev_err(dev, "Failed creating IRQ Domain\n"); - return err; + goto err_clk; } bridge->sysdata = pcie; @@ -829,11 +830,24 @@ static int nwl_pcie_probe(struct platform_device *pdev) err = nwl_pcie_enable_msi(pcie); if (err < 0) { dev_err(dev, "failed to enable MSI support: %d\n", err); - return err; + goto err_clk; } } - return pci_host_probe(bridge); + err = pci_host_probe(bridge); + if (!err) + return 0; + +err_clk: + clk_disable_unprepare(pcie->clk); + return err; +} + +static void nwl_pcie_remove(struct platform_device *pdev) +{ + struct nwl_pcie *pcie = platform_get_drvdata(pdev); + + clk_disable_unprepare(pcie->clk); } static struct platform_driver nwl_pcie_driver = { @@ -843,5 +857,6 @@ static struct platform_driver nwl_pcie_driver = { .of_match_table = nwl_pcie_of_match, }, .probe = nwl_pcie_probe, + .remove_new = nwl_pcie_remove, }; builtin_platform_driver(nwl_pcie_driver); From 451249bb8d4418377d6be8fa7dfd54d6d88d088c Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 31 Jul 2024 17:49:32 +0100 Subject: [PATCH 289/534] media: platform: rzg2l-cru: rzg2l-csi2: Add missing MODULE_DEVICE_TABLE [ Upstream commit 07668fb0f867388bfdac0b60dbf51a4ad789f8e7 ] The rzg2l-csi2 driver can be compiled as a module, but lacks MODULE_DEVICE_TABLE() and will therefore not be loaded automatically. Fix this. Fixes: 51e8415e39a9 ("media: platform: Add Renesas RZ/G2L MIPI CSI-2 receiver driver") Signed-off-by: Biju Das Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20240731164935.308994-1-biju.das.jz@bp.renesas.com Signed-off-by: Laurent Pinchart Signed-off-by: Sasha Levin --- drivers/media/platform/renesas/rzg2l-cru/rzg2l-csi2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/renesas/rzg2l-cru/rzg2l-csi2.c b/drivers/media/platform/renesas/rzg2l-cru/rzg2l-csi2.c index ad2bd71037ab..246eec259c5d 100644 --- a/drivers/media/platform/renesas/rzg2l-cru/rzg2l-csi2.c +++ b/drivers/media/platform/renesas/rzg2l-cru/rzg2l-csi2.c @@ -854,6 +854,7 @@ static const struct of_device_id rzg2l_csi2_of_table[] = { { .compatible = "renesas,rzg2l-csi2", }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, rzg2l_csi2_of_table); static struct platform_driver rzg2l_csi2_pdrv = { .remove_new = rzg2l_csi2_remove, From 8b7df76356d098f85f3bd2c7cf6fb43f531893d7 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 20 Aug 2024 13:33:36 +0200 Subject: [PATCH 290/534] RDMA/iwcm: Fix WARNING:at_kernel/workqueue.c:#check_flush_dependency [ Upstream commit 86dfdd8288907f03c18b7fb462e0e232c4f98d89 ] In the commit aee2424246f9 ("RDMA/iwcm: Fix a use-after-free related to destroying CM IDs"), the function flush_workqueue is invoked to flush the work queue iwcm_wq. But at that time, the work queue iwcm_wq was created via the function alloc_ordered_workqueue without the flag WQ_MEM_RECLAIM. Because the current process is trying to flush the whole iwcm_wq, if iwcm_wq doesn't have the flag WQ_MEM_RECLAIM, verify that the current process is not reclaiming memory or running on a workqueue which doesn't have the flag WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to a deadlock. The call trace is as below: [ 125.350876][ T1430] Call Trace: [ 125.356281][ T1430] [ 125.361285][ T1430] ? __warn (kernel/panic.c:693) [ 125.367640][ T1430] ? check_flush_dependency (kernel/workqueue.c:3706 (discriminator 9)) [ 125.375689][ T1430] ? report_bug (lib/bug.c:180 lib/bug.c:219) [ 125.382505][ T1430] ? handle_bug (arch/x86/kernel/traps.c:239) [ 125.388987][ T1430] ? exc_invalid_op (arch/x86/kernel/traps.c:260 (discriminator 1)) [ 125.395831][ T1430] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:621) [ 125.403125][ T1430] ? check_flush_dependency (kernel/workqueue.c:3706 (discriminator 9)) [ 125.410984][ T1430] ? check_flush_dependency (kernel/workqueue.c:3706 (discriminator 9)) [ 125.418764][ T1430] __flush_workqueue (kernel/workqueue.c:3970) [ 125.426021][ T1430] ? __pfx___might_resched (kernel/sched/core.c:10151) [ 125.433431][ T1430] ? destroy_cm_id (drivers/infiniband/core/iwcm.c:375) iw_cm [ 125.441209][ T1430] ? __pfx___flush_workqueue (kernel/workqueue.c:3910) [ 125.473900][ T1430] ? _raw_spin_lock_irqsave (arch/x86/include/asm/atomic.h:107 include/linux/atomic/atomic-arch-fallback.h:2170 include/linux/atomic/atomic-instrumented.h:1302 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162) [ 125.473909][ T1430] ? __pfx__raw_spin_lock_irqsave (kernel/locking/spinlock.c:161) [ 125.482537][ T1430] _destroy_id (drivers/infiniband/core/cma.c:2044) rdma_cm [ 125.495072][ T1430] nvme_rdma_free_queue (drivers/nvme/host/rdma.c:656 drivers/nvme/host/rdma.c:650) nvme_rdma [ 125.505827][ T1430] nvme_rdma_reset_ctrl_work (drivers/nvme/host/rdma.c:2180) nvme_rdma [ 125.505831][ T1430] process_one_work (kernel/workqueue.c:3231) [ 125.515122][ T1430] worker_thread (kernel/workqueue.c:3306 kernel/workqueue.c:3393) [ 125.515127][ T1430] ? __pfx_worker_thread (kernel/workqueue.c:3339) [ 125.531837][ T1430] kthread (kernel/kthread.c:389) [ 125.539864][ T1430] ? __pfx_kthread (kernel/kthread.c:342) [ 125.550628][ T1430] ret_from_fork (arch/x86/kernel/process.c:147) [ 125.558840][ T1430] ? __pfx_kthread (kernel/kthread.c:342) [ 125.558844][ T1430] ret_from_fork_asm (arch/x86/entry/entry_64.S:257) [ 125.566487][ T1430] [ 125.566488][ T1430] ---[ end trace 0000000000000000 ]--- Fixes: aee2424246f9 ("RDMA/iwcm: Fix a use-after-free related to destroying CM IDs") Link: https://patch.msgid.link/r/20240820113336.19860-1-yanjun.zhu@linux.dev Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202408151633.fc01893c-oliver.sang@intel.com Tested-by: kernel test robot Signed-off-by: Zhu Yanjun Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/iwcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 2d09d1be38f1..3e4941754b48 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -1191,7 +1191,7 @@ static int __init iw_cm_init(void) if (ret) return ret; - iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0); + iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); if (!iwcm_wq) goto err_alloc; From b6edb3fd96bf0579451e8b0bc5d19940bfa81df0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 19 Aug 2024 10:46:25 +0800 Subject: [PATCH 291/534] pinctrl: single: fix missing error code in pcs_probe() [ Upstream commit cacd8cf79d7823b07619865e994a7916fcc8ae91 ] If pinctrl_enable() fails in pcs_probe(), it should return the error code. Fixes: 8f773bfbdd42 ("pinctrl: single: fix possible memory leak when pinctrl_enable() fails") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/20240819024625.154441-1-yangyingliang@huaweicloud.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-single.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index bbe7cc894b1a..6c670203b3ac 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1918,7 +1918,8 @@ static int pcs_probe(struct platform_device *pdev) dev_info(pcs->dev, "%i pins, size %u\n", pcs->desc.npins, pcs->size); - if (pinctrl_enable(pcs->pctl)) + ret = pinctrl_enable(pcs->pctl); + if (ret) goto free; return 0; From 08d13bcb9cea2b9ceb00b956681cac682241db60 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Sun, 14 Jul 2024 17:13:15 +0300 Subject: [PATCH 292/534] clk: at91: sama7g5: Allocate only the needed amount of memory for PLLs [ Upstream commit 2d6e9ee7cb3e79b1713783c633b13af9aeffc90c ] The maximum number of PLL components on SAMA7G5 is 3 (one fractional part and 2 dividers). Allocate the needed amount of memory for sama7g5_plls 2d array. Previous code used to allocate 7 array entries for each PLL. While at it, replace 3 with PLL_COMPID_MAX in the loop which parses the sama7g5_plls 2d array. Fixes: cb783bbbcf54 ("clk: at91: sama7g5: add clock support for sama7g5") Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20240714141315.19480-1-claudiu.beznea@tuxon.dev Signed-off-by: Claudiu Beznea Signed-off-by: Sasha Levin --- drivers/clk/at91/sama7g5.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index 91b5c6f14819..4e9594714b14 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -66,6 +66,7 @@ enum pll_component_id { PLL_COMPID_FRAC, PLL_COMPID_DIV0, PLL_COMPID_DIV1, + PLL_COMPID_MAX, }; /* @@ -165,7 +166,7 @@ static struct sama7g5_pll { u8 t; u8 eid; u8 safe_div; -} sama7g5_plls[][PLL_ID_MAX] = { +} sama7g5_plls[][PLL_COMPID_MAX] = { [PLL_ID_CPU] = { [PLL_COMPID_FRAC] = { .n = "cpupll_fracck", @@ -1038,7 +1039,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np) sama7g5_pmc->chws[PMC_MAIN] = hw; for (i = 0; i < PLL_ID_MAX; i++) { - for (j = 0; j < 3; j++) { + for (j = 0; j < PLL_COMPID_MAX; j++) { struct clk_hw *parent_hw; if (!sama7g5_plls[i][j].n) From 588bcce9e64cc5138858ab562268eb3943c5b06c Mon Sep 17 00:00:00 2001 From: Yunfei Dong Date: Thu, 13 Jun 2024 17:33:55 +0800 Subject: [PATCH 293/534] media: mediatek: vcodec: Fix H264 multi stateless decoder smatch warning [ Upstream commit 9be85491619f1953b8a29590ca630be571941ffa ] Fix a smatch static checker warning on vdec_h264_req_multi_if.c. Which leads to a kernel crash when fb is NULL. Fixes: 397edc703a10 ("media: mediatek: vcodec: add h264 decoder driver for mt8186") Signed-off-by: Yunfei Dong Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../vcodec/decoder/vdec/vdec_h264_req_multi_if.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c index 0e741e0dc8ba..bb0ad93c6b78 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c @@ -724,11 +724,16 @@ static int vdec_h264_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs return vpu_dec_reset(vpu); fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); + if (!fb) { + mtk_vdec_err(inst->ctx, "fb buffer is NULL"); + return -ENOMEM; + } + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); - y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; - c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + y_fb_dma = fb->base_y.dma_addr; + c_fb_dma = fb->base_c.dma_addr; mtk_vdec_debug(inst->ctx, "[h264-dec] [%d] y_dma=%llx c_dma=%llx", inst->ctx->decoded_frame_cnt, y_fb_dma, c_fb_dma); From dbe5b7373801c261f3ea118145fbb2caac5f9324 Mon Sep 17 00:00:00 2001 From: Yunfei Dong Date: Thu, 13 Jun 2024 17:33:56 +0800 Subject: [PATCH 294/534] media: mediatek: vcodec: Fix VP8 stateless decoder smatch warning [ Upstream commit b113bc7c0e83b32f4dd2d291a2b6c4803e0a2c44 ] Fix a smatch static checker warning on vdec_vp8_req_if.c. Which leads to a kernel crash when fb is NULL. Fixes: 7a7ae26fd458 ("media: mediatek: vcodec: support stateless VP8 decoding") Signed-off-by: Yunfei Dong Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c index f64b21c07169..86b93055f163 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c @@ -335,14 +335,18 @@ static int vdec_vp8_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); - dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); + if (!fb) { + mtk_vdec_err(inst->ctx, "fb buffer is NULL"); + return -ENOMEM; + } - y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); + y_fb_dma = fb->base_y.dma_addr; if (inst->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1) c_fb_dma = y_fb_dma + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h; else - c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + c_fb_dma = fb->base_c.dma_addr; inst->vsi->dec.bs_dma = (u64)bs->dma_addr; inst->vsi->dec.bs_sz = bs->size; From c6b9f971b43980de8893610f606d751131fb5d86 Mon Sep 17 00:00:00 2001 From: Yunfei Dong Date: Thu, 13 Jun 2024 17:33:57 +0800 Subject: [PATCH 295/534] media: mediatek: vcodec: Fix H264 stateless decoder smatch warning [ Upstream commit 7878d3a385efab560dce793b595447867fb163f2 ] Fix a smatch static checker warning on vdec_h264_req_if.c. Which leads to a kernel crash when fb is NULL. Fixes: 06fa5f757dc5 ("media: mtk-vcodec: vdec: support stateless H.264 decoding") Signed-off-by: Yunfei Dong Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c index 5600f1df653d..b55fbd6a8a66 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c @@ -347,11 +347,16 @@ static int vdec_h264_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, return vpu_dec_reset(vpu); fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); + if (!fb) { + mtk_vdec_err(inst->ctx, "fb buffer is NULL"); + return -ENOMEM; + } + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); - y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; - c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + y_fb_dma = fb->base_y.dma_addr; + c_fb_dma = fb->base_c.dma_addr; mtk_vdec_debug(inst->ctx, "+ [%d] FB y_dma=%llx c_dma=%llx va=%p", inst->num_nalu, y_fb_dma, c_fb_dma, fb); From effc10f00cf85fed8b96b89a57b252fc3ac7236c Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Wed, 21 Aug 2024 13:22:10 +0200 Subject: [PATCH 296/534] RDMA/rtrs: Reset hb_missed_cnt after receiving other traffic from peer [ Upstream commit 3258cbbd86deaa2675e1799bc3d18bd1ef472641 ] Reset hb_missed_cnt after receiving traffic from other peer, so hb is more robust again high load on host or network. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Signed-off-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://patch.msgid.link/20240821112217.41827-5-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 3 ++- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 1aee62aa1515..75d3a27dea9a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -626,6 +626,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) */ if (WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done)) return; + clt_path->s.hb_missed_cnt = 0; rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload); if (imm_type == RTRS_IO_RSP_IMM || @@ -643,7 +644,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) return rtrs_clt_recv_done(con, wc); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); - clt_path->s.hb_missed_cnt = 0; clt_path->s.hb_cur_latency = ktime_sub(ktime_get(), clt_path->s.hb_last_sent); if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) @@ -670,6 +670,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) /* * Key invalidations from server side */ + clt_path->s.hb_missed_cnt = 0; WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || wc->wc_flags & IB_WC_WITH_IMM)); WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 1d33efb8fb03..94ac99a4f696 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1229,6 +1229,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) */ if (WARN_ON(wc->wr_cqe != &io_comp_cqe)) return; + srv_path->s.hb_missed_cnt = 0; err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); if (err) { rtrs_err(s, "rtrs_post_recv(), err: %d\n", err); From 01b9be936ee8839ab9f83a7e84ee02ac6c8303c4 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Wed, 21 Aug 2024 13:22:12 +0200 Subject: [PATCH 297/534] RDMA/rtrs-clt: Reset cid to con_num - 1 to stay in bounds [ Upstream commit 3e4289b29e216a55d08a89e126bc0b37cbad9f38 ] In the function init_conns(), after the create_con() and create_cm() for loop if something fails. In the cleanup for loop after the destroy tag, we access out of bound memory because cid is set to clt_path->s.con_num. This commits resets the cid to clt_path->s.con_num - 1, to stay in bounds in the cleanup loop later. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Link: https://patch.msgid.link/20240821112217.41827-7-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 75d3a27dea9a..82aa47efb807 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2342,6 +2342,12 @@ static int init_conns(struct rtrs_clt_path *clt_path) if (err) goto destroy; } + + /* + * Set the cid to con_num - 1, since if we fail later, we want to stay in bounds. + */ + cid = clt_path->s.con_num - 1; + err = alloc_path_reqs(clt_path); if (err) goto destroy; From 8758691ea89a13587229c26d331b8860612b6308 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 26 Aug 2024 10:35:29 -0500 Subject: [PATCH 298/534] clk: ti: dra7-atl: Fix leak of of_nodes [ Upstream commit 9d6e9f10e2e031fb7bfb3030a7d1afc561a28fea ] This fix leaking the of_node references in of_dra7_atl_clk_probe(). The docs for of_parse_phandle_with_args() say that the caller must call of_node_put() on the returned node. This adds the missing of_node_put() to fix the leak. Fixes: 9ac33b0ce81f ("CLK: TI: Driver for DRA7 ATL (Audio Tracking Logic)") Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20240826-clk-fix-leak-v1-1-f55418a13aa6@baylibre.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/ti/clk-dra7-atl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c index d964e3affd42..0eab7f3e2eab 100644 --- a/drivers/clk/ti/clk-dra7-atl.c +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -240,6 +240,7 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev) } clk = of_clk_get_from_provider(&clkspec); + of_node_put(clkspec.np); if (IS_ERR(clk)) { pr_err("%s: failed to get atl clock %d from provider\n", __func__, i); From 521d101e9e8f11889300ccf51726d018e2720422 Mon Sep 17 00:00:00 2001 From: Yuntao Liu Date: Thu, 15 Aug 2024 09:38:53 +0000 Subject: [PATCH 299/534] clk: starfive: Use pm_runtime_resume_and_get to fix pm_runtime_get_sync() usage [ Upstream commit 55c312c1b2be6d43e39c280ad6ab4b711e545b89 ] We need to call pm_runtime_put_noidle() when pm_runtime_get_sync() fails, so use pm_runtime_resume_and_get() instead. this function will handle this. Fixes: dae5448a327ed ("clk: starfive: Add StarFive JH7110 Video-Output clock driver") Signed-off-by: Yuntao Liu Link: https://lore.kernel.org/r/20240815093853.757487-1-liuyuntao12@huawei.com Reviewed-by: Xingyu Wu Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/starfive/clk-starfive-jh7110-vout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/starfive/clk-starfive-jh7110-vout.c b/drivers/clk/starfive/clk-starfive-jh7110-vout.c index 10cc1ec43925..36340ca42cc7 100644 --- a/drivers/clk/starfive/clk-starfive-jh7110-vout.c +++ b/drivers/clk/starfive/clk-starfive-jh7110-vout.c @@ -145,7 +145,7 @@ static int jh7110_voutcrg_probe(struct platform_device *pdev) /* enable power domain and clocks */ pm_runtime_enable(priv->dev); - ret = pm_runtime_get_sync(priv->dev); + ret = pm_runtime_resume_and_get(priv->dev); if (ret < 0) return dev_err_probe(priv->dev, ret, "failed to turn on power\n"); From 6ba2624779941ac5ffa6ff5ea4a1969fe929e0cd Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Thu, 29 Aug 2024 08:28:20 +0300 Subject: [PATCH 300/534] clk: rockchip: rk3588: Fix 32k clock name for pmu_24m_32k_100m_src_p [ Upstream commit 0d02e8d284a45bfa8997ebe8764437b8eb6b108b ] The 32kHz input clock is named "xin32k" in the driver, so the name "32k" appears to be a typo in this case. Lets fix this. Signed-off-by: Alexander Shiyan Reviewed-by: Dragan Simic Fixes: f1c506d152ff ("clk: rockchip: add clock controller for the RK3588") Link: https://lore.kernel.org/r/20240829052820.3604-1-eagle.alexander923@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- drivers/clk/rockchip/clk-rk3588.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c index 6994165e0395..d8ffcaefa480 100644 --- a/drivers/clk/rockchip/clk-rk3588.c +++ b/drivers/clk/rockchip/clk-rk3588.c @@ -526,7 +526,7 @@ PNAME(pmu_200m_100m_p) = { "clk_pmu1_200m_src", "clk_pmu1_100m_src" }; PNAME(pmu_300m_24m_p) = { "clk_300m_src", "xin24m" }; PNAME(pmu_400m_24m_p) = { "clk_400m_src", "xin24m" }; PNAME(pmu_100m_50m_24m_src_p) = { "clk_pmu1_100m_src", "clk_pmu1_50m_src", "xin24m" }; -PNAME(pmu_24m_32k_100m_src_p) = { "xin24m", "32k", "clk_pmu1_100m_src" }; +PNAME(pmu_24m_32k_100m_src_p) = { "xin24m", "xin32k", "clk_pmu1_100m_src" }; PNAME(hclk_pmu1_root_p) = { "clk_pmu1_200m_src", "clk_pmu1_100m_src", "clk_pmu1_50m_src", "xin24m" }; PNAME(hclk_pmu_cm0_root_p) = { "clk_pmu1_400m_src", "clk_pmu1_200m_src", "clk_pmu1_100m_src", "xin24m" }; PNAME(mclk_pdm0_p) = { "clk_pmu1_300m_src", "clk_pmu1_200m_src" }; From 982dfdfd59b1a7bdd41dc04c8d0ad28bf1b0c263 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 11 Jul 2024 15:11:13 -0400 Subject: [PATCH 301/534] nfsd: remove unneeded EEXIST error check in nfsd_do_file_acquire [ Upstream commit 81a95c2b1d605743220f28db04b8da13a65c4059 ] Given that we do the search and insertion while holding the i_lock, I don't think it's possible for us to get EEXIST here. Remove this case. Fixes: c6593366c0bf ("nfsd: don't kill nfsd_files because of lease break error") Signed-off-by: Jeff Layton Tested-by: Youzhong Yang Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 07bf219f9ae4..88cefb630e17 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1040,8 +1040,6 @@ retry: if (likely(ret == 0)) goto open_file; - if (ret == -EEXIST) - goto retry; trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); status = nfserr_jukebox; goto construction_err; From a685bc3524f3f03430c97533357a69c26896bdae Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Jul 2024 09:05:32 -0400 Subject: [PATCH 302/534] nfsd: fix refcount leak when file is unhashed after being found [ Upstream commit 8a7926176378460e0d91e02b03f0ff20a8709a60 ] If we wait_for_construction and find that the file is no longer hashed, and we're going to retry the open, the old nfsd_file reference is currently leaked. Put the reference before retrying. Fixes: c6593366c0bf ("nfsd: don't kill nfsd_files because of lease break error") Signed-off-by: Jeff Layton Tested-by: Youzhong Yang Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 88cefb630e17..5c9f8f8404d5 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1054,6 +1054,7 @@ wait_for_construction: status = nfserr_jukebox; goto construction_err; } + nfsd_file_put(nf); open_retry = false; fh_put(fhp); goto retry; From 4c49d34f87a20cabb7886d4df2b3d80dba06c748 Mon Sep 17 00:00:00 2001 From: Wang Jianzheng Date: Thu, 29 Aug 2024 14:48:23 +0800 Subject: [PATCH 303/534] pinctrl: mvebu: Fix devinit_dove_pinctrl_probe function [ Upstream commit c25478419f6fd3f74c324a21ec007cf14f2688d7 ] When an error occurs during the execution of the function __devinit_dove_pinctrl_probe, the clk is not properly disabled. Fix this by calling clk_disable_unprepare before return. Fixes: ba607b6238a1 ("pinctrl: mvebu: make pdma clock on dove mandatory") Signed-off-by: Wang Jianzheng Link: https://lore.kernel.org/20240829064823.19808-1-wangjianzheng@vivo.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mvebu/pinctrl-dove.c | 42 +++++++++++++++++++--------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-dove.c b/drivers/pinctrl/mvebu/pinctrl-dove.c index 1947da73e512..dce601d99372 100644 --- a/drivers/pinctrl/mvebu/pinctrl-dove.c +++ b/drivers/pinctrl/mvebu/pinctrl-dove.c @@ -767,7 +767,7 @@ static int dove_pinctrl_probe(struct platform_device *pdev) struct resource fb_res; struct mvebu_mpp_ctrl_data *mpp_data; void __iomem *base; - int i; + int i, ret; pdev->dev.platform_data = (void *)device_get_match_data(&pdev->dev); @@ -783,13 +783,17 @@ static int dove_pinctrl_probe(struct platform_device *pdev) clk_prepare_enable(clk); base = devm_platform_get_and_ioremap_resource(pdev, 0, &mpp_res); - if (IS_ERR(base)) - return PTR_ERR(base); + if (IS_ERR(base)) { + ret = PTR_ERR(base); + goto err_probe; + } mpp_data = devm_kcalloc(&pdev->dev, dove_pinctrl_info.ncontrols, sizeof(*mpp_data), GFP_KERNEL); - if (!mpp_data) - return -ENOMEM; + if (!mpp_data) { + ret = -ENOMEM; + goto err_probe; + } dove_pinctrl_info.control_data = mpp_data; for (i = 0; i < ARRAY_SIZE(dove_mpp_controls); i++) @@ -808,8 +812,10 @@ static int dove_pinctrl_probe(struct platform_device *pdev) } mpp4_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mpp4_base)) - return PTR_ERR(mpp4_base); + if (IS_ERR(mpp4_base)) { + ret = PTR_ERR(mpp4_base); + goto err_probe; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 2); if (!res) { @@ -820,8 +826,10 @@ static int dove_pinctrl_probe(struct platform_device *pdev) } pmu_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(pmu_base)) - return PTR_ERR(pmu_base); + if (IS_ERR(pmu_base)) { + ret = PTR_ERR(pmu_base); + goto err_probe; + } gconfmap = syscon_regmap_lookup_by_compatible("marvell,dove-global-config"); if (IS_ERR(gconfmap)) { @@ -831,12 +839,17 @@ static int dove_pinctrl_probe(struct platform_device *pdev) adjust_resource(&fb_res, (mpp_res->start & INT_REGS_MASK) + GC_REGS_OFFS, 0x14); gc_base = devm_ioremap_resource(&pdev->dev, &fb_res); - if (IS_ERR(gc_base)) - return PTR_ERR(gc_base); + if (IS_ERR(gc_base)) { + ret = PTR_ERR(gc_base); + goto err_probe; + } + gconfmap = devm_regmap_init_mmio(&pdev->dev, gc_base, &gc_regmap_config); - if (IS_ERR(gconfmap)) - return PTR_ERR(gconfmap); + if (IS_ERR(gconfmap)) { + ret = PTR_ERR(gconfmap); + goto err_probe; + } } /* Warn on any missing DT resource */ @@ -844,6 +857,9 @@ static int dove_pinctrl_probe(struct platform_device *pdev) dev_warn(&pdev->dev, FW_BUG "Missing pinctrl regs in DTB. Please update your firmware.\n"); return mvebu_pinctrl_probe(pdev); +err_probe: + clk_disable_unprepare(clk); + return ret; } static struct platform_driver dove_pinctrl_driver = { From d08754be993f270e3d296d8f5d8e071fe6638651 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 2 Sep 2024 13:36:33 +0300 Subject: [PATCH 304/534] IB/core: Fix ib_cache_setup_one error flow cleanup [ Upstream commit 1403c8b14765eab805377dd3b75e96ace8747aed ] When ib_cache_update return an error, we exit ib_cache_setup_one instantly with no proper cleanup, even though before this we had already successfully done gid_table_setup_one, that results in the kernel WARN below. Do proper cleanup using gid_table_cleanup_one before returning the err in order to fix the issue. WARNING: CPU: 4 PID: 922 at drivers/infiniband/core/cache.c:806 gid_table_release_one+0x181/0x1a0 Modules linked in: CPU: 4 UID: 0 PID: 922 Comm: c_repro Not tainted 6.11.0-rc1+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:gid_table_release_one+0x181/0x1a0 Code: 44 8b 38 75 0c e8 2f cb 34 ff 4d 8b b5 28 05 00 00 e8 23 cb 34 ff 44 89 f9 89 da 4c 89 f6 48 c7 c7 d0 58 14 83 e8 4f de 21 ff <0f> 0b 4c 8b 75 30 e9 54 ff ff ff 48 8 3 c4 10 5b 5d 41 5c 41 5d 41 RSP: 0018:ffffc90002b835b0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff811c8527 RDX: 0000000000000000 RSI: ffffffff811c8534 RDI: 0000000000000001 RBP: ffff8881011b3d00 R08: ffff88810b3abe00 R09: 205d303839303631 R10: 666572207972746e R11: 72746e6520444947 R12: 0000000000000001 R13: ffff888106390000 R14: ffff8881011f2110 R15: 0000000000000001 FS: 00007fecc3b70800(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000340 CR3: 000000010435a001 CR4: 00000000003706b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_regs+0x94/0xa0 ? __warn+0x9e/0x1c0 ? gid_table_release_one+0x181/0x1a0 ? report_bug+0x1f9/0x340 ? gid_table_release_one+0x181/0x1a0 ? handle_bug+0xa2/0x110 ? exc_invalid_op+0x31/0xa0 ? asm_exc_invalid_op+0x16/0x20 ? __warn_printk+0xc7/0x180 ? __warn_printk+0xd4/0x180 ? gid_table_release_one+0x181/0x1a0 ib_device_release+0x71/0xe0 ? __pfx_ib_device_release+0x10/0x10 device_release+0x44/0xd0 kobject_put+0x135/0x3d0 put_device+0x20/0x30 rxe_net_add+0x7d/0xa0 rxe_newlink+0xd7/0x190 nldev_newlink+0x1b0/0x2a0 ? __pfx_nldev_newlink+0x10/0x10 rdma_nl_rcv_msg+0x1ad/0x2e0 rdma_nl_rcv_skb.constprop.0+0x176/0x210 netlink_unicast+0x2de/0x400 netlink_sendmsg+0x306/0x660 __sock_sendmsg+0x110/0x120 ____sys_sendmsg+0x30e/0x390 ___sys_sendmsg+0x9b/0xf0 ? kstrtouint+0x6e/0xa0 ? kstrtouint_from_user+0x7c/0xb0 ? get_pid_task+0xb0/0xd0 ? proc_fail_nth_write+0x5b/0x140 ? __fget_light+0x9a/0x200 ? preempt_count_add+0x47/0xa0 __sys_sendmsg+0x61/0xd0 do_syscall_64+0x50/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 1901b91f9982 ("IB/core: Fix potential NULL pointer dereference in pkey cache") Signed-off-by: Patrisious Haddad Reviewed-by: Maher Sanalla Link: https://patch.msgid.link/79137687d829899b0b1c9835fcb4b258004c439a.1725273354.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index b7251ed7a8df..0b88203720b0 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1640,8 +1640,10 @@ int ib_cache_setup_one(struct ib_device *device) rdma_for_each_port (device, p) { err = ib_cache_update(device, p, true, true, true); - if (err) + if (err) { + gid_table_cleanup_one(device); return err; + } } return 0; From 95248d7497bcbfe7deed4805469c6ff6ddd7f9d1 Mon Sep 17 00:00:00 2001 From: Alexandra Diupina Date: Tue, 3 Sep 2024 14:58:23 +0300 Subject: [PATCH 305/534] PCI: kirin: Fix buffer overflow in kirin_pcie_parse_port() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c500a86693a126c9393e602741e348f80f1b0fc5 ] Within kirin_pcie_parse_port(), the pcie->num_slots is compared to pcie->gpio_id_reset size (MAX_PCI_SLOTS) which is correct and would lead to an overflow. Thus, fix condition to pcie->num_slots + 1 >= MAX_PCI_SLOTS and move pcie->num_slots increment below the if-statement to avoid out-of-bounds array access. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: b22dbbb24571 ("PCI: kirin: Support PERST# GPIOs for HiKey970 external PEX 8606 bridge") Link: https://lore.kernel.org/linux-pci/20240903115823.30647-1-adiupina@astralinux.ru Signed-off-by: Alexandra Diupina [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-kirin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-kirin.c b/drivers/pci/controller/dwc/pcie-kirin.c index 2ee146767971..421697ec7591 100644 --- a/drivers/pci/controller/dwc/pcie-kirin.c +++ b/drivers/pci/controller/dwc/pcie-kirin.c @@ -415,12 +415,12 @@ static int kirin_pcie_parse_port(struct kirin_pcie *pcie, if (pcie->gpio_id_reset[i] < 0) continue; - pcie->num_slots++; - if (pcie->num_slots > MAX_PCI_SLOTS) { + if (pcie->num_slots + 1 >= MAX_PCI_SLOTS) { dev_err(dev, "Too many PCI slots!\n"); ret = -EINVAL; goto put_node; } + pcie->num_slots++; ret = of_pci_get_devfn(child); if (ret < 0) { From 7838f6c8a64bbc3cafeb91cde275aae2a185dc4d Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 2 Sep 2024 19:29:20 +0800 Subject: [PATCH 306/534] RDMA/erdma: Return QP state in erdma_query_qp [ Upstream commit e77127ff6416b17e0b3e630ac46ee5c9a6570f57 ] Fix qp_state and cur_qp_state to return correct values in struct ib_qp_attr. Fixes: 155055771704 ("RDMA/erdma: Add verbs implementation") Signed-off-by: Cheng Xu Link: https://patch.msgid.link/20240902112920.58749-4-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/erdma/erdma_verbs.c | 25 ++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index c317947563fb..b010c4209ea3 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -1540,11 +1540,31 @@ int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, return ret; } +static enum ib_qp_state query_qp_state(struct erdma_qp *qp) +{ + switch (qp->attrs.state) { + case ERDMA_QP_STATE_IDLE: + return IB_QPS_INIT; + case ERDMA_QP_STATE_RTR: + return IB_QPS_RTR; + case ERDMA_QP_STATE_RTS: + return IB_QPS_RTS; + case ERDMA_QP_STATE_CLOSING: + return IB_QPS_ERR; + case ERDMA_QP_STATE_TERMINATE: + return IB_QPS_ERR; + case ERDMA_QP_STATE_ERROR: + return IB_QPS_ERR; + default: + return IB_QPS_ERR; + } +} + int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { - struct erdma_qp *qp; struct erdma_dev *dev; + struct erdma_qp *qp; if (ibqp && qp_attr && qp_init_attr) { qp = to_eqp(ibqp); @@ -1571,6 +1591,9 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->cap = qp_attr->cap; + qp_attr->qp_state = query_qp_state(qp); + qp_attr->cur_qp_state = query_qp_state(qp); + return 0; } From 613a8d27d1e1a5d80542da44af2f301a0add4235 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Tue, 3 Sep 2024 14:24:49 +0300 Subject: [PATCH 307/534] RDMA/mlx5: Limit usage of over-sized mkeys from the MR cache [ Upstream commit ee6d57a2e13d11ce9050cfc3e3b69ef707a44a63 ] When searching the MR cache for suitable cache entries, don't use mkeys larger than twice the size required for the MR. This should ensure the usage of mkeys closer to the minimal required size and reduce memory waste. On driver init we create entries for mkeys with clear attributes and powers of 2 sizes from 4 to the max supported size. This solves the issue for anyone using mkeys that fit these requirements. In the use case where an MR is registered with different attributes, like an access flag we can't UMR, we'll create a new cache entry to store it upon dereg. Without this fix, any later registration with same attributes and smaller size will use the newly created cache entry and it's mkeys, disregarding the memory waste of using mkeys larger than required. For example, one worst-case scenario can be when registering and deregistering a 1GB mkey with ATS enabled which will cause the creation of a new cache entry to hold those type of mkeys. A user registering a 4k MR with ATS will end up using the new cache entry and an mkey that can support a 1GB MR, thus wasting x250k memory than actually needed in the HW. Additionally, allow all small registration to use the smallest size cache entry that is initialized on driver load even if size is larger than twice the required size. Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key") Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/8ba3a6e3748aace2026de8b83da03aba084f78f4.1725362530.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/mr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 50a1786231c7..9e465cf99733 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -48,6 +48,7 @@ enum { MAX_PENDING_REG_MR = 8, }; +#define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4 #define MLX5_UMR_ALIGN 2048 static void @@ -715,6 +716,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, { struct rb_node *node = dev->cache.rb_root.rb_node; struct mlx5_cache_ent *cur, *smallest = NULL; + u64 ndescs_limit; int cmp; /* @@ -733,10 +735,18 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, return cur; } + /* + * Limit the usage of mkeys larger than twice the required size while + * also allowing the usage of smallest cache entry for small MRs. + */ + ndescs_limit = max_t(u64, rb_key.ndescs * 2, + MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS); + return (smallest && smallest->rb_key.access_mode == rb_key.access_mode && smallest->rb_key.access_flags == rb_key.access_flags && - smallest->rb_key.ats == rb_key.ats) ? + smallest->rb_key.ats == rb_key.ats && + smallest->rb_key.ndescs <= ndescs_limit) ? smallest : NULL; } @@ -986,7 +996,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) mlx5_mkey_cache_debugfs_init(dev); mutex_lock(&cache->rb_lock); for (i = 0; i <= mkey_cache_max_order(dev); i++) { - rb_key.ndescs = 1 << (i + 2); + rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i; ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); if (IS_ERR(ent)) { ret = PTR_ERR(ent); From b972bade1578595182246c1833698b18060a06f5 Mon Sep 17 00:00:00 2001 From: Jonas Blixt Date: Thu, 1 Aug 2024 14:18:45 +0200 Subject: [PATCH 308/534] watchdog: imx_sc_wdt: Don't disable WDT in suspend [ Upstream commit 2d9d6d300fb0a4ae4431bb308027ac9385746d42 ] Parts of the suspend and resume chain is left unprotected if we disable the WDT here. >From experiments we can see that the SCU disables and re-enables the WDT when we enter and leave suspend to ram. By not touching the WDT here we are protected by the WDT all the way to the SCU. Signed-off-by: Jonas Blixt CC: Anson Huang Fixes: 986857acbc9a ("watchdog: imx_sc: Add i.MX system controller watchdog support") Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240801121845.1465765-1-jonas.blixt@actia.se Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/imx_sc_wdt.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c index 8ac021748d16..79649b0e89e4 100644 --- a/drivers/watchdog/imx_sc_wdt.c +++ b/drivers/watchdog/imx_sc_wdt.c @@ -213,29 +213,6 @@ register_device: return devm_watchdog_register_device(dev, wdog); } -static int __maybe_unused imx_sc_wdt_suspend(struct device *dev) -{ - struct imx_sc_wdt_device *imx_sc_wdd = dev_get_drvdata(dev); - - if (watchdog_active(&imx_sc_wdd->wdd)) - imx_sc_wdt_stop(&imx_sc_wdd->wdd); - - return 0; -} - -static int __maybe_unused imx_sc_wdt_resume(struct device *dev) -{ - struct imx_sc_wdt_device *imx_sc_wdd = dev_get_drvdata(dev); - - if (watchdog_active(&imx_sc_wdd->wdd)) - imx_sc_wdt_start(&imx_sc_wdd->wdd); - - return 0; -} - -static SIMPLE_DEV_PM_OPS(imx_sc_wdt_pm_ops, - imx_sc_wdt_suspend, imx_sc_wdt_resume); - static const struct of_device_id imx_sc_wdt_dt_ids[] = { { .compatible = "fsl,imx-sc-wdt", }, { /* sentinel */ } @@ -247,7 +224,6 @@ static struct platform_driver imx_sc_wdt_driver = { .driver = { .name = "imx-sc-wdt", .of_match_table = imx_sc_wdt_dt_ids, - .pm = &imx_sc_wdt_pm_ops, }, }; module_platform_driver(imx_sc_wdt_driver); From 85e37ac13906764d7749f877bd4fd74da36b0cc8 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 6 Sep 2024 17:34:36 +0800 Subject: [PATCH 309/534] RDMA/hns: Don't modify rq next block addr in HIP09 QPC [ Upstream commit 6928d264e328e0cb5ee7663003a6e46e4cba0a7e ] The field 'rq next block addr' in QPC can be updated by driver only on HIP08. On HIP09 HW updates this field while driver is not allowed. Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240906093444.3571619-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a49280e2df8c..8f93aacde493 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4391,12 +4391,14 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, upper_32_bits(to_hr_hw_page_addr(mtts[0]))); hr_reg_clear(qpc_mask, QPC_RQ_CUR_BLK_ADDR_H); - context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1])); - qpc_mask->rq_nxt_blk_addr = 0; - - hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H, - upper_32_bits(to_hr_hw_page_addr(mtts[1]))); - hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + context->rq_nxt_blk_addr = + cpu_to_le32(to_hr_hw_page_addr(mtts[1])); + qpc_mask->rq_nxt_blk_addr = 0; + hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts[1]))); + hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H); + } return 0; } From d2d9c5127122745da6e887f451dd248cfeffca33 Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Fri, 6 Sep 2024 17:34:37 +0800 Subject: [PATCH 310/534] RDMA/hns: Fix Use-After-Free of rsv_qp on HIP08 [ Upstream commit fd8489294dd2beefb70f12ec4f6132aeec61a4d0 ] Currently rsv_qp is freed before ib_unregister_device() is called on HIP08. During the time interval, users can still dereg MR and rsv_qp will be used in this process, leading to a UAF. Move the release of rsv_qp after calling ib_unregister_device() to fix it. Fixes: 70f92521584f ("RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT") Signed-off-by: wenglianfa Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240906093444.3571619-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8f93aacde493..908b4372bac9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2932,6 +2932,9 @@ err_llm_init_failed: static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) { + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + free_mr_exit(hr_dev); + hns_roce_function_clear(hr_dev); if (!hr_dev->is_vf) @@ -6779,9 +6782,6 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT; hns_roce_handle_device_err(hr_dev); - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) - free_mr_exit(hr_dev); - hns_roce_exit(hr_dev); kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); From 69d9566822af04c1f2b33621826b0c9eef6c5e3d Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Fri, 6 Sep 2024 17:34:39 +0800 Subject: [PATCH 311/534] RDMA/hns: Fix the overflow risk of hem_list_calc_ba_range() [ Upstream commit d586628b169d14bbf36be64d2b3ec9d9d2fe0432 ] The max value of 'unit' and 'hop_num' is 2^24 and 2, so the value of 'step' may exceed the range of u32. Change the type of 'step' to u64. Fixes: 38389eaa4db1 ("RDMA/hns: Add mtr support for mixed multihop addressing") Signed-off-by: wenglianfa Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240906093444.3571619-5-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index c4ac06a33869..a429d95a387c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1098,9 +1098,9 @@ static bool hem_list_is_bottom_bt(int hopnum, int bt_level) * @bt_level: base address table level * @unit: ba entries per bt page */ -static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit) +static u64 hem_list_calc_ba_range(int hopnum, int bt_level, int unit) { - u32 step; + u64 step; int max; int i; @@ -1136,7 +1136,7 @@ int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, { struct hns_roce_buf_region *r; int total = 0; - int step; + u64 step; int i; for (i = 0; i < region_cnt; i++) { @@ -1167,7 +1167,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, int ret = 0; int max_ofs; int level; - u32 step; + u64 step; int end; if (hopnum <= 1) @@ -1204,7 +1204,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, } start_aligned = (distance / step) * step + r->offset; - end = min_t(int, start_aligned + step - 1, max_ofs); + end = min_t(u64, start_aligned + step - 1, max_ofs); cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit, true); if (!cur) { @@ -1293,7 +1293,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, struct hns_roce_hem_item *hem, *temp_hem; int total = 0; int offset; - int step; + u64 step; step = hem_list_calc_ba_range(r->hopnum, 1, unit); if (step < 1) From 094a1821903f33fb91de4b71087773ee16aeb3a0 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 6 Sep 2024 17:34:40 +0800 Subject: [PATCH 312/534] RDMA/hns: Fix spin_unlock_irqrestore() called with IRQs enabled [ Upstream commit 74d315b5af180220d561684d15897730135733a6 ] Fix missuse of spin_lock_irq()/spin_unlock_irq() when spin_lock_irqsave()/spin_lock_irqrestore() was hold. This was discovered through the lock debugging, and the corresponding log is as follows: raw_local_irq_restore() called with IRQs enabled WARNING: CPU: 96 PID: 2074 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x30/0x40 ... Call trace: warn_bogus_irq_restore+0x30/0x40 _raw_spin_unlock_irqrestore+0x84/0xc8 add_qp_to_list+0x11c/0x148 [hns_roce_hw_v2] hns_roce_create_qp_common.constprop.0+0x240/0x780 [hns_roce_hw_v2] hns_roce_create_qp+0x98/0x160 [hns_roce_hw_v2] create_qp+0x138/0x258 ib_create_qp_kernel+0x50/0xe8 create_mad_qp+0xa8/0x128 ib_mad_port_open+0x218/0x448 ib_mad_init_device+0x70/0x1f8 add_client_context+0xfc/0x220 enable_device_and_get+0xd0/0x140 ib_register_device.part.0+0xf4/0x1c8 ib_register_device+0x34/0x50 hns_roce_register_device+0x174/0x3d0 [hns_roce_hw_v2] hns_roce_init+0xfc/0x2c0 [hns_roce_hw_v2] __hns_roce_hw_v2_init_instance+0x7c/0x1d0 [hns_roce_hw_v2] hns_roce_hw_v2_init_instance+0x9c/0x180 [hns_roce_hw_v2] Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240906093444.3571619-6-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_qp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bff00b3af41f..04063cfacae5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1379,19 +1379,19 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) __acquire(&send_cq->lock); __acquire(&recv_cq->lock); } else if (unlikely(send_cq != NULL && recv_cq == NULL)) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else if (unlikely(send_cq == NULL && recv_cq != NULL)) { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); __acquire(&send_cq->lock); } else if (send_cq == recv_cq) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } @@ -1411,13 +1411,13 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, spin_unlock(&recv_cq->lock); } else if (send_cq == recv_cq) { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } } From 3ab289914eab74e80a70f61f8197b48d38e30ff9 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 6 Sep 2024 17:34:41 +0800 Subject: [PATCH 313/534] RDMA/hns: Fix VF triggering PF reset in abnormal interrupt handler [ Upstream commit 4321feefa5501a746ebf6a7d8b59e6b955ae1860 ] In abnormal interrupt handler, a PF reset will be triggered even if the device is a VF. It should be a VF reset. Fixes: 2b9acb9a97fe ("RDMA/hns: Add the process of AEQ overflow for hip08") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240906093444.3571619-7-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 908b4372bac9..2cdd002b7122 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6070,6 +6070,7 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev, struct pci_dev *pdev = hr_dev->pci_dev; struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); const struct hnae3_ae_ops *ops = ae_dev->ops; + enum hnae3_reset_type reset_type; irqreturn_t int_work = IRQ_NONE; u32 int_en; @@ -6081,10 +6082,12 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev, roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S); + reset_type = hr_dev->is_vf ? + HNAE3_VF_FUNC_RESET : HNAE3_FUNC_RESET; + /* Set reset level for reset_event() */ if (ops->set_default_reset_request) - ops->set_default_reset_request(ae_dev, - HNAE3_FUNC_RESET); + ops->set_default_reset_request(ae_dev, reset_type); if (ops->reset_event) ops->reset_event(pdev, NULL); From 288ecfd3e8aabc8a6d2303d0577e6627fdaf58fe Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 6 Sep 2024 17:34:42 +0800 Subject: [PATCH 314/534] RDMA/hns: Fix 1bit-ECC recovery address in non-4K OS [ Upstream commit ce196f6297c7f3ab7780795e40efd6c521f60c8b ] The 1bit-ECC recovery address read from HW only contain bits 64:12, so it should be fixed left-shifted 12 bits when used. Currently, the driver will shift the address left by PAGE_SHIFT when used, which is wrong in non-4K OS. Fixes: 2de949abd6a5 ("RDMA/hns: Recover 1bit-ECC error of RAM on chip") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240906093444.3571619-8-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2cdd002b7122..d226081e1cc0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6157,7 +6157,7 @@ static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data) res_type == ECC_RESOURCE_SCCC) return le64_to_cpu(*data); - return le64_to_cpu(*data) << PAGE_SHIFT; + return le64_to_cpu(*data) << HNS_HW_PAGE_SHIFT; } static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type, From b3b7ff07675c6520380609195376110a59aa3fb3 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 6 Sep 2024 17:34:43 +0800 Subject: [PATCH 315/534] RDMA/hns: Optimize hem allocation performance [ Upstream commit fe51f6254d81f5a69c31df16353d6539b2b51630 ] When allocating MTT hem, for each hop level of each hem that is being allocated, the driver iterates the hem list to find out whether the bt page has been allocated in this hop level. If not, allocate a new one and splice it to the list. The time complexity is O(n^2) in worst cases. Currently the allocation for-loop uses 'unit' as the step size. This actually has taken into account the reuse of last-hop-level MTT bt pages by multiple buffer pages. Thus pages of last hop level will never have been allocated, so there is no need to iterate the hem list in last hop level. Removing this unnecessary iteration can reduce the time complexity to O(n). Fixes: 38389eaa4db1 ("RDMA/hns: Add mtr support for mixed multihop addressing") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240906093444.3571619-9-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hem.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index a429d95a387c..7ebf80504fd1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1191,10 +1191,12 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, /* config L1 bt to last bt and link them to corresponding parent */ for (level = 1; level < hopnum; level++) { - cur = hem_list_search_item(&mid_bt[level], offset); - if (cur) { - hem_ptrs[level] = cur; - continue; + if (!hem_list_is_bottom_bt(hopnum, level)) { + cur = hem_list_search_item(&mid_bt[level], offset); + if (cur) { + hem_ptrs[level] = cur; + continue; + } } step = hem_list_calc_ba_range(hopnum, level, unit); From e8721e9ba113746b1463bc6f0661c73b5694fef0 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Mon, 9 Sep 2024 14:53:31 +0800 Subject: [PATCH 316/534] RDMA/hns: Fix restricted __le16 degrades to integer issue [ Upstream commit f4ccc0a2a0c5977540f519588636b5bc81aae2db ] Fix sparse warnings: restricted __le16 degrades to integer. Fixes: 5a87279591a1 ("RDMA/hns: Support hns HW stats") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409080508.g4mNSLwy-lkp@intel.com/ Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240909065331.3950268-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d226081e1cc0..8066750afab9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1653,8 +1653,8 @@ static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev, for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) { bd_idx = i / CNT_PER_DESC; - if (!(desc[bd_idx].flag & HNS_ROCE_CMD_FLAG_NEXT) && - bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC) + if (bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC && + !(desc[bd_idx].flag & cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT))) break; cnt_data = (__le64 *)&desc[bd_idx].data[0]; From 6eff336b103fc0065dfee6f93a4f4074011f374b Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 9 Sep 2024 20:30:20 +0300 Subject: [PATCH 317/534] RDMA/mlx5: Obtain upper net device only when needed [ Upstream commit 3ed7f9e239938a0cfaf3689e2f545229ecabec06 ] Report the upper device's state as the RDMA port state only in RoCE LAG or switchdev LAG. Fixes: 27f9e0ccb6da ("net/mlx5: Lag, Add single RDMA device in multiport mode") Signed-off-by: Mark Bloch Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909173025.30422-3-michaelgur@nvidia.com Reviewed-by: Kalesh AP Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2d179bc56ce6..296af7a5c279 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -539,7 +539,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u32 port_num, if (!ndev) goto out; - if (dev->lag_active) { + if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) { rcu_read_lock(); upper = netdev_master_upper_dev_get_rcu(ndev); if (upper) { From 21ada6915c453a49273e9b84ea64b78063a901a5 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 8 Jul 2024 11:28:46 +0800 Subject: [PATCH 318/534] riscv: Fix fp alignment bug in perf_callchain_user() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 22ab08955ea13be04a8efd20cc30890e0afaa49c ] The standard RISC-V calling convention said: "The stack grows downward and the stack pointer is always kept 16-byte aligned". So perf_callchain_user() should check whether 16-byte aligned for fp. Link: https://riscv.org/wp-content/uploads/2015/01/riscv-calling.pdf Fixes: dbeb90b0c1eb ("riscv: Add perf callchain support") Signed-off-by: Jinjie Ruan Cc: Björn Töpel Link: https://lore.kernel.org/r/20240708032847.2998158-2-ruanjinjie@huawei.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/perf_callchain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 3348a61de7d9..2932791e9388 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -62,7 +62,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, perf_callchain_store(entry, regs->epc); fp = user_backtrace(entry, fp, regs->ra); - while (fp && !(fp & 0x3) && entry->nr < entry->max_stack) + while (fp && !(fp & 0x7) && entry->nr < entry->max_stack) fp = user_backtrace(entry, fp, 0); } From 0d50ae281a1712b9b2ca72830a96b8f11882358d Mon Sep 17 00:00:00 2001 From: Mikhail Lobanov Date: Thu, 12 Sep 2024 10:58:39 -0400 Subject: [PATCH 319/534] RDMA/cxgb4: Added NULL check for lookup_atid [ Upstream commit e766e6a92410ca269161de059fff0843b8ddd65f ] The lookup_atid() function can return NULL if the ATID is invalid or does not exist in the identifier table, which could lead to dereferencing a null pointer without a check in the `act_establish()` and `act_open_rpl()` functions. Add a NULL check to prevent null pointer dereferencing. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: cfdda9d76436 ("RDMA/cxgb4: Add driver for Chelsio T4 RNIC") Signed-off-by: Mikhail Lobanov Link: https://patch.msgid.link/20240912145844.77516-1-m.lobanov@rosalinux.ru Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/cxgb4/cm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 040ba2224f9f..b3757c6a0457 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1222,6 +1222,8 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) int ret; ep = lookup_atid(t, atid); + if (!ep) + return -EINVAL; pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid, be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); @@ -2279,6 +2281,9 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) int ret = 0; ep = lookup_atid(t, atid); + if (!ep) + return -EINVAL; + la = (struct sockaddr_in *)&ep->com.local_addr; ra = (struct sockaddr_in *)&ep->com.remote_addr; la6 = (struct sockaddr_in6 *)&ep->com.local_addr; From b15dd2aa7987e89a2a889a4c241ad9c4a354e689 Mon Sep 17 00:00:00 2001 From: Vitaliy Shevtsov Date: Mon, 16 Sep 2024 21:58:05 +0500 Subject: [PATCH 320/534] RDMA/irdma: fix error message in irdma_modify_qp_roce() [ Upstream commit 9f0eafe86ea0a589676209d0cff1a1ed49a037d3 ] Use a correct field max_dest_rd_atomic instead of max_rd_atomic for the error output. Found by Linux Verification Center (linuxtesting.org) with Svace. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Vitaliy Shevtsov Link: https://lore.kernel.org/stable/20240916165817.14691-1-v.shevtsov%40maxima.ru Link: https://patch.msgid.link/20240916165817.14691-1-v.shevtsov@maxima.ru Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index ca3e89909ecf..38cecb28d322 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1347,7 +1347,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr->max_dest_rd_atomic > dev->hw_attrs.max_hw_ird) { ibdev_err(&iwdev->ibdev, "rd_atomic = %d, above max_hw_ird=%d\n", - attr->max_rd_atomic, + attr->max_dest_rd_atomic, dev->hw_attrs.max_hw_ird); return -EINVAL; } From 16e5bed6c1883b19f9fcbdff996aa3381954d5f3 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Thu, 31 Aug 2023 20:39:27 +0800 Subject: [PATCH 321/534] ntb: intel: Fix the NULL vs IS_ERR() bug for debugfs_create_dir() [ Upstream commit e229897d373a87ee09ec5cc4ecd4bb2f895fc16b ] The debugfs_create_dir() function returns error pointers. It never returns NULL. So use IS_ERR() to check it. Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") Signed-off-by: Jinjie Ruan Reviewed-by: Dave Jiang Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/hw/intel/ntb_hw_gen1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c index 9ab836d0d4f1..079b8cd79785 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen1.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c @@ -778,7 +778,7 @@ static void ndev_init_debugfs(struct intel_ntb_dev *ndev) ndev->debugfs_dir = debugfs_create_dir(pci_name(ndev->ntb.pdev), debugfs_dir); - if (!ndev->debugfs_dir) + if (IS_ERR(ndev->debugfs_dir)) ndev->debugfs_info = NULL; else ndev->debugfs_info = From fd8932cf6b763473c371cd62f503bb9d8dd6ddf7 Mon Sep 17 00:00:00 2001 From: Max Hawking Date: Sun, 8 Oct 2023 20:45:16 -0700 Subject: [PATCH 322/534] ntb_perf: Fix printk format [ Upstream commit 1501ae7479c8d0f66efdbfdc9ae8d6136cefbd37 ] The correct printk format is %pa or %pap, but not %pa[p]. Fixes: 99a06056124d ("NTB: ntb_perf: Fix address err in perf_copy_chunk") Signed-off-by: Max Hawking Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/test/ntb_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 553f1f46bc66..72bc1d017a46 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -1227,7 +1227,7 @@ static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf, "\tOut buffer addr 0x%pK\n", peer->outbuf); pos += scnprintf(buf + pos, buf_size - pos, - "\tOut buff phys addr %pa[p]\n", &peer->out_phys_addr); + "\tOut buff phys addr %pap\n", &peer->out_phys_addr); pos += scnprintf(buf + pos, buf_size - pos, "\tOut buffer size %pa\n", &peer->outbuf_size); From b743922b5aadbe714e08545d4002f23554162b4f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 5 Sep 2024 14:22:07 -0700 Subject: [PATCH 323/534] ntb: Force physically contiguous allocation of rx ring buffers [ Upstream commit 061a785a114f159e990ea8ed8d1b7dca4b41120f ] Physical addresses under IOVA on x86 platform are mapped contiguously as a side effect before the patch that removed CONFIG_DMA_REMAP. The NTB rx buffer ring is a single chunk DMA buffer that is allocated against the NTB PCI device. If the receive side is using a DMA device, then the buffers are remapped against the DMA device before being submitted via the dmaengine API. This scheme becomes a problem when the physical memory is discontiguous. When dma_map_page() is called on the kernel virtual address from the dma_alloc_coherent() call, the new IOVA mapping no longer points to all the physical memory allocated due to being discontiguous. Change dma_alloc_coherent() to dma_alloc_attrs() in order to force DMA_ATTR_FORCE_CONTIGUOUS attribute. This is the best fix for the circumstance. A potential future solution may be having the DMA mapping API providing a way to alias an existing IOVA mapping to a new device perhaps. This fix is not to fix the patch pointed to by the fixes tag, but to fix the issue arised in the ntb_transport driver on x86 platforms after the said patch is applied. Reported-by: Jerry Dai Fixes: f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP") Tested-by: Jerry Dai Signed-off-by: Dave Jiang Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/ntb_transport.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f9e7847a378e..c84fadfc63c5 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -807,16 +807,29 @@ static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) } static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw, - struct device *dma_dev, size_t align) + struct device *ntb_dev, size_t align) { dma_addr_t dma_addr; void *alloc_addr, *virt_addr; int rc; - alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size, - &dma_addr, GFP_KERNEL); + /* + * The buffer here is allocated against the NTB device. The reason to + * use dma_alloc_*() call is to allocate a large IOVA contiguous buffer + * backing the NTB BAR for the remote host to write to. During receive + * processing, the data is being copied out of the receive buffer to + * the kernel skbuff. When a DMA device is being used, dma_map_page() + * is called on the kvaddr of the receive buffer (from dma_alloc_*()) + * and remapped against the DMA device. It appears to be a double + * DMA mapping of buffers, but first is mapped to the NTB device and + * second is to the DMA device. DMA_ATTR_FORCE_CONTIGUOUS is necessary + * in order for the later dma_map_page() to not fail. + */ + alloc_addr = dma_alloc_attrs(ntb_dev, mw->alloc_size, + &dma_addr, GFP_KERNEL, + DMA_ATTR_FORCE_CONTIGUOUS); if (!alloc_addr) { - dev_err(dma_dev, "Unable to alloc MW buff of size %zu\n", + dev_err(ntb_dev, "Unable to alloc MW buff of size %zu\n", mw->alloc_size); return -ENOMEM; } @@ -845,7 +858,7 @@ static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw, return 0; err: - dma_free_coherent(dma_dev, mw->alloc_size, alloc_addr, dma_addr); + dma_free_coherent(ntb_dev, mw->alloc_size, alloc_addr, dma_addr); return rc; } From a1afbbb5276f943ad7173d0b4c626b8c75a260da Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 21 Aug 2024 22:03:18 +0800 Subject: [PATCH 324/534] nfsd: call cache_put if xdr_reserve_space returns NULL [ Upstream commit d078cbf5c38de83bc31f83c47dcd2184c04a50c7 ] If not enough buffer space available, but idmap_lookup has triggered lookup_fn which calls cache_get and returns successfully. Then we missed to call cache_put here which pairs with cache_get. Fixes: ddd1ea563672 ("nfsd4: use xdr_reserve_space in attribute encoding") Signed-off-by: Guoqing Jiang Reviwed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4idmap.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 7a806ac13e31..8cca1329f348 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -581,6 +581,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr, .id = id, .type = type, }; + __be32 status = nfs_ok; __be32 *p; int ret; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); @@ -593,12 +594,16 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr, return nfserrno(ret); ret = strlen(item->name); WARN_ON_ONCE(ret > IDMAP_NAMESZ); + p = xdr_reserve_space(xdr, ret + 4); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque(p, item->name, ret); + if (unlikely(!p)) { + status = nfserr_resource; + goto out_put; + } + xdr_encode_opaque(p, item->name, ret); +out_put: cache_put(&item->h, nn->idtoname_cache); - return 0; + return status; } static bool From 318f70857caab3da9a6ada9bc8c1f4f7591b695e Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Tue, 3 Sep 2024 19:14:46 +0800 Subject: [PATCH 325/534] nfsd: return -EINVAL when namelen is 0 [ Upstream commit 22451a16b7ab7debefce660672566be887db1637 ] When we have a corrupted main.sqlite in /var/lib/nfs/nfsdcld/, it may result in namelen being 0, which will cause memdup_user() to return ZERO_SIZE_PTR. When we access the name.data that has been assigned the value of ZERO_SIZE_PTR in nfs4_client_to_reclaim(), null pointer dereference is triggered. [ T1205] ================================================================== [ T1205] BUG: KASAN: null-ptr-deref in nfs4_client_to_reclaim+0xe9/0x260 [ T1205] Read of size 1 at addr 0000000000000010 by task nfsdcld/1205 [ T1205] [ T1205] CPU: 11 PID: 1205 Comm: nfsdcld Not tainted 5.10.0-00003-g2c1423731b8d #406 [ T1205] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 [ T1205] Call Trace: [ T1205] dump_stack+0x9a/0xd0 [ T1205] ? nfs4_client_to_reclaim+0xe9/0x260 [ T1205] __kasan_report.cold+0x34/0x84 [ T1205] ? nfs4_client_to_reclaim+0xe9/0x260 [ T1205] kasan_report+0x3a/0x50 [ T1205] nfs4_client_to_reclaim+0xe9/0x260 [ T1205] ? nfsd4_release_lockowner+0x410/0x410 [ T1205] cld_pipe_downcall+0x5ca/0x760 [ T1205] ? nfsd4_cld_tracking_exit+0x1d0/0x1d0 [ T1205] ? down_write_killable_nested+0x170/0x170 [ T1205] ? avc_policy_seqno+0x28/0x40 [ T1205] ? selinux_file_permission+0x1b4/0x1e0 [ T1205] rpc_pipe_write+0x84/0xb0 [ T1205] vfs_write+0x143/0x520 [ T1205] ksys_write+0xc9/0x170 [ T1205] ? __ia32_sys_read+0x50/0x50 [ T1205] ? ktime_get_coarse_real_ts64+0xfe/0x110 [ T1205] ? ktime_get_coarse_real_ts64+0xa2/0x110 [ T1205] do_syscall_64+0x33/0x40 [ T1205] entry_SYSCALL_64_after_hwframe+0x67/0xd1 [ T1205] RIP: 0033:0x7fdbdb761bc7 [ T1205] Code: 0f 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 514 [ T1205] RSP: 002b:00007fff8c4b7248 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ T1205] RAX: ffffffffffffffda RBX: 000000000000042b RCX: 00007fdbdb761bc7 [ T1205] RDX: 000000000000042b RSI: 00007fff8c4b75f0 RDI: 0000000000000008 [ T1205] RBP: 00007fdbdb761bb0 R08: 0000000000000000 R09: 0000000000000001 [ T1205] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000042b [ T1205] R13: 0000000000000008 R14: 00007fff8c4b75f0 R15: 0000000000000000 [ T1205] ================================================================== Fix it by checking namelen. Signed-off-by: Li Lingfeng Fixes: 74725959c33c ("nfsd: un-deprecate nfsdcld") Reviewed-by: Jeff Layton Reviewed-by: Scott Mayhew Tested-by: Scott Mayhew Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4recover.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 3509e73abe1f..4395577825a7 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -806,6 +806,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, ci = &cmsg->cm_u.cm_clntinfo; if (get_user(namelen, &ci->cc_name.cn_len)) return -EFAULT; + if (!namelen) { + dprintk("%s: namelen should not be zero", __func__); + return -EINVAL; + } name.data = memdup_user(&ci->cc_name.cn_id, namelen); if (IS_ERR(name.data)) return PTR_ERR(name.data); @@ -828,6 +832,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, cnm = &cmsg->cm_u.cm_name; if (get_user(namelen, &cnm->cn_len)) return -EFAULT; + if (!namelen) { + dprintk("%s: namelen should not be zero", __func__); + return -EINVAL; + } name.data = memdup_user(&cnm->cn_id, namelen); if (IS_ERR(name.data)) return PTR_ERR(name.data); From 21b4fa3bffc00d61cbf49370dddc4766c7611220 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 12 Sep 2024 17:57:13 +0800 Subject: [PATCH 326/534] crypto: caam - Pad SG length when allocating hash edesc [ Upstream commit 5124bc96162667766f6120b19f57a640c2eccb2a ] Because hardware will read in multiples of 4 SG entries, ensure the allocated length is always padded. This was already done by some callers of ahash_edesc_alloc, but ahash_digest was conspicuously missing. In any case, doing it in the allocation function ensures that the memory is always there. Reported-by: Guangwu Zhang Fixes: a5e5c13398f3 ("crypto: caam - fix S/G table passing page boundary") Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/caam/caamhash.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 290c8500c247..65785dc5b73b 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -708,6 +708,7 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, GFP_KERNEL : GFP_ATOMIC; struct ahash_edesc *edesc; + sg_num = pad_sg_nents(sg_num); edesc = kzalloc(struct_size(edesc, sec4_sg, sg_num), flags); if (!edesc) return NULL; From 8edf3a4038f4c267b84099f40dbbf659a4a96a55 Mon Sep 17 00:00:00 2001 From: Danny Tsen Date: Thu, 19 Sep 2024 07:36:37 -0400 Subject: [PATCH 327/534] crypto: powerpc/p10-aes-gcm - Disable CRYPTO_AES_GCM_P10 [ Upstream commit 44ac4625ea002deecd0c227336c95b724206c698 ] Data mismatch found when testing ipsec tunnel with AES/GCM crypto. Disabling CRYPTO_AES_GCM_P10 in Kconfig for this feature. Fixes: fd0e9b3e2ee6 ("crypto: p10-aes-gcm - An accelerated AES/GCM stitched implementation") Fixes: cdcecfd9991f ("crypto: p10-aes-gcm - Glue code for AES/GCM stitched implementation") Fixes: 45a4672b9a6e2 ("crypto: p10-aes-gcm - Update Kconfig and Makefile") Signed-off-by: Danny Tsen Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- arch/powerpc/crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 6fc2248ca561..fccf742c55c2 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -96,6 +96,7 @@ config CRYPTO_AES_PPC_SPE config CRYPTO_AES_GCM_P10 tristate "Stitched AES/GCM acceleration support on P10 or later CPU (PPC)" + depends on BROKEN depends on PPC64 && CPU_LITTLE_ENDIAN && VSX select CRYPTO_LIB_AES select CRYPTO_ALGAPI From d889928bbc6915bfe147ff438c34e12dcc6447e1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Jun 2024 11:13:48 +0800 Subject: [PATCH 328/534] f2fs: atomic: fix to avoid racing w/ GC [ Upstream commit 1a0bd289a5db1df8df8fab949633a0b8d3f235ee ] Case #1: SQLite App GC Thread Kworker Shrinker - f2fs_ioc_start_atomic_write - f2fs_ioc_commit_atomic_write - f2fs_commit_atomic_write - filemap_write_and_wait_range : write atomic_file's data to cow_inode echo 3 > drop_caches to drop atomic_file's cache. - f2fs_gc - gc_data_segment - move_data_page - set_page_dirty - writepages - f2fs_do_write_data_page : overwrite atomic_file's data to cow_inode - f2fs_down_write(&fi->i_gc_rwsem[WRITE]) - __f2fs_commit_atomic_write - f2fs_up_write(&fi->i_gc_rwsem[WRITE]) Case #2: SQLite App GC Thread Kworker - f2fs_ioc_start_atomic_write - __writeback_single_inode - do_writepages - f2fs_write_cache_pages - f2fs_write_single_data_page - f2fs_do_write_data_page : write atomic_file's data to cow_inode - f2fs_gc - gc_data_segment - move_data_page - set_page_dirty - writepages - f2fs_do_write_data_page : overwrite atomic_file's data to cow_inode - f2fs_ioc_commit_atomic_write In above cases racing in between atomic_write and GC, previous data in atomic_file may be overwrited to cow_file, result in data corruption. This patch introduces PAGE_PRIVATE_ATOMIC_WRITE bit flag in page.private, and use it to indicate that there is last dirty data in atomic file, and the data should be writebacked into cow_file, if the flag is not tagged in page, we should never write data across files. Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 10 +++++++++- fs/f2fs/f2fs.h | 8 +++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 84fc87018180..d54644d38684 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2623,10 +2623,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) struct dnode_of_data dn; struct node_info ni; bool ipu_force = false; + bool atomic_commit; int err = 0; /* Use COW inode to make dnode_of_data for atomic write */ - if (f2fs_is_atomic_file(inode)) + atomic_commit = f2fs_is_atomic_file(inode) && + page_private_atomic(fio->page); + if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -2730,6 +2733,8 @@ got_it: f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(inode, FI_APPEND_WRITE); + if (atomic_commit) + clear_page_private_atomic(page); out_writepage: f2fs_put_dnode(&dn); out: @@ -3700,6 +3705,9 @@ static int f2fs_write_end(struct file *file, set_page_dirty(page); + if (f2fs_is_atomic_file(inode)) + set_page_private_atomic(page); + if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { f2fs_i_size_write(inode, pos + copied); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6371b295fba6..15780e467a64 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1411,7 +1411,8 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr); * bit 1 PAGE_PRIVATE_ONGOING_MIGRATION * bit 2 PAGE_PRIVATE_INLINE_INODE * bit 3 PAGE_PRIVATE_REF_RESOURCE - * bit 4- f2fs private data + * bit 4 PAGE_PRIVATE_ATOMIC_WRITE + * bit 5- f2fs private data * * Layout B: lowest bit should be 0 * page.private is a wrapped pointer. @@ -1421,6 +1422,7 @@ enum { PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */ PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */ PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */ + PAGE_PRIVATE_ATOMIC_WRITE, /* data page from atomic write path */ PAGE_PRIVATE_MAX }; @@ -2386,14 +2388,17 @@ static inline void clear_page_private_##name(struct page *page) \ PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_GET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_SET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); static inline unsigned long get_page_private_data(struct page *page) { @@ -2425,6 +2430,7 @@ static inline void clear_page_private_all(struct page *page) clear_page_private_reference(page); clear_page_private_gcing(page); clear_page_private_inline(page); + clear_page_private_atomic(page); f2fs_bug_on(F2FS_P_SB(page), page_private(page)); } From 6c59f87e1eea5c737ef90bce3119ecca0e8c6d01 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Jun 2024 09:47:27 +0800 Subject: [PATCH 329/534] f2fs: reduce expensive checkpoint trigger frequency [ Upstream commit aaf8c0b9ae042494cb4585883b15c1332de77840 ] We may trigger high frequent checkpoint for below case: 1. mkdir /mnt/dir1; set dir1 encrypted 2. touch /mnt/file1; fsync /mnt/file1 3. mkdir /mnt/dir2; set dir2 encrypted 4. touch /mnt/file2; fsync /mnt/file2 ... Although, newly created dir and file are not related, due to commit bbf156f7afa7 ("f2fs: fix lost xattrs of directories"), we will trigger checkpoint whenever fsync() comes after a new encrypted dir created. In order to avoid such performance regression issue, let's record an entry including directory's ino in global cache whenever we update directory's xattr data, and then triggerring checkpoint() only if xattr metadata of target file's parent was updated. This patch updates to cover below no encryption case as well: 1) parent is checkpointed 2) set_xattr(dir) w/ new xnid 3) create(file) 4) fsync(file) Fixes: bbf156f7afa7 ("f2fs: fix lost xattrs of directories") Reported-by: wangzijie Reported-by: Zhiguo Niu Tested-by: Zhiguo Niu Reported-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 3 +++ fs/f2fs/xattr.c | 14 ++++++++++++-- include/trace/events/f2fs.h | 3 ++- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 15780e467a64..f7a8259b9180 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -282,6 +282,7 @@ enum { APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ TRANS_DIR_INO, /* for transactions dir ino list */ + XATTR_DIR_INO, /* for xattr updated dir ino list */ FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; @@ -1149,6 +1150,7 @@ enum cp_reason_type { CP_FASTBOOT_MODE, CP_SPEC_LOG_NUM, CP_RECOVER_DIR, + CP_XATTR_DIR, }; enum iostat_type { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 523896200908..81e342bd993c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -213,6 +213,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; + else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, + XATTR_DIR_INO)) + cp_reason = CP_XATTR_DIR; return cp_reason; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index e47cc917d118..54ab9caaae4d 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -629,6 +629,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_entry *here, *last; void *base_addr, *last_base_addr; int found, newsize; @@ -772,9 +773,18 @@ retry: if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - if (S_ISDIR(inode->i_mode)) - set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); + if (!S_ISDIR(inode->i_mode)) + goto same; + /* + * In restrict mode, fsync() always try to trigger checkpoint for all + * metadata consistency, in other mode, it triggers checkpoint when + * parent's xattr metadata was updated. + */ + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + set_sbi_flag(sbi, SBI_NEED_CP); + else + f2fs_add_ino_entry(sbi, inode->i_ino, XATTR_DIR_INO); same: if (is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 793f82cc1515..b6ffae01a8cd 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -139,7 +139,8 @@ TRACE_DEFINE_ENUM(EX_BLOCK_AGE); { CP_NODE_NEED_CP, "node needs cp" }, \ { CP_FASTBOOT_MODE, "fastboot mode" }, \ { CP_SPEC_LOG_NUM, "log type is 2" }, \ - { CP_RECOVER_DIR, "dir needs recovery" }) + { CP_RECOVER_DIR, "dir needs recovery" }, \ + { CP_XATTR_DIR, "dir's xattr updated" }) #define show_shutdown_mode(type) \ __print_symbolic(type, \ From 67c3c4638f22942d900386f11fc10ed6a3520910 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Jun 2024 15:15:21 +0800 Subject: [PATCH 330/534] f2fs: fix to avoid racing in between read and OPU dio write [ Upstream commit 0cac51185e65dc2a20686184e02f3cafc99eb202 ] If lfs mode is on, buffered read may race w/ OPU dio write as below, it may cause buffered read hits unwritten data unexpectly, and for dio read, the race condition exists as well. Thread A Thread B - f2fs_file_write_iter - f2fs_dio_write_iter - __iomap_dio_rw - f2fs_iomap_begin - f2fs_map_blocks - __allocate_data_block - allocated blkaddr #x - iomap_dio_submit_bio - f2fs_file_read_iter - filemap_read - f2fs_read_data_folio - f2fs_mpage_readpages - f2fs_map_blocks : get blkaddr #x - f2fs_submit_read_bio IRQ - f2fs_read_end_io : read IO on blkaddr #x complete IRQ - iomap_dio_bio_end_io : direct write IO on blkaddr #x complete In LFS mode, if there is inflight dio, let's wait for its completion, this policy won't cover all race cases, however it is a tradeoff which avoids abusing lock around IO paths. Fixes: f847c699cff3 ("f2fs: allow out-place-update for direct IO in LFS mode") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 81e342bd993c..8c4c5c6c1ac6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4541,6 +4541,10 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + /* In LFS mode, if there is inflight dio, wait for its completion */ + if (f2fs_lfs_mode(F2FS_I_SB(inode))) + inode_dio_wait(inode); + if (f2fs_should_use_dio(inode, iocb, to)) { ret = f2fs_dio_read_iter(iocb, to); } else { From 87f9d26fcc50531315b1b127f35a78f69e1e45d0 Mon Sep 17 00:00:00 2001 From: Yeongjin Gil Date: Tue, 13 Aug 2024 16:32:44 +0900 Subject: [PATCH 331/534] f2fs: Create COW inode from parent dentry for atomic write [ Upstream commit 8c1b787938fd86bab27a1492fa887408c75fec2b ] The i_pino in f2fs_inode_info has the previous parent's i_ino when inode was renamed, which may cause f2fs_ioc_start_atomic_write to fail. If file_wrong_pino is true and i_nlink is 1, then to find a valid pino, we should refer to the dentry from inode. To resolve this issue, let's get parent inode using parent dentry directly. Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Reviewed-by: Sungjong Seo Reviewed-by: Sunmin Jeong Signed-off-by: Yeongjin Gil Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8c4c5c6c1ac6..a0d2e180f0ed 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2097,7 +2097,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) struct mnt_idmap *idmap = file_mnt_idmap(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode *pinode; loff_t isize; int ret; @@ -2146,15 +2145,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) /* Check if the inode already has a COW inode */ if (fi->cow_inode == NULL) { /* Create a COW inode for atomic write */ - pinode = f2fs_iget(inode->i_sb, fi->i_pino); - if (IS_ERR(pinode)) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - ret = PTR_ERR(pinode); - goto out; - } + struct dentry *dentry = file_dentry(filp); + struct inode *dir = d_inode(dentry->d_parent); - ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode); - iput(pinode); + ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); if (ret) { f2fs_up_write(&fi->i_gc_rwsem[WRITE]); goto out; From 1bb0686a2e8af8ce467e6c7aa318f1dc5d1c88cc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 12 Aug 2024 22:12:42 +0800 Subject: [PATCH 332/534] f2fs: fix to wait page writeback before setting gcing flag [ Upstream commit a4d7f2b3238fd5f76b9e6434a0bd5d2e29049cff ] Soft IRQ Thread - f2fs_write_end_io - f2fs_defragment_range - set_page_private_gcing - type = WB_DATA_TYPE(page, false); : assign type w/ F2FS_WB_CP_DATA due to page_private_gcing() is true - dec_page_count() w/ wrong type - end_page_writeback() Value of F2FS_WB_CP_DATA reference count may become negative under above race condition, the root cause is we missed to wait page writeback before setting gcing page private flag, let's fix it. Fixes: 2d1fe8a86bf5 ("f2fs: fix to tag gcing flag on page during file defragment") Fixes: 4961acdd65c9 ("f2fs: fix to tag gcing flag on page during block migration") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a0d2e180f0ed..e04f4dcb76d1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2743,6 +2743,8 @@ do_map: goto clear_out; } + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); @@ -4137,6 +4139,8 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) /* It will never fail, when page has pinned above */ f2fs_bug_on(F2FS_I_SB(inode), !page); + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); From 783b6ca3428fcc01426d716c9e8b0dba51eb0a2a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 7 Aug 2024 18:24:35 +0800 Subject: [PATCH 333/534] f2fs: atomic: fix to truncate pagecache before on-disk metadata truncation [ Upstream commit ebd3309aec6271c4616573b0cb83ea25e623070a ] We should always truncate pagecache while truncating on-disk data. Fixes: a46bebd502fe ("f2fs: synchronize atomic write aborts") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e04f4dcb76d1..2f08bf7f2962 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2161,6 +2161,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ + f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); + + invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); + ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); if (ret) { f2fs_up_write(&fi->i_gc_rwsem[WRITE]); From f2971778b2cb08392426064981d5d85ff021ce98 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Feb 2024 22:38:38 +0800 Subject: [PATCH 334/534] f2fs: support .shutdown in f2fs_sops [ Upstream commit ee745e4736fbf33079d0d0808e1343c2280fd59a ] Support .shutdown callback in f2fs_sops, then, it can be called to shut down the file system when underlying block device is marked dead. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: c7f114d864ac ("f2fs: fix to avoid use-after-free in f2fs_stop_gc_thread()") Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 70 ++++++++++++++++++++++++++++++------------------- fs/f2fs/super.c | 6 +++++ 3 files changed, 51 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f7a8259b9180..2059a24a330b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3507,6 +3507,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, + bool readonly); int f2fs_precache_extents(struct inode *inode); int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int f2fs_fileattr_set(struct mnt_idmap *idmap, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2f08bf7f2962..085008b20802 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2256,34 +2256,13 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) return ret; } -static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, + bool readonly) { - struct inode *inode = file_inode(filp); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; - __u32 in; int ret = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (get_user(in, (__u32 __user *)arg)) - return -EFAULT; - - if (in != F2FS_GOING_DOWN_FULLSYNC) { - ret = mnt_want_write_file(filp); - if (ret) { - if (ret == -EROFS) { - ret = 0; - f2fs_stop_checkpoint(sbi, false, - STOP_CP_REASON_SHUTDOWN); - trace_f2fs_shutdown(sbi, in, ret); - } - return ret; - } - } - - switch (in) { + switch (flag) { case F2FS_GOING_DOWN_FULLSYNC: ret = freeze_bdev(sb->s_bdev); if (ret) @@ -2317,6 +2296,9 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) goto out; } + if (readonly) + goto out; + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); @@ -2325,10 +2307,44 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) f2fs_update_time(sbi, REQ_TIME); out: - if (in != F2FS_GOING_DOWN_FULLSYNC) - mnt_drop_write_file(filp); - trace_f2fs_shutdown(sbi, in, ret); + trace_f2fs_shutdown(sbi, flag, ret); + + return ret; +} + +static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + __u32 in; + int ret; + bool need_drop = false, readonly = false; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__u32 __user *)arg)) + return -EFAULT; + + if (in != F2FS_GOING_DOWN_FULLSYNC) { + ret = mnt_want_write_file(filp); + if (ret) { + if (ret != -EROFS) + return ret; + + /* fallback to nosync shutdown for readonly fs */ + in = F2FS_GOING_DOWN_NOSYNC; + readonly = true; + } else { + need_drop = true; + } + } + + ret = f2fs_do_shutdown(sbi, in, readonly); + + if (need_drop) + mnt_drop_write_file(filp); return ret; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e022d8233c0a..d12603c3b5f5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2543,6 +2543,11 @@ restore_opts: return err; } +static void f2fs_shutdown(struct super_block *sb) +{ + f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false); +} + #ifdef CONFIG_QUOTA static bool f2fs_need_recovery(struct f2fs_sb_info *sbi) { @@ -3142,6 +3147,7 @@ static const struct super_operations f2fs_sops = { .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, .remount_fs = f2fs_remount, + .shutdown = f2fs_shutdown, }; #ifdef CONFIG_FS_ENCRYPTION From fc18e655b62ac6bc9f12f5de0d749b4a3fe1e812 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 30 Jul 2024 09:08:55 +0800 Subject: [PATCH 335/534] f2fs: fix to avoid use-after-free in f2fs_stop_gc_thread() [ Upstream commit c7f114d864ac91515bb07ac271e9824a20f5ed95 ] syzbot reports a f2fs bug as below: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 print_report+0xe8/0x550 mm/kasan/report.c:491 kasan_report+0x143/0x180 mm/kasan/report.c:601 kasan_check_range+0x282/0x290 mm/kasan/generic.c:189 instrument_atomic_read_write include/linux/instrumented.h:96 [inline] atomic_fetch_add_relaxed include/linux/atomic/atomic-instrumented.h:252 [inline] __refcount_add include/linux/refcount.h:184 [inline] __refcount_inc include/linux/refcount.h:241 [inline] refcount_inc include/linux/refcount.h:258 [inline] get_task_struct include/linux/sched/task.h:118 [inline] kthread_stop+0xca/0x630 kernel/kthread.c:704 f2fs_stop_gc_thread+0x65/0xb0 fs/f2fs/gc.c:210 f2fs_do_shutdown+0x192/0x540 fs/f2fs/file.c:2283 f2fs_ioc_shutdown fs/f2fs/file.c:2325 [inline] __f2fs_ioctl+0x443a/0xbe60 fs/f2fs/file.c:4325 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f The root cause is below race condition, it may cause use-after-free issue in sbi->gc_th pointer. - remount - f2fs_remount - f2fs_stop_gc_thread - kfree(gc_th) - f2fs_ioc_shutdown - f2fs_do_shutdown - f2fs_stop_gc_thread - kthread_stop(gc_th->f2fs_gc_task) : sbi->gc_thread = NULL; We will call f2fs_do_shutdown() in two paths: - for f2fs_ioc_shutdown() path, we should grab sb->s_umount semaphore for fixing. - for f2fs_shutdown() path, it's safe since caller has already grabbed sb->s_umount semaphore. Reported-by: syzbot+1a8e2b31f2ac9bd3d148@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/0000000000005c7ccb061e032b9b@google.com Fixes: 7950e9ac638e ("f2fs: stop gc/discard thread after fs shutdown") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 11 +++++++++-- fs/f2fs/super.c | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2059a24a330b..6c9b1759f4e7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3508,7 +3508,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, - bool readonly); + bool readonly, bool need_lock); int f2fs_precache_extents(struct inode *inode); int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int f2fs_fileattr_set(struct mnt_idmap *idmap, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 085008b20802..0c279ba45120 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2257,7 +2257,7 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) } int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, - bool readonly) + bool readonly, bool need_lock) { struct super_block *sb = sbi->sb; int ret = 0; @@ -2299,12 +2299,19 @@ int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, if (readonly) goto out; + /* grab sb->s_umount to avoid racing w/ remount() */ + if (need_lock) + down_read(&sbi->sb->s_umount); + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); + if (need_lock) + up_read(&sbi->sb->s_umount); + f2fs_update_time(sbi, REQ_TIME); out: @@ -2341,7 +2348,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) } } - ret = f2fs_do_shutdown(sbi, in, readonly); + ret = f2fs_do_shutdown(sbi, in, readonly, true); if (need_drop) mnt_drop_write_file(filp); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d12603c3b5f5..2bec5176df65 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2545,7 +2545,7 @@ restore_opts: static void f2fs_shutdown(struct super_block *sb) { - f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false); + f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false, false); } #ifdef CONFIG_QUOTA From 4263b3ef81e4b671278de003bfbbac8675bde62f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 28 Aug 2023 22:04:16 +0800 Subject: [PATCH 336/534] f2fs: compress: do sanity check on cluster when CONFIG_F2FS_CHECK_FS is on [ Upstream commit 2aaea533bf063ed3b442df5fe5f6abfc538054c9 ] This patch covers sanity check logic on cluster w/ CONFIG_F2FS_CHECK_FS, otherwise, there will be performance regression while querying cluster mapping info. Callers of f2fs_is_compressed_cluster() only care about whether cluster is compressed or not, rather than # of valid blocks in compressed cluster, so, let's adjust f2fs_is_compressed_cluster()'s logic according to caller's requirement. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: f785cec298c9 ("f2fs: compress: don't redirty sparse cluster during {,de}compress") Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 61 ++++++++++++++++++++++++++-------------------- fs/f2fs/data.c | 4 +-- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c07fe6b840a0..995f6544cc30 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -887,14 +887,15 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc) bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { +#ifdef CONFIG_F2FS_CHECK_FS struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size; - bool compressed = dn->data_blkaddr == COMPRESS_ADDR; int cluster_end = 0; + unsigned int count; int i; char *reason = ""; - if (!compressed) + if (dn->data_blkaddr != COMPRESS_ADDR) return false; /* [..., COMPR_ADDR, ...] */ @@ -903,7 +904,7 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) goto out; } - for (i = 1; i < cluster_size; i++) { + for (i = 1, count = 1; i < cluster_size; i++, count++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node + i); @@ -923,19 +924,42 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) goto out; } } + + f2fs_bug_on(F2FS_I_SB(dn->inode), count != cluster_size && + !is_inode_flag_set(dn->inode, FI_COMPRESS_RELEASED)); + return false; out: f2fs_warn(sbi, "access invalid cluster, ino:%lu, nid:%u, ofs_in_node:%u, reason:%s", dn->inode->i_ino, dn->nid, dn->ofs_in_node, reason); set_sbi_flag(sbi, SBI_NEED_FSCK); return true; +#else + return false; +#endif +} + +static int __f2fs_get_cluster_blocks(struct inode *inode, + struct dnode_of_data *dn) +{ + unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; + int count, i; + + for (i = 1, count = 1; i < cluster_size; i++) { + block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); + + if (__is_valid_data_blkaddr(blkaddr)) + count++; + } + + return count; } static int __f2fs_cluster_blocks(struct inode *inode, - unsigned int cluster_idx, bool compr) + unsigned int cluster_idx, bool compr_blks) { struct dnode_of_data dn; - unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; unsigned int start_idx = cluster_idx << F2FS_I(inode)->i_log_cluster_size; int ret; @@ -950,31 +974,14 @@ static int __f2fs_cluster_blocks(struct inode *inode, if (f2fs_sanity_check_cluster(&dn)) { ret = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_CLUSTER); goto fail; } if (dn.data_blkaddr == COMPRESS_ADDR) { - int i; - - ret = 1; - for (i = 1; i < cluster_size; i++) { - block_t blkaddr; - - blkaddr = data_blkaddr(dn.inode, - dn.node_page, dn.ofs_in_node + i); - if (compr) { - if (__is_valid_data_blkaddr(blkaddr)) - ret++; - } else { - if (blkaddr != NULL_ADDR) - ret++; - } - } - - f2fs_bug_on(F2FS_I_SB(inode), - !compr && ret != cluster_size && - !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)); + if (compr_blks) + ret = __f2fs_get_cluster_blocks(inode, &dn); + else + ret = 1; } fail: f2fs_put_dnode(&dn); @@ -987,7 +994,7 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, true); } -/* return # of valid blocks in compressed cluster */ +/* return whether cluster is compressed one or not */ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { return __f2fs_cluster_blocks(inode, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d54644d38684..1c59a3b2b2c3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1614,9 +1614,7 @@ next_block: map->m_flags |= F2FS_MAP_NEW; } else if (is_hole) { if (f2fs_compressed_file(inode) && - f2fs_sanity_check_cluster(&dn) && - (flag != F2FS_GET_BLOCK_FIEMAP || - IS_ENABLED(CONFIG_F2FS_CHECK_FS))) { + f2fs_sanity_check_cluster(&dn)) { err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_CORRUPTED_CLUSTER); From b6f186bd6aeeb618af445ca73887de7301ccf639 Mon Sep 17 00:00:00 2001 From: Yeongjin Gil Date: Mon, 19 Aug 2024 17:34:30 +0900 Subject: [PATCH 337/534] f2fs: compress: don't redirty sparse cluster during {,de}compress [ Upstream commit f785cec298c95d00058560c0715233294a04b8f3 ] In f2fs_do_write_data_page, when the data block is NULL_ADDR, it skips writepage considering that it has been already truncated. This results in an infinite loop as the PAGECACHE_TAG_TOWRITE tag is not cleared during the writeback process for a compressed file including NULL_ADDR in compress_mode=user. This is the reproduction process: 1. dd if=/dev/zero bs=4096 count=1024 seek=1024 of=testfile 2. f2fs_io compress testfile 3. dd if=/dev/zero bs=4096 count=1 conv=notrunc of=testfile 4. f2fs_io decompress testfile To prevent the problem, let's check whether the cluster is fully allocated before redirty its pages. Fixes: 5fdb322ff2c2 ("f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE") Reviewed-by: Sungjong Seo Reviewed-by: Sunmin Jeong Tested-by: Jaewook Kim Signed-off-by: Yeongjin Gil Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 36 ++++++++++++++++++++++++++++-------- fs/f2fs/f2fs.h | 12 ++++++++++++ fs/f2fs/file.c | 39 +++++++++++++++++++++------------------ 3 files changed, 61 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 995f6544cc30..f7ef69f44f3d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -945,7 +945,7 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; int count, i; - for (i = 1, count = 1; i < cluster_size; i++) { + for (i = 0, count = 0; i < cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node + i); @@ -956,8 +956,8 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, return count; } -static int __f2fs_cluster_blocks(struct inode *inode, - unsigned int cluster_idx, bool compr_blks) +static int __f2fs_cluster_blocks(struct inode *inode, unsigned int cluster_idx, + enum cluster_check_type type) { struct dnode_of_data dn; unsigned int start_idx = cluster_idx << @@ -978,10 +978,12 @@ static int __f2fs_cluster_blocks(struct inode *inode, } if (dn.data_blkaddr == COMPRESS_ADDR) { - if (compr_blks) - ret = __f2fs_get_cluster_blocks(inode, &dn); - else + if (type == CLUSTER_COMPR_BLKS) + ret = 1 + __f2fs_get_cluster_blocks(inode, &dn); + else if (type == CLUSTER_IS_COMPR) ret = 1; + } else if (type == CLUSTER_RAW_BLKS) { + ret = __f2fs_get_cluster_blocks(inode, &dn); } fail: f2fs_put_dnode(&dn); @@ -991,7 +993,16 @@ fail: /* return # of compressed blocks in compressed cluster */ static int f2fs_compressed_blocks(struct compress_ctx *cc) { - return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, true); + return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, + CLUSTER_COMPR_BLKS); +} + +/* return # of raw blocks in non-compressed cluster */ +static int f2fs_decompressed_blocks(struct inode *inode, + unsigned int cluster_idx) +{ + return __f2fs_cluster_blocks(inode, cluster_idx, + CLUSTER_RAW_BLKS); } /* return whether cluster is compressed one or not */ @@ -999,7 +1010,16 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { return __f2fs_cluster_blocks(inode, index >> F2FS_I(inode)->i_log_cluster_size, - false); + CLUSTER_IS_COMPR); +} + +/* return whether cluster contains non raw blocks or not */ +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index) +{ + unsigned int cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size; + + return f2fs_decompressed_blocks(inode, cluster_idx) != + F2FS_I(inode)->i_cluster_size; } static bool cluster_may_compress(struct compress_ctx *cc) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6c9b1759f4e7..98a098cfcc3c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4284,6 +4284,11 @@ static inline bool f2fs_meta_inode_gc_required(struct inode *inode) * compress.c */ #ifdef CONFIG_F2FS_FS_COMPRESSION +enum cluster_check_type { + CLUSTER_IS_COMPR, /* check only if compressed cluster */ + CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ + CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ +}; bool f2fs_is_compressed_page(struct page *page); struct page *f2fs_compress_control_page(struct page *page); int f2fs_prepare_compress_overwrite(struct inode *inode, @@ -4310,6 +4315,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index); void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len); @@ -4396,6 +4402,12 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) +static inline int f2fs_is_compressed_cluster( + struct inode *inode, + pgoff_t index) { return 0; } +static inline bool f2fs_is_sparse_cluster( + struct inode *inode, + pgoff_t index) { return true; } static inline void f2fs_update_read_extent_tree_range_compressed( struct inode *inode, pgoff_t fofs, block_t blkaddr, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0c279ba45120..f2969d860342 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4182,9 +4182,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - pgoff_t page_idx = 0, last_idx; - int cluster_size = fi->i_cluster_size; - int count, ret; + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4217,10 +4216,15 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (!f2fs_is_compressed_cluster(inode, page_idx)) + continue; + + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; @@ -4230,9 +4234,6 @@ static int f2fs_ioc_decompress_file(struct file *filp) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; @@ -4258,9 +4259,9 @@ static int f2fs_ioc_compress_file(struct file *filp) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t page_idx = 0, last_idx; - int cluster_size = F2FS_I(inode)->i_cluster_size; - int count, ret; + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4292,10 +4293,15 @@ static int f2fs_ioc_compress_file(struct file *filp) set_inode_flag(inode, FI_ENABLE_COMPRESS); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (f2fs_is_sparse_cluster(inode, page_idx)) + continue; + + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; @@ -4305,9 +4311,6 @@ static int f2fs_ioc_compress_file(struct file *filp) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; From 364afd8aa824e85da5b332fd31b3d181ca93a464 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Wed, 4 Sep 2024 08:33:06 -0700 Subject: [PATCH 338/534] f2fs: prevent atomic file from being dirtied before commit [ Upstream commit fccaa81de87e80b1809906f7e438e5766fbdc172 ] Keep atomic file clean while updating and make it dirtied during commit in order to avoid unnecessary and excessive inode updates in the previous fix. Fixes: 4bf78322346f ("f2fs: mark inode dirty for FI_ATOMIC_COMMITTED flag") Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 5 +++++ fs/f2fs/segment.c | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 98a098cfcc3c..028577fffb58 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -798,6 +798,7 @@ enum { FI_ALIGNED_WRITE, /* enable aligned write */ FI_COW_FILE, /* indicate COW file */ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ + FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */ FI_ATOMIC_REPLACE, /* indicate atomic replace */ FI_OPENED_FILE, /* indicate file has been opened */ FI_MAX, /* max flag, never be used */ @@ -3039,7 +3040,6 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: - case FI_ATOMIC_COMMITTED: f2fs_mark_inode_dirty_sync(inode, true); } } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 709b2f79872f..a3e0c9273543 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -35,6 +35,11 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) if (f2fs_inode_dirtied(inode, sync)) return; + if (f2fs_is_atomic_file(inode)) { + set_inode_flag(inode, FI_ATOMIC_DIRTIED); + return; + } + mark_inode_dirty_sync(inode); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e3e2c0b2f495..c0ba379a6d8f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -199,6 +199,10 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_REPLACE); clear_inode_flag(inode, FI_ATOMIC_FILE); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } stat_dec_atomic_inode(inode); F2FS_I(inode)->atomic_write_task = NULL; @@ -368,6 +372,10 @@ out: } else { sbi->committed_atomic_block += fi->atomic_write_cnt; set_inode_flag(inode, FI_ATOMIC_COMMITTED); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } } __complete_revoke_list(inode, &revoke_list, ret ? true : false); From 66b1b8254d20448c0449a3a9bc0bf3db8398d938 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 16 Nov 2023 14:25:54 +0800 Subject: [PATCH 339/534] f2fs: clean up w/ dotdot_name [ Upstream commit ff6584ac2c4b4ee8e1fca20bffaaa387d8fe2974 ] Just cleanup, no logic changes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 884ee6dc85b9 ("f2fs: get rid of online repaire on corrupted directory") Signed-off-by: Sasha Levin --- fs/f2fs/namei.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7bca22e5dec4..7dbd541e7bd6 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -459,7 +459,6 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct qstr dot = QSTR_INIT(".", 1); - struct qstr dotdot = QSTR_INIT("..", 2); struct f2fs_dir_entry *de; struct page *page; int err = 0; @@ -497,13 +496,13 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) goto out; } - de = f2fs_find_entry(dir, &dotdot, &page); + de = f2fs_find_entry(dir, &dotdot_name, &page); if (de) f2fs_put_page(page, 0); else if (IS_ERR(page)) err = PTR_ERR(page); else - err = f2fs_do_add_link(dir, &dotdot, NULL, pino, S_IFDIR); + err = f2fs_do_add_link(dir, &dotdot_name, NULL, pino, S_IFDIR); out: if (!err) clear_inode_flag(dir, FI_INLINE_DOTS); From f9ce2f550d53d044ecfb5ce996406cf42cd6b84d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 6 Sep 2024 14:27:24 +0800 Subject: [PATCH 340/534] f2fs: get rid of online repaire on corrupted directory [ Upstream commit 884ee6dc85b959bc152f15bca80c30f06069e6c4 ] syzbot reports a f2fs bug as below: kernel BUG at fs/f2fs/inode.c:896! RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Call Trace: evict+0x532/0x950 fs/inode.c:704 dispose_list fs/inode.c:747 [inline] evict_inodes+0x5f9/0x690 fs/inode.c:797 generic_shutdown_super+0x9d/0x2d0 fs/super.c:627 kill_block_super+0x44/0x90 fs/super.c:1696 kill_f2fs_super+0x344/0x690 fs/f2fs/super.c:4898 deactivate_locked_super+0xc4/0x130 fs/super.c:473 cleanup_mnt+0x41f/0x4b0 fs/namespace.c:1373 task_work_run+0x24f/0x310 kernel/task_work.c:228 ptrace_notify+0x2d2/0x380 kernel/signal.c:2402 ptrace_report_syscall include/linux/ptrace.h:415 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:477 [inline] syscall_exit_work+0xc6/0x190 kernel/entry/common.c:173 syscall_exit_to_user_mode_prepare kernel/entry/common.c:200 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:205 [inline] syscall_exit_to_user_mode+0x279/0x370 kernel/entry/common.c:218 do_syscall_64+0x100/0x230 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Online repaire on corrupted directory in f2fs_lookup() can generate dirty data/meta while racing w/ readonly remount, it may leave dirty inode after filesystem becomes readonly, however, checkpoint() will skips flushing dirty inode in a state of readonly mode, result in above panic. Let's get rid of online repaire in f2fs_lookup(), and leave the work to fsck.f2fs. Fixes: 510022a85839 ("f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries") Reported-by: syzbot+ebea2790904673d7c618@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000a7b20f061ff2d56a@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 11 ------- fs/f2fs/namei.c | 68 ----------------------------------------- include/linux/f2fs_fs.h | 2 +- 3 files changed, 1 insertion(+), 80 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 028577fffb58..fcc2f9e6dc85 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -780,7 +780,6 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_SKIP_WRITES, /* should skip data page writeback */ FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ @@ -3037,7 +3036,6 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; fallthrough; case FI_DATA_EXIST: - case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: f2fs_mark_inode_dirty_sync(inode, true); @@ -3161,8 +3159,6 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) set_bit(FI_DATA_EXIST, fi->flags); - if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) @@ -3183,8 +3179,6 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(inode, FI_INLINE_DOTS)) - ri->i_inline |= F2FS_INLINE_DOTS; if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) ri->i_inline |= F2FS_EXTRA_ATTR; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -3271,11 +3265,6 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(inode, FI_DATA_EXIST); } -static inline int f2fs_has_inline_dots(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_INLINE_DOTS); -} - static inline int f2fs_is_mmap_file(struct inode *inode) { return is_inode_flag_set(inode, FI_MMAP_FILE); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7dbd541e7bd6..2e08e1fdf485 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -455,62 +455,6 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } -static int __recover_dot_dentries(struct inode *dir, nid_t pino) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct qstr dot = QSTR_INIT(".", 1); - struct f2fs_dir_entry *de; - struct page *page; - int err = 0; - - if (f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", - dir->i_ino, pino); - return 0; - } - - if (!S_ISDIR(dir->i_mode)) { - f2fs_err(sbi, "inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u)", - dir->i_ino, dir->i_mode, pino); - set_sbi_flag(sbi, SBI_NEED_FSCK); - return -ENOTDIR; - } - - err = f2fs_dquot_initialize(dir); - if (err) - return err; - - f2fs_balance_fs(sbi, true); - - f2fs_lock_op(sbi); - - de = f2fs_find_entry(dir, &dot, &page); - if (de) { - f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } else { - err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); - if (err) - goto out; - } - - de = f2fs_find_entry(dir, &dotdot_name, &page); - if (de) - f2fs_put_page(page, 0); - else if (IS_ERR(page)) - err = PTR_ERR(page); - else - err = f2fs_do_add_link(dir, &dotdot_name, NULL, pino, S_IFDIR); -out: - if (!err) - clear_inode_flag(dir, FI_INLINE_DOTS); - - f2fs_unlock_op(sbi); - return err; -} - static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -520,7 +464,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *new; nid_t ino = -1; int err = 0; - unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); @@ -557,17 +500,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { - err = __recover_dot_dentries(dir, root_ino); - if (err) - goto out_iput; - } - - if (f2fs_has_inline_dots(inode)) { - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) - goto out_iput; - } if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1352a24d72ef..b9affa64b7fa 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -262,7 +262,7 @@ struct f2fs_extent { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ -#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries (obsolete) */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ #define F2FS_PIN_FILE 0x40 /* file should not be gced */ #define F2FS_COMPRESS_RELEASED 0x80 /* file released compressed blocks */ From 649ec8b30df113042588bd3d3cd4e98bcb1091e0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Sep 2024 11:07:13 +0800 Subject: [PATCH 341/534] f2fs: fix to don't set SB_RDONLY in f2fs_handle_critical_error() [ Upstream commit 930c6ab93492c4b15436524e704950b364b2930c ] syzbot reports a f2fs bug as below: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 58 at kernel/rcu/sync.c:177 rcu_sync_dtor+0xcd/0x180 kernel/rcu/sync.c:177 CPU: 1 UID: 0 PID: 58 Comm: kworker/1:2 Not tainted 6.10.0-syzkaller-12562-g1722389b0d86 #0 Workqueue: events destroy_super_work RIP: 0010:rcu_sync_dtor+0xcd/0x180 kernel/rcu/sync.c:177 Call Trace: percpu_free_rwsem+0x41/0x80 kernel/locking/percpu-rwsem.c:42 destroy_super_work+0xec/0x130 fs/super.c:282 process_one_work kernel/workqueue.c:3231 [inline] process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312 worker_thread+0x86d/0xd40 kernel/workqueue.c:3390 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 As Christian Brauner pointed out [1]: the root cause is f2fs sets SB_RDONLY flag in internal function, rather than setting the flag covered w/ sb->s_umount semaphore via remount procedure, then below race condition causes this bug: - freeze_super() - sb_wait_write(sb, SB_FREEZE_WRITE) - sb_wait_write(sb, SB_FREEZE_PAGEFAULT) - sb_wait_write(sb, SB_FREEZE_FS) - f2fs_handle_critical_error - sb->s_flags |= SB_RDONLY - thaw_super - thaw_super_locked - sb_rdonly() is true, so it skips sb_freeze_unlock(sb, SB_FREEZE_FS) - deactivate_locked_super Since f2fs has almost the same logic as ext4 [2] when handling critical error in filesystem if it mounts w/ errors=remount-ro option: - set CP_ERROR_FLAG flag which indicates filesystem is stopped - record errors to superblock - set SB_RDONLY falg Once we set CP_ERROR_FLAG flag, all writable interfaces can detect the flag and stop any further updates on filesystem. So, it is safe to not set SB_RDONLY flag, let's remove the logic and keep in line w/ ext4 [3]. [1] https://lore.kernel.org/all/20240729-himbeeren-funknetz-96e62f9c7aee@brauner [2] https://lore.kernel.org/all/20240729132721.hxih6ehigadqf7wx@quack3 [3] https://lore.kernel.org/linux-ext4/20240805201241.27286-1-jack@suse.cz Fixes: b62e71be2110 ("f2fs: support errors=remount-ro|continue|panic mountoption") Reported-by: syzbot+20d7e439f76bbbd863a7@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000b90a8e061e21d12f@google.com/ Cc: Jan Kara Cc: Christian Brauner Signed-off-by: Chao Yu Reviewed-by: Christian Brauner Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2bec5176df65..37fa7e32835a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4137,12 +4137,14 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, } f2fs_warn(sbi, "Remounting filesystem read-only"); + /* - * Make sure updated value of ->s_mount_flags will be visible before - * ->s_flags update + * We have already set CP_ERROR_FLAG flag to stop all updates + * to filesystem, so it doesn't need to set SB_RDONLY flag here + * because the flag should be set covered w/ sb->s_umount semaphore + * via remount procedure, otherwise, it will confuse code like + * freeze_super() which will lead to deadlocks and other problems. */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; } static void f2fs_record_error_work(struct work_struct *work) From 2016d58567b698474c7c98da51651ac459a38a1a Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 6 Sep 2024 10:39:56 +0800 Subject: [PATCH 342/534] spi: atmel-quadspi: Undo runtime PM changes at driver exit time [ Upstream commit 438efb23f9581659495b85f1f6c7d5946200660c ] It's important to undo pm_runtime_use_autosuspend() with pm_runtime_dont_use_autosuspend() at driver exit time unless driver initially enabled pm_runtime with devm_pm_runtime_enable() (which handles it for you). Hence, call pm_runtime_dont_use_autosuspend() at driver exit time to fix it. Fixes: 4a2f83b7f780 ("spi: atmel-quadspi: add runtime pm support") Signed-off-by: Jinjie Ruan Link: https://patch.msgid.link/20240906023956.1004440-1-ruanjinjie@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/atmel-quadspi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index 4cc4f32ca449..af0464999af1 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -726,6 +726,7 @@ static void atmel_qspi_remove(struct platform_device *pdev) clk_unprepare(aq->pclk); pm_runtime_disable(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); } From 93773e4461978d30b8b90e4390e0fee2f397160b Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 6 Sep 2024 10:12:51 +0800 Subject: [PATCH 343/534] spi: spi-fsl-lpspi: Undo runtime PM changes at driver exit time [ Upstream commit 3b577de206d52dbde9428664b6d823d35a803d75 ] It's important to undo pm_runtime_use_autosuspend() with pm_runtime_dont_use_autosuspend() at driver exit time unless driver initially enabled pm_runtime with devm_pm_runtime_enable() (which handles it for you). Hence, call pm_runtime_dont_use_autosuspend() at driver exit time to fix it. Fixes: 944c01a889d9 ("spi: lpspi: enable runtime pm for lpspi") Signed-off-by: Jinjie Ruan Link: https://patch.msgid.link/20240906021251.610462-1-ruanjinjie@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-fsl-lpspi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index f7e268cf9085..180cea7d3817 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -988,6 +988,7 @@ static void fsl_lpspi_remove(struct platform_device *pdev) fsl_lpspi_dma_exit(controller); + pm_runtime_dont_use_autosuspend(fsl_lpspi->dev); pm_runtime_disable(fsl_lpspi->dev); } From 54fd87259c8520187e3b9b3d1f18bf6a91c0a993 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 19 Sep 2024 10:17:09 +0800 Subject: [PATCH 344/534] lib/sbitmap: define swap_lock as raw_spinlock_t [ Upstream commit 65f666c6203600053478ce8e34a1db269a8701c9 ] When called from sbitmap_queue_get(), sbitmap_deferred_clear() may be run with preempt disabled. In RT kernel, spin_lock() can sleep, then warning of "BUG: sleeping function called from invalid context" can be triggered. Fix it by replacing it with raw_spin_lock. Cc: Yang Yang Fixes: 72d04bdcf3f7 ("sbitmap: fix io hung due to race on sbitmap_word::cleared") Signed-off-by: Ming Lei Reviewed-by: Yang Yang Link: https://lore.kernel.org/r/20240919021709.511329-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/sbitmap.h | 2 +- lib/sbitmap.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index c09cdcc99471..189140bf11fc 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -40,7 +40,7 @@ struct sbitmap_word { /** * @swap_lock: serializes simultaneous updates of ->word and ->cleared */ - spinlock_t swap_lock; + raw_spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 9307bf17a817..1d5e15748692 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -65,7 +65,7 @@ static inline bool sbitmap_deferred_clear(struct sbitmap_word *map, { unsigned long mask, word_mask; - guard(spinlock_irqsave)(&map->swap_lock); + guard(raw_spinlock_irqsave)(&map->swap_lock); if (!map->cleared) { if (depth == 0) @@ -136,7 +136,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, } for (i = 0; i < sb->map_nr; i++) - spin_lock_init(&sb->map[i].swap_lock); + raw_spin_lock_init(&sb->map[i].swap_lock); return 0; } From ecb8a79d21fb9c39c7aceb464ab7c21e31e70729 Mon Sep 17 00:00:00 2001 From: Alexander Dahl Date: Wed, 18 Sep 2024 10:27:43 +0200 Subject: [PATCH 345/534] spi: atmel-quadspi: Avoid overwriting delay register settings [ Upstream commit 329ca3eed4a9a161515a8714be6ba182321385c7 ] Previously the MR and SCR registers were just set with the supposedly required values, from cached register values (cached reg content initialized to zero). All parts fixed here did not consider the current register (cache) content, which would make future support of cs_setup, cs_hold, and cs_inactive impossible. Setting SCBR in atmel_qspi_setup() erases a possible DLYBS setting from atmel_qspi_set_cs_timing(). The DLYBS setting is applied by ORing over the current setting, without resetting the bits first. All writes to MR did not consider possible settings of DLYCS and DLYBCT. Signed-off-by: Alexander Dahl Fixes: f732646d0ccd ("spi: atmel-quadspi: Add support for configuring CS timing") Link: https://patch.msgid.link/20240918082744.379610-2-ada@thorsis.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/atmel-quadspi.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index af0464999af1..bf7999ac22c8 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -375,9 +375,9 @@ static int atmel_qspi_set_cfg(struct atmel_qspi *aq, * If the QSPI controller is set in regular SPI mode, set it in * Serial Memory Mode (SMM). */ - if (aq->mr != QSPI_MR_SMM) { - atmel_qspi_write(QSPI_MR_SMM, aq, QSPI_MR); - aq->mr = QSPI_MR_SMM; + if (!(aq->mr & QSPI_MR_SMM)) { + aq->mr |= QSPI_MR_SMM; + atmel_qspi_write(aq->scr, aq, QSPI_MR); } /* Clear pending interrupts */ @@ -501,7 +501,8 @@ static int atmel_qspi_setup(struct spi_device *spi) if (ret < 0) return ret; - aq->scr = QSPI_SCR_SCBR(scbr); + aq->scr &= ~QSPI_SCR_SCBR_MASK; + aq->scr |= QSPI_SCR_SCBR(scbr); atmel_qspi_write(aq->scr, aq, QSPI_SCR); pm_runtime_mark_last_busy(ctrl->dev.parent); @@ -534,6 +535,7 @@ static int atmel_qspi_set_cs_timing(struct spi_device *spi) if (ret < 0) return ret; + aq->scr &= ~QSPI_SCR_DLYBS_MASK; aq->scr |= QSPI_SCR_DLYBS(cs_setup); atmel_qspi_write(aq->scr, aq, QSPI_SCR); @@ -549,8 +551,8 @@ static void atmel_qspi_init(struct atmel_qspi *aq) atmel_qspi_write(QSPI_CR_SWRST, aq, QSPI_CR); /* Set the QSPI controller by default in Serial Memory Mode */ - atmel_qspi_write(QSPI_MR_SMM, aq, QSPI_MR); - aq->mr = QSPI_MR_SMM; + aq->mr |= QSPI_MR_SMM; + atmel_qspi_write(aq->mr, aq, QSPI_MR); /* Enable the QSPI controller */ atmel_qspi_write(QSPI_CR_QSPIEN, aq, QSPI_CR); From 30b9bf4b4107ce45316a41cfda2589f70e6a7f9b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Sat, 14 Sep 2024 14:01:22 +0200 Subject: [PATCH 346/534] nvme-multipath: system fails to create generic nvme device [ Upstream commit 63bcf9014e95a7d279d10d8e2caa5d88db2b1855 ] NVME_NSHEAD_DISK_LIVE is a flag for struct nvme_ns_head, not nvme_ns. The current code has a typo causing NVME_NSHEAD_DISK_LIVE never to be cleared once device_add_disk_fails, causing the system never to create the 'generic' character device. Even several rescan attempts will change the situation and the system has to be rebooted to fix the issue. Fixes: 11384580e332 ("nvme-multipath: add error handling support for add_disk()") Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 645a6b132220..37ea0fa421da 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -585,7 +585,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) rc = device_add_disk(&head->subsys->dev, head->disk, nvme_ns_id_attr_groups); if (rc) { - clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags); + clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags); return; } nvme_add_ns_head_cdev(head); From 48617707401e8e2179d749e35d83f36252f232bc Mon Sep 17 00:00:00 2001 From: Guillaume Stols Date: Tue, 2 Jul 2024 17:34:10 +0000 Subject: [PATCH 347/534] iio: adc: ad7606: fix oversampling gpio array [ Upstream commit 8dc4594b54dbaaba40dc8884ad3d42083de39434 ] gpiod_set_array_value was misused here: the implementation relied on the assumption that an unsigned long was required for each gpio, while the function expects a bit array stored in "as much unsigned long as needed for storing one bit per GPIO", i.e it is using a bit field. This leaded to incorrect parameter passed to gpiod_set_array_value, that would set 1 value instead of 3. It also prevents to select the software mode correctly for the AD7606B. Fixes: d2a415c86c6b ("iio: adc: ad7606: Add support for AD7606B ADC") Fixes: 41f71e5e7daf ("staging: iio: adc: ad7606: Use find_closest() macro") Signed-off-by: Guillaume Stols Reviewed-by: Nuno Sa Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/ad7606.c | 4 ++-- drivers/iio/adc/ad7606_spi.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index 1c08c0921ee7..8c66b1e36401 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -215,9 +215,9 @@ static int ad7606_write_os_hw(struct iio_dev *indio_dev, int val) struct ad7606_state *st = iio_priv(indio_dev); DECLARE_BITMAP(values, 3); - values[0] = val; + values[0] = val & GENMASK(2, 0); - gpiod_set_array_value(ARRAY_SIZE(values), st->gpio_os->desc, + gpiod_set_array_value(st->gpio_os->ndescs, st->gpio_os->desc, st->gpio_os->info, values); /* AD7616 requires a reset to update value */ diff --git a/drivers/iio/adc/ad7606_spi.c b/drivers/iio/adc/ad7606_spi.c index 263a778bcf25..287a0591533b 100644 --- a/drivers/iio/adc/ad7606_spi.c +++ b/drivers/iio/adc/ad7606_spi.c @@ -249,8 +249,9 @@ static int ad7616_sw_mode_config(struct iio_dev *indio_dev) static int ad7606B_sw_mode_config(struct iio_dev *indio_dev) { struct ad7606_state *st = iio_priv(indio_dev); - unsigned long os[3] = {1}; + DECLARE_BITMAP(os, 3); + bitmap_fill(os, 3); /* * Software mode is enabled when all three oversampling * pins are set to high. If oversampling gpios are defined @@ -258,7 +259,7 @@ static int ad7606B_sw_mode_config(struct iio_dev *indio_dev) * otherwise, they must be hardwired to VDD */ if (st->gpio_os) { - gpiod_set_array_value(ARRAY_SIZE(os), + gpiod_set_array_value(st->gpio_os->ndescs, st->gpio_os->desc, st->gpio_os->info, os); } /* OS of 128 and 256 are available only in software mode */ From 0f115888eaa9533506aaae170b59fed8e77ebbd4 Mon Sep 17 00:00:00 2001 From: Guillaume Stols Date: Tue, 2 Jul 2024 17:34:11 +0000 Subject: [PATCH 348/534] iio: adc: ad7606: fix standby gpio state to match the documentation [ Upstream commit 059fe4f8bbdf5cad212e1aeeb3e8968c80b9ff3b ] The binding's documentation specifies that "As the line is active low, it should be marked GPIO_ACTIVE_LOW". However, in the driver, it was handled the opposite way. This commit sets the driver's behaviour in sync with the documentation Fixes: 722407a4e8c0 ("staging:iio:ad7606: Use GPIO descriptor API") Signed-off-by: Guillaume Stols Reviewed-by: Nuno Sa Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/ad7606.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index 8c66b1e36401..4d755ffc3f41 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -422,7 +422,7 @@ static int ad7606_request_gpios(struct ad7606_state *st) return PTR_ERR(st->gpio_range); st->gpio_standby = devm_gpiod_get_optional(dev, "standby", - GPIOD_OUT_HIGH); + GPIOD_OUT_LOW); if (IS_ERR(st->gpio_standby)) return PTR_ERR(st->gpio_standby); @@ -665,7 +665,7 @@ static int ad7606_suspend(struct device *dev) if (st->gpio_standby) { gpiod_set_value(st->gpio_range, 1); - gpiod_set_value(st->gpio_standby, 0); + gpiod_set_value(st->gpio_standby, 1); } return 0; From dd69fb026c97b57b09a286b7f250f4ee2b7460a0 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 22 Jul 2024 22:48:10 +0800 Subject: [PATCH 349/534] driver core: Fix error handling in driver API device_rename() [ Upstream commit 6d8249ac29bc23260dfa9747eb398ce76012d73c ] For class-device, device_rename() failure maybe cause unexpected link name within its class folder as explained below: /sys/class/.../old_name -> /sys/devices/.../old_name device_rename(..., new_name) and failed /sys/class/.../new_name -> /sys/devices/.../old_name Fixed by undoing renaming link if renaming kobject failed. Fixes: f349cf34731c ("driver core: Implement ns directory support for device classes.") Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240722-device_rename_fix-v2-1-77de1a6c6495@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index cb323700e952..60a0a4630a5b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4485,9 +4485,11 @@ EXPORT_SYMBOL_GPL(device_destroy); */ int device_rename(struct device *dev, const char *new_name) { + struct subsys_private *sp = NULL; struct kobject *kobj = &dev->kobj; char *old_device_name = NULL; int error; + bool is_link_renamed = false; dev = get_device(dev); if (!dev) @@ -4502,7 +4504,7 @@ int device_rename(struct device *dev, const char *new_name) } if (dev->class) { - struct subsys_private *sp = class_to_subsys(dev->class); + sp = class_to_subsys(dev->class); if (!sp) { error = -EINVAL; @@ -4511,16 +4513,19 @@ int device_rename(struct device *dev, const char *new_name) error = sysfs_rename_link_ns(&sp->subsys.kobj, kobj, old_device_name, new_name, kobject_namespace(kobj)); - subsys_put(sp); if (error) goto out; + + is_link_renamed = true; } error = kobject_rename(kobj, new_name); - if (error) - goto out; - out: + if (error && is_link_renamed) + sysfs_rename_link_ns(&sp->subsys.kobj, kobj, new_name, + old_device_name, kobject_namespace(kobj)); + subsys_put(sp); + put_device(dev); kfree(old_device_name); From 5d86a29db8a332d89088a4523f6c0308f7f4b938 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Tue, 2 Jul 2024 11:18:50 +0300 Subject: [PATCH 350/534] ABI: testing: fix admv8818 attr description [ Upstream commit 7d34b4ad8cd2867b130b5b8d7d76d0d6092bd019 ] Fix description of the filter_mode_available attribute by pointing to the correct name of the attribute that can be written with valid values. Fixes: bf92d87d7c67 ("iio:filter:admv8818: Add sysfs ABI documentation") Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20240702081851.4663-1-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818 b/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818 index 31dbb390573f..c431f0a13cf5 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818 +++ b/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818 @@ -3,7 +3,7 @@ KernelVersion: Contact: linux-iio@vger.kernel.org Description: Reading this returns the valid values that can be written to the - on_altvoltage0_mode attribute: + filter_mode attribute: - auto -> Adjust bandpass filter to track changes in input clock rate. - manual -> disable/unregister the clock rate notifier / input clock tracking. From 2bc96d4ea9e1310cf41d7dc062f76c8ad57849cb Mon Sep 17 00:00:00 2001 From: Vasileios Amoiridis Date: Mon, 10 Jun 2024 01:38:12 +0200 Subject: [PATCH 351/534] iio: chemical: bme680: Fix read/write ops to device by adding mutexes [ Upstream commit 77641e5a477d428335cd094b88ac54e09ccb70f4 ] Add mutexes in the {read/write}_raw() functions of the device to guard the read/write of data from/to the device. This is necessary because for any operation other than temperature, multiple reads need to take place from the device. Even though regmap has a locking by itself, it won't protect us from multiple applications trying to read at the same time temperature and pressure since the pressure reading includes an internal temperature reading and there is nothing to ensure that this temperature+pressure reading will happen sequentially without any other operation interfering in the meantime. Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor") Signed-off-by: Vasileios Amoiridis Link: https://patch.msgid.link/20240609233826.330516-2-vassilisamir@gmail.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/chemical/bme680_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 500f56834b01..a6bf689833da 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include #include @@ -52,6 +53,7 @@ struct bme680_calib { struct bme680_data { struct regmap *regmap; struct bme680_calib bme680; + struct mutex lock; /* Protect multiple serial R/W ops to device. */ u8 oversampling_temp; u8 oversampling_press; u8 oversampling_humid; @@ -827,6 +829,8 @@ static int bme680_read_raw(struct iio_dev *indio_dev, { struct bme680_data *data = iio_priv(indio_dev); + guard(mutex)(&data->lock); + switch (mask) { case IIO_CHAN_INFO_PROCESSED: switch (chan->type) { @@ -871,6 +875,8 @@ static int bme680_write_raw(struct iio_dev *indio_dev, { struct bme680_data *data = iio_priv(indio_dev); + guard(mutex)(&data->lock); + if (val2 != 0) return -EINVAL; @@ -967,6 +973,7 @@ int bme680_core_probe(struct device *dev, struct regmap *regmap, name = bme680_match_acpi_device(dev); data = iio_priv(indio_dev); + mutex_init(&data->lock); dev_set_drvdata(dev, indio_dev); data->regmap = regmap; indio_dev->name = name; From c5a4a27666e0520d83adbaac7880424e9611c5aa Mon Sep 17 00:00:00 2001 From: Biju Das Date: Fri, 18 Aug 2023 08:55:56 +0100 Subject: [PATCH 352/534] iio: magnetometer: ak8975: Convert enum->pointer for data in the match tables [ Upstream commit 4f9ea93afde190a0f906ee624fc9a45cf784551b ] Convert enum->pointer for data in the match tables to simplify the probe() by replacing device_get_match_data() and i2c_client_get_device_id by i2c_get_match_data() as we have similar I2C, ACPI and DT matching table. Signed-off-by: Biju Das Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230818075600.24277-2-biju.das.jz@bp.renesas.com Signed-off-by: Jonathan Cameron Stable-dep-of: da6e3160df23 ("iio: magnetometer: ak8975: drop incorrect AK09116 compatible") Signed-off-by: Sasha Levin --- drivers/iio/magnetometer/ak8975.c | 75 +++++++++++++------------------ 1 file changed, 30 insertions(+), 45 deletions(-) diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index eb706d0bf70b..104798549de1 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -813,13 +813,13 @@ static const struct iio_info ak8975_info = { }; static const struct acpi_device_id ak_acpi_match[] = { - {"AK8975", AK8975}, - {"AK8963", AK8963}, - {"INVN6500", AK8963}, - {"AK009911", AK09911}, - {"AK09911", AK09911}, - {"AKM9911", AK09911}, - {"AK09912", AK09912}, + {"AK8975", (kernel_ulong_t)&ak_def_array[AK8975] }, + {"AK8963", (kernel_ulong_t)&ak_def_array[AK8963] }, + {"INVN6500", (kernel_ulong_t)&ak_def_array[AK8963] }, + {"AK009911", (kernel_ulong_t)&ak_def_array[AK09911] }, + {"AK09911", (kernel_ulong_t)&ak_def_array[AK09911] }, + {"AKM9911", (kernel_ulong_t)&ak_def_array[AK09911] }, + {"AK09912", (kernel_ulong_t)&ak_def_array[AK09912] }, { } }; MODULE_DEVICE_TABLE(acpi, ak_acpi_match); @@ -883,10 +883,7 @@ static int ak8975_probe(struct i2c_client *client) struct iio_dev *indio_dev; struct gpio_desc *eoc_gpiod; struct gpio_desc *reset_gpiod; - const void *match; - unsigned int i; int err; - enum asahi_compass_chipset chipset; const char *name = NULL; /* @@ -928,27 +925,15 @@ static int ak8975_probe(struct i2c_client *client) return err; /* id will be NULL when enumerated via ACPI */ - match = device_get_match_data(&client->dev); - if (match) { - chipset = (uintptr_t)match; - name = dev_name(&client->dev); - } else if (id) { - chipset = (enum asahi_compass_chipset)(id->driver_data); - name = id->name; - } else - return -ENOSYS; - - for (i = 0; i < ARRAY_SIZE(ak_def_array); i++) - if (ak_def_array[i].type == chipset) - break; - - if (i == ARRAY_SIZE(ak_def_array)) { - dev_err(&client->dev, "AKM device type unsupported: %d\n", - chipset); + data->def = i2c_get_match_data(client); + if (!data->def) return -ENODEV; - } - data->def = &ak_def_array[i]; + /* If enumerated via firmware node, fix the ABI */ + if (dev_fwnode(&client->dev)) + name = dev_name(&client->dev); + else + name = id->name; /* Fetch the regulators */ data->vdd = devm_regulator_get(&client->dev, "vdd"); @@ -1077,28 +1062,28 @@ static DEFINE_RUNTIME_DEV_PM_OPS(ak8975_dev_pm_ops, ak8975_runtime_suspend, ak8975_runtime_resume, NULL); static const struct i2c_device_id ak8975_id[] = { - {"ak8975", AK8975}, - {"ak8963", AK8963}, - {"AK8963", AK8963}, - {"ak09911", AK09911}, - {"ak09912", AK09912}, - {"ak09916", AK09916}, + {"ak8975", (kernel_ulong_t)&ak_def_array[AK8975] }, + {"ak8963", (kernel_ulong_t)&ak_def_array[AK8963] }, + {"AK8963", (kernel_ulong_t)&ak_def_array[AK8963] }, + {"ak09911", (kernel_ulong_t)&ak_def_array[AK09911] }, + {"ak09912", (kernel_ulong_t)&ak_def_array[AK09912] }, + {"ak09916", (kernel_ulong_t)&ak_def_array[AK09916] }, {} }; MODULE_DEVICE_TABLE(i2c, ak8975_id); static const struct of_device_id ak8975_of_match[] = { - { .compatible = "asahi-kasei,ak8975", }, - { .compatible = "ak8975", }, - { .compatible = "asahi-kasei,ak8963", }, - { .compatible = "ak8963", }, - { .compatible = "asahi-kasei,ak09911", }, - { .compatible = "ak09911", }, - { .compatible = "asahi-kasei,ak09912", }, - { .compatible = "ak09912", }, - { .compatible = "asahi-kasei,ak09916", }, - { .compatible = "ak09916", }, + { .compatible = "asahi-kasei,ak8975", .data = &ak_def_array[AK8975] }, + { .compatible = "ak8975", .data = &ak_def_array[AK8975] }, + { .compatible = "asahi-kasei,ak8963", .data = &ak_def_array[AK8963] }, + { .compatible = "ak8963", .data = &ak_def_array[AK8963] }, + { .compatible = "asahi-kasei,ak09911", .data = &ak_def_array[AK09911] }, + { .compatible = "ak09911", .data = &ak_def_array[AK09911] }, + { .compatible = "asahi-kasei,ak09912", .data = &ak_def_array[AK09912] }, + { .compatible = "ak09912", .data = &ak_def_array[AK09912] }, + { .compatible = "asahi-kasei,ak09916", .data = &ak_def_array[AK09916] }, + { .compatible = "ak09916", .data = &ak_def_array[AK09916] }, {} }; MODULE_DEVICE_TABLE(of, ak8975_of_match); From 7387270b6837cc910db4799e27a122327131f0d1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 6 Aug 2024 07:30:15 +0200 Subject: [PATCH 353/534] iio: magnetometer: ak8975: drop incorrect AK09116 compatible [ Upstream commit da6e3160df230692bbd48a6d52318035f19595e2 ] All compatibles in this binding without prefixes were deprecated, so adding a new deprecated one after some time is not allowed, because it defies the core logic of deprecating things. Drop the AK09916 vendorless compatible. Fixes: 76e28aa97fa0 ("iio: magnetometer: ak8975: add AK09116 support") Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240806053016.6401-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/magnetometer/ak8975.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index 104798549de1..a79a1d5b4639 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -1083,7 +1083,6 @@ static const struct of_device_id ak8975_of_match[] = { { .compatible = "asahi-kasei,ak09912", .data = &ak_def_array[AK09912] }, { .compatible = "ak09912", .data = &ak_def_array[AK09912] }, { .compatible = "asahi-kasei,ak09916", .data = &ak_def_array[AK09916] }, - { .compatible = "ak09916", .data = &ak_def_array[AK09916] }, {} }; MODULE_DEVICE_TABLE(of, ak8975_of_match); From fdc637d4f5fb06460ba0d2fca28dbf2559b498ef Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 6 Aug 2024 07:30:16 +0200 Subject: [PATCH 354/534] dt-bindings: iio: asahi-kasei,ak8975: drop incorrect AK09116 compatible [ Upstream commit c7668ac67bc21aebdd8e2d7f839bfffba31b7713 ] All compatibles in this binding without prefixes were deprecated, so adding a new deprecated one after some time is not allowed, because it defies the core logic of deprecating things. Drop the AK09916 vendorless compatible. Fixes: 76e28aa97fa0 ("iio: magnetometer: ak8975: add AK09116 support") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Conor Dooley Link: https://patch.msgid.link/20240806053016.6401-2-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- .../devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml b/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml index 9790f75fc669..fe5145d3b73c 100644 --- a/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml +++ b/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml @@ -23,7 +23,6 @@ properties: - ak8963 - ak09911 - ak09912 - - ak09916 deprecated: true reg: From b8e45b910525704010d10c9dcbf2abf3005aa97c Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 12 Aug 2024 16:06:58 +0800 Subject: [PATCH 355/534] driver core: Fix a potential null-ptr-deref in module_add_driver() [ Upstream commit 18ec12c97b39ff6aa15beb8d2b25d15cd44b87d8 ] Inject fault while probing of-fpga-region, if kasprintf() fails in module_add_driver(), the second sysfs_remove_link() in exit path will cause null-ptr-deref as below because kernfs_name_hash() will call strlen() with NULL driver_name. Fix it by releasing resources based on the exit path sequence. KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] Mem abort info: ESR = 0x0000000096000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [dfffffc000000000] address between user and kernel address ranges Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: of_fpga_region(+) fpga_region fpga_bridge cfg80211 rfkill 8021q garp mrp stp llc ipv6 [last unloaded: of_fpga_region] CPU: 2 UID: 0 PID: 2036 Comm: modprobe Not tainted 6.11.0-rc2-g6a0e38264012 #295 Hardware name: linux,dummy-virt (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : strlen+0x24/0xb0 lr : kernfs_name_hash+0x1c/0xc4 sp : ffffffc081f97380 x29: ffffffc081f97380 x28: ffffffc081f97b90 x27: ffffff80c821c2a0 x26: ffffffedac0be418 x25: 0000000000000000 x24: ffffff80c09d2000 x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000 x20: 0000000000000000 x19: 0000000000000000 x18: 0000000000001840 x17: 0000000000000000 x16: 0000000000000000 x15: 1ffffff8103f2e42 x14: 00000000f1f1f1f1 x13: 0000000000000004 x12: ffffffb01812d61d x11: 1ffffff01812d61c x10: ffffffb01812d61c x9 : dfffffc000000000 x8 : 0000004fe7ed29e4 x7 : ffffff80c096b0e7 x6 : 0000000000000001 x5 : ffffff80c096b0e0 x4 : 1ffffffdb990efa2 x3 : 0000000000000000 x2 : 0000000000000000 x1 : dfffffc000000000 x0 : 0000000000000000 Call trace: strlen+0x24/0xb0 kernfs_name_hash+0x1c/0xc4 kernfs_find_ns+0x118/0x2e8 kernfs_remove_by_name_ns+0x80/0x100 sysfs_remove_link+0x74/0xa8 module_add_driver+0x278/0x394 bus_add_driver+0x1f0/0x43c driver_register+0xf4/0x3c0 __platform_driver_register+0x60/0x88 of_fpga_region_init+0x20/0x1000 [of_fpga_region] do_one_initcall+0x110/0x788 do_init_module+0x1dc/0x5c8 load_module+0x3c38/0x4cac init_module_from_file+0xd4/0x128 idempotent_init_module+0x2cc/0x528 __arm64_sys_finit_module+0xac/0x100 invoke_syscall+0x6c/0x258 el0_svc_common.constprop.0+0x160/0x22c do_el0_svc+0x44/0x5c el0_svc+0x48/0xb8 el0t_64_sync_handler+0x13c/0x158 el0t_64_sync+0x190/0x194 Code: f2fbffe1 a90157f4 12000802 aa0003f5 (38e16861) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception Fixes: 85d2b0aa1703 ("module: don't ignore sysfs_create_link() failures") Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/r/20240812080658.2791982-1-ruanjinjie@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/module.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/base/module.c b/drivers/base/module.c index b0b79b9c189d..0d5c5da367f7 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -66,27 +66,31 @@ int module_add_driver(struct module *mod, struct device_driver *drv) driver_name = make_driver_name(drv); if (!driver_name) { ret = -ENOMEM; - goto out; + goto out_remove_kobj; } module_create_drivers_dir(mk); if (!mk->drivers_dir) { ret = -EINVAL; - goto out; + goto out_free_driver_name; } ret = sysfs_create_link(mk->drivers_dir, &drv->p->kobj, driver_name); if (ret) - goto out; + goto out_remove_drivers_dir; kfree(driver_name); return 0; -out: - sysfs_remove_link(&drv->p->kobj, "module"); + +out_remove_drivers_dir: sysfs_remove_link(mk->drivers_dir, driver_name); + +out_free_driver_name: kfree(driver_name); +out_remove_kobj: + sysfs_remove_link(&drv->p->kobj, "module"); return ret; } From 5060a1be93991a33026c309e5ba5ac0a892deb27 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Wed, 7 Aug 2024 16:12:25 +0200 Subject: [PATCH 356/534] serial: 8250: omap: Cleanup on error in request_irq [ Upstream commit 35e648a16018b747897be2ccc3ce95ff23237bb5 ] If devm_request_irq fails, the code does not cleanup many things that were setup before. Instead of directly returning ret we should jump to err. Fixes: fef4f600319e ("serial: 8250: omap: Fix life cycle issues for interrupt handlers") Signed-off-by: Markus Schneider-Pargmann Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Link: https://lore.kernel.org/r/20240807141227.1093006-4-msp@baylibre.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 8f472a2080ff..4caecc3525bf 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1567,7 +1567,7 @@ static int omap8250_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, irq, omap8250_irq, 0, dev_name(&pdev->dev), priv); if (ret < 0) - return ret; + goto err; priv->wakeirq = irq_of_parse_and_map(np, 1); From 257c7a39092e59069bad51ad823a8f25b31d66ea Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 2 Jul 2024 14:28:46 +0100 Subject: [PATCH 357/534] coresight: tmc: sg: Do not leak sg_table [ Upstream commit c58dc5a1f886f2fcc1133746d0cbaa1fe7fd44ff ] Running perf with cs_etm on Juno triggers the following kmemleak warning ! :~# cat /sys/kernel/debug/kmemleak unreferenced object 0xffffff8806b6d720 (size 96): comm "perf", pid 562, jiffies 4297810960 hex dump (first 32 bytes): 38 d8 13 07 88 ff ff ff 00 d0 9e 85 c0 ff ff ff 8............... 00 10 00 88 c0 ff ff ff 00 f0 ff f7 ff 00 00 00 ................ backtrace (crc 1dbf6e00): [] kmemleak_alloc+0xbc/0xd8 [] kmalloc_trace_noprof+0x220/0x2e8 [] tmc_alloc_sg_table+0x48/0x208 [coresight_tmc] [] tmc_etr_alloc_sg_buf+0xac/0x240 [coresight_tmc] [] tmc_alloc_etr_buf.constprop.0+0x1f0/0x260 [coresight_tmc] [] alloc_etr_buf.constprop.0.isra.0+0x74/0xa8 [coresight_tmc] [] tmc_alloc_etr_buffer+0x110/0x260 [coresight_tmc] [] etm_setup_aux+0x204/0x3b0 [coresight] [] rb_alloc_aux+0x20c/0x318 [] perf_mmap+0x2e4/0x7a0 [] mmap_region+0x3b0/0xa08 [] do_mmap+0x3a0/0x500 [] vm_mmap_pgoff+0x100/0x1d0 [] ksys_mmap_pgoff+0xb8/0x110 [] __arm64_sys_mmap+0x38/0x58 [] invoke_syscall.constprop.0+0x58/0x100 This due to the fact that we do not free the "sg_table" itself while freeing up the SG table and data pages. Fix this by freeing the sg_table in tmc_free_sg_table(). Fixes: 99443ea19e8b ("coresight: Add generic TMC sg table framework") Cc: Mike Leach Cc: James Clark Signed-off-by: Suzuki K Poulose Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20240702132846.1677261-1-suzuki.poulose@arm.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 8311e1028ddb..f3312fbcdc0f 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -255,6 +255,7 @@ void tmc_free_sg_table(struct tmc_sg_table *sg_table) { tmc_free_table_pages(sg_table); tmc_free_data_pages(sg_table); + kfree(sg_table); } EXPORT_SYMBOL_GPL(tmc_free_sg_table); @@ -336,7 +337,6 @@ struct tmc_sg_table *tmc_alloc_sg_table(struct device *dev, rc = tmc_alloc_table_pages(sg_table); if (rc) { tmc_free_sg_table(sg_table); - kfree(sg_table); return ERR_PTR(rc); } From c16fa6d5018b0cd77d2462fa87c248fa0b8b754e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Jul 2024 14:48:23 -0700 Subject: [PATCH 358/534] interconnect: icc-clk: Add missed num_nodes initialization [ Upstream commit c801ed86840ec38b2a9bcafeee3d7c9e14c743f3 ] With the new __counted_by annotation, the "num_nodes" struct member must be set before accessing the "nodes" array. This initialization was done in other places where a new struct icc_onecell_data is allocated, but this case in icc_clk_register() was missed. Set "num_nodes" after allocation. Fixes: dd4904f3b924 ("interconnect: qcom: Annotate struct icc_onecell_data with __counted_by") Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240716214819.work.328-kees@kernel.org Signed-off-by: Georgi Djakov Signed-off-by: Sasha Levin --- drivers/interconnect/icc-clk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/interconnect/icc-clk.c b/drivers/interconnect/icc-clk.c index d787f2ea36d9..a91df709cfb2 100644 --- a/drivers/interconnect/icc-clk.c +++ b/drivers/interconnect/icc-clk.c @@ -87,6 +87,7 @@ struct icc_provider *icc_clk_register(struct device *dev, onecell = devm_kzalloc(dev, struct_size(onecell, nodes, 2 * num_clocks), GFP_KERNEL); if (!onecell) return ERR_PTR(-ENOMEM); + onecell->num_nodes = 2 * num_clocks; qp = devm_kzalloc(dev, struct_size(qp, clocks, num_clocks), GFP_KERNEL); if (!qp) @@ -133,8 +134,6 @@ struct icc_provider *icc_clk_register(struct device *dev, onecell->nodes[j++] = node; } - onecell->num_nodes = j; - ret = icc_provider_register(provider); if (ret) goto err; From 70a180b8d84b642c7fef3b40c956eff2034d2781 Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Wed, 28 Aug 2024 16:42:28 +0800 Subject: [PATCH 359/534] cxl/pci: Fix to record only non-zero ranges [ Upstream commit 55e268694e8b07026c88191f9b6949b6887d9ce3 ] The function cxl_dvsec_rr_decode() retrieves and records DVSEC ranges into info->dvsec_range[], regardless of whether it is non-zero range, and the variable info->ranges indicates the number of non-zero ranges. However, in cxl_hdm_decode_init(), the validation for info->dvsec_range[] occurs in a for loop that iterates based on info->ranges. It may result in zero range to be validated but non-zero range not be validated, in turn, the number of allowed ranges is to be 0. Address it by only record non-zero ranges. This fix is not urgent as it requires a configuration that zeroes out the first dvsec range while populating the second. This has not been observed, but it is theoretically possible. If this gets picked up for -stable, no harm done, but there is no urgency to backport. Fixes: 560f78559006 ("cxl/pci: Retrieve CXL DVSEC memory info") Reviewed-by: Jonathan Cameron Signed-off-by: Yanfei Xu Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240828084231.1378789-2-yanfei.xu@intel.com Signed-off-by: Dave Jiang Signed-off-by: Sasha Levin --- drivers/cxl/core/pci.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c963cd9e88d1..6edfd0546673 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -388,10 +388,6 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; if (!size) { - info->dvsec_range[i] = (struct range) { - .start = 0, - .end = CXL_RESOURCE_NONE, - }; continue; } @@ -409,12 +405,10 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; - info->dvsec_range[i] = (struct range) { + info->dvsec_range[ranges++] = (struct range) { .start = base, .end = base + size - 1 }; - - ranges++; } info->ranges = ranges; From ca64edd7ae93402af2596a952e0d94d545e2b9c0 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 16 Aug 2024 11:19:00 +0800 Subject: [PATCH 360/534] vhost_vdpa: assign irq bypass producer token correctly [ Upstream commit 02e9e9366fefe461719da5d173385b6685f70319 ] We used to call irq_bypass_unregister_producer() in vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the token pointer is still valid or not. Actually, we use the eventfd_ctx as the token so the life cycle of the token should be bound to the VHOST_SET_VRING_CALL instead of vhost_vdpa_setup_vq_irq() which could be called by set_status(). Fixing this by setting up irq bypass producer's token when handling VHOST_SET_VRING_CALL and un-registering the producer before calling vhost_vring_ioctl() to prevent a possible use after free as eventfd could have been released in vhost_vring_ioctl(). And such registering and unregistering will only be done if DRIVER_OK is set. Reported-by: Dragos Tatulea Tested-by: Dragos Tatulea Reviewed-by: Dragos Tatulea Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa") Signed-off-by: Jason Wang Message-Id: <20240816031900.18013-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vdpa.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index da2c31ccc138..c29a195a0175 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -191,11 +191,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) if (irq < 0) return; - irq_bypass_unregister_producer(&vq->call_ctx.producer); if (!vq->call_ctx.ctx) return; - vq->call_ctx.producer.token = vq->call_ctx.ctx; vq->call_ctx.producer.irq = irq; ret = irq_bypass_register_producer(&vq->call_ctx.producer); if (unlikely(ret)) @@ -627,6 +625,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, vq->last_avail_idx = vq_state.split.avail_index; } break; + case VHOST_SET_VRING_CALL: + if (vq->call_ctx.ctx) { + if (ops->get_status(vdpa) & + VIRTIO_CONFIG_S_DRIVER_OK) + vhost_vdpa_unsetup_vq_irq(v, idx); + vq->call_ctx.producer.token = NULL; + } + break; } r = vhost_vring_ioctl(&v->vdev, cmd, argp); @@ -659,13 +665,16 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, cb.callback = vhost_vdpa_virtqueue_cb; cb.private = vq; cb.trigger = vq->call_ctx.ctx; + vq->call_ctx.producer.token = vq->call_ctx.ctx; + if (ops->get_status(vdpa) & + VIRTIO_CONFIG_S_DRIVER_OK) + vhost_vdpa_setup_vq_irq(v, idx); } else { cb.callback = NULL; cb.private = NULL; cb.trigger = NULL; } ops->set_vq_cb(vdpa, idx, &cb); - vhost_vdpa_setup_vq_irq(v, idx); break; case VHOST_SET_VRING_NUM: @@ -1316,6 +1325,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) for (i = 0; i < nvqs; i++) { vqs[i] = &v->vqs[i]; vqs[i]->handle_kick = handle_vq_kick; + vqs[i]->call_ctx.ctx = NULL; } vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); From 66e78ade976dbd9bea09166aa8d66afc0963cde4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Sep 2024 10:39:15 +0300 Subject: [PATCH 361/534] ep93xx: clock: Fix off by one in ep93xx_div_recalc_rate() [ Upstream commit c7f06284a6427475e3df742215535ec3f6cd9662 ] The psc->div[] array has psc->num_div elements. These values come from when we call clk_hw_register_div(). It's adc_divisors and ARRAY_SIZE(adc_divisors)) and so on. So this condition needs to be >= instead of > to prevent an out of bounds read. Fixes: 9645ccc7bd7a ("ep93xx: clock: convert in-place to COMMON_CLK") Signed-off-by: Dan Carpenter Acked-by: Alexander Sverdlin Reviewed-by: Nikita Shubin Signed-off-by: Alexander Sverdlin Link: https://lore.kernel.org/r/1caf01ad4c0a8069535813c26c7f0b8ea011155e.camel@linaro.org Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/mach-ep93xx/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index 85a496ddc619..e9f72a529b50 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -359,7 +359,7 @@ static unsigned long ep93xx_div_recalc_rate(struct clk_hw *hw, u32 val = __raw_readl(psc->reg); u8 index = (val & psc->mask) >> psc->shift; - if (index > psc->num_div) + if (index >= psc->num_div) return 0; return DIV_ROUND_UP_ULL(parent_rate, psc->div[index]); From 9360d077d3197f895f34cb8e6ae1b599278abf9d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Sep 2024 15:05:18 +0200 Subject: [PATCH 362/534] Revert "dm: requeue IO if mapping table not yet available" [ Upstream commit c8691cd0fc11197515ed148de0780d927bfca38b ] This reverts commit fa247089de9936a46e290d4724cb5f0b845600f5. The following sequence of commands causes a livelock - there will be workqueue process looping and consuming 100% CPU: dmsetup create --notable test truncate -s 1MiB testdata losetup /dev/loop0 testdata dmsetup load test --table '0 2048 linear /dev/loop0 0' dd if=/dev/zero of=/dev/dm-0 bs=16k count=1 conv=fdatasync The livelock is caused by the commit fa247089de99. The commit claims that it fixes a race condition, however, it is unknown what the actual race condition is and what program is involved in the race condition. When the inactive table is loaded, the nodes /dev/dm-0 and /sys/block/dm-0 are created. /dev/dm-0 has zero size at this point. When the device is suspended and resumed, the nodes /dev/mapper/test and /dev/disk/* are created. If some program opens a block device before it is created by dmsetup or lvm, the program is buggy, so dm could just report an error as it used to do before. Reported-by: Zdenek Kabelac Signed-off-by: Mikulas Patocka Fixes: fa247089de99 ("dm: requeue IO if mapping table not yet available") Signed-off-by: Sasha Levin --- drivers/md/dm-rq.c | 4 +++- drivers/md/dm.c | 11 ++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index f7e9a3632eb3..499f8cc8a39f 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -496,8 +496,10 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, map = dm_get_live_table(md, &srcu_idx); if (unlikely(!map)) { + DMERR_LIMIT("%s: mapping table unavailable, erroring io", + dm_device_name(md)); dm_put_live_table(md, srcu_idx); - return BLK_STS_RESOURCE; + return BLK_STS_IOERR; } ti = dm_table_find_target(map, 0); dm_put_live_table(md, srcu_idx); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8ec0a263744a..5dd0a42463a2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1817,10 +1817,15 @@ static void dm_submit_bio(struct bio *bio) struct dm_table *map; map = dm_get_live_table(md, &srcu_idx); + if (unlikely(!map)) { + DMERR_LIMIT("%s: mapping table unavailable, erroring io", + dm_device_name(md)); + bio_io_error(bio); + goto out; + } - /* If suspended, or map not yet available, queue this IO for later */ - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || - unlikely(!map)) { + /* If suspended, queue this IO for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); else if (bio->bi_opf & REQ_RAHEAD) From bcce13930b2eb7eed4284cd4f21d79d38e3344a7 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 13 Sep 2024 10:57:11 -0400 Subject: [PATCH 363/534] net: xilinx: axienet: Schedule NAPI in two steps [ Upstream commit ba0da2dc934ec5ac32bbeecbd0670da16ba03565 ] As advised by Documentation/networking/napi.rst, masking IRQs after calling napi_schedule can be racy. Avoid this by only masking/scheduling if napi_schedule_prep returns true. Fixes: 9e2bc267e780 ("net: axienet: Use NAPI for TX completion path") Fixes: cc37610caaf8 ("net: axienet: implement NAPI and GRO receive") Signed-off-by: Sean Anderson Reviewed-by: Shannon Nelson Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240913145711.2284295-1-sean.anderson@linux.dev Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 65d7aaad43fe..f869d61e3b86 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1042,9 +1042,10 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) u32 cr = lp->tx_dma_cr; cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - napi_schedule(&lp->napi_tx); + if (napi_schedule_prep(&lp->napi_tx)) { + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + __napi_schedule(&lp->napi_tx); + } } return IRQ_HANDLED; @@ -1086,9 +1087,10 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) u32 cr = lp->rx_dma_cr; cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); - - napi_schedule(&lp->napi_rx); + if (napi_schedule_prep(&lp->napi_rx)) { + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + __napi_schedule(&lp->napi_rx); + } } return IRQ_HANDLED; From 89bab8310a0a4adbdd9ba40c8b982d899bb5488f Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 13 Sep 2024 10:51:56 -0400 Subject: [PATCH 364/534] net: xilinx: axienet: Fix packet counting [ Upstream commit 5a6caa2cfabb559309b5ce29ee7c8e9ce1a9a9df ] axienet_free_tx_chain returns the number of DMA descriptors it's handled. However, axienet_tx_poll treats the return as the number of packets. When scatter-gather SKBs are enabled, a single packet may use multiple DMA descriptors, which causes incorrect packet counts. Fix this by explicitly keepting track of the number of packets processed as separate from the DMA descriptors. Budget does not affect the number of Tx completions we can process for NAPI, so we use the ring size as the limit instead of budget. As we no longer return the number of descriptors processed to axienet_tx_poll, we now update tx_bd_ci in axienet_free_tx_chain. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20240913145156.2283067-1-sean.anderson@linux.dev Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/xilinx/xilinx_axienet_main.c | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index f869d61e3b86..62c10eb4f0ad 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -652,15 +652,15 @@ static int axienet_device_reset(struct net_device *ndev) * * Would either be called after a successful transmit operation, or after * there was an error when setting up the chain. - * Returns the number of descriptors handled. + * Returns the number of packets handled. */ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, int nr_bds, bool force, u32 *sizep, int budget) { struct axidma_bd *cur_p; unsigned int status; + int i, packets = 0; dma_addr_t phys; - int i; for (i = 0; i < nr_bds; i++) { cur_p = &lp->tx_bd_v[(first_bd + i) % lp->tx_bd_num]; @@ -679,8 +679,10 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); - if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) + if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) { napi_consume_skb(cur_p->skb, budget); + packets++; + } cur_p->app0 = 0; cur_p->app1 = 0; @@ -696,7 +698,13 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; } - return i; + if (!force) { + lp->tx_bd_ci += i; + if (lp->tx_bd_ci >= lp->tx_bd_num) + lp->tx_bd_ci %= lp->tx_bd_num; + } + + return packets; } /** @@ -747,13 +755,10 @@ static int axienet_tx_poll(struct napi_struct *napi, int budget) u32 size = 0; int packets; - packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, budget, false, &size, budget); + packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, lp->tx_bd_num, false, + &size, budget); if (packets) { - lp->tx_bd_ci += packets; - if (lp->tx_bd_ci >= lp->tx_bd_num) - lp->tx_bd_ci %= lp->tx_bd_num; - u64_stats_update_begin(&lp->tx_stat_sync); u64_stats_add(&lp->tx_packets, packets); u64_stats_add(&lp->tx_bytes, size); From af4b8a704f26f38310655bad67fd8096293275a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Sep 2024 17:06:15 +0000 Subject: [PATCH 365/534] netfilter: nf_reject_ipv6: fix nf_reject_ip6_tcphdr_put() [ Upstream commit 9c778fe48d20ef362047e3376dee56d77f8500d4 ] syzbot reported that nf_reject_ip6_tcphdr_put() was possibly sending garbage on the four reserved tcp bits (th->res1) Use skb_put_zero() to clear the whole TCP header, as done in nf_reject_ip_tcphdr_put() BUG: KMSAN: uninit-value in nf_reject_ip6_tcphdr_put+0x688/0x6c0 net/ipv6/netfilter/nf_reject_ipv6.c:255 nf_reject_ip6_tcphdr_put+0x688/0x6c0 net/ipv6/netfilter/nf_reject_ipv6.c:255 nf_send_reset6+0xd84/0x15b0 net/ipv6/netfilter/nf_reject_ipv6.c:344 nft_reject_inet_eval+0x3c1/0x880 net/netfilter/nft_reject_inet.c:48 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x438/0x22a0 net/netfilter/nf_tables_core.c:288 nft_do_chain_inet+0x41a/0x4f0 net/netfilter/nft_chain_filter.c:161 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] NF_HOOK include/linux/netfilter.h:312 [inline] ipv6_rcv+0x29b/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x1da/0xa00 net/core/dev.c:5775 process_backlog+0x4ad/0xa50 net/core/dev.c:6108 __napi_poll+0xe7/0x980 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0xa5a/0x19b0 net/core/dev.c:6963 handle_softirqs+0x1ce/0x800 kernel/softirq.c:554 __do_softirq+0x14/0x1a kernel/softirq.c:588 do_softirq+0x9a/0x100 kernel/softirq.c:455 __local_bh_enable_ip+0x9f/0xb0 kernel/softirq.c:382 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:908 [inline] __dev_queue_xmit+0x2692/0x5610 net/core/dev.c:4450 dev_queue_xmit include/linux/netdevice.h:3105 [inline] neigh_resolve_output+0x9ca/0xae0 net/core/neighbour.c:1565 neigh_output include/net/neighbour.h:542 [inline] ip6_finish_output2+0x2347/0x2ba0 net/ipv6/ip6_output.c:141 __ip6_finish_output net/ipv6/ip6_output.c:215 [inline] ip6_finish_output+0xbb8/0x14b0 net/ipv6/ip6_output.c:226 NF_HOOK_COND include/linux/netfilter.h:303 [inline] ip6_output+0x356/0x620 net/ipv6/ip6_output.c:247 dst_output include/net/dst.h:450 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip6_xmit+0x1ba6/0x25d0 net/ipv6/ip6_output.c:366 inet6_csk_xmit+0x442/0x530 net/ipv6/inet6_connection_sock.c:135 __tcp_transmit_skb+0x3b07/0x4880 net/ipv4/tcp_output.c:1466 tcp_transmit_skb net/ipv4/tcp_output.c:1484 [inline] tcp_connect+0x35b6/0x7130 net/ipv4/tcp_output.c:4143 tcp_v6_connect+0x1bcc/0x1e40 net/ipv6/tcp_ipv6.c:333 __inet_stream_connect+0x2ef/0x1730 net/ipv4/af_inet.c:679 inet_stream_connect+0x6a/0xd0 net/ipv4/af_inet.c:750 __sys_connect_file net/socket.c:2061 [inline] __sys_connect+0x606/0x690 net/socket.c:2078 __do_sys_connect net/socket.c:2088 [inline] __se_sys_connect net/socket.c:2085 [inline] __x64_sys_connect+0x91/0xe0 net/socket.c:2085 x64_sys_call+0x27a5/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:43 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was stored to memory at: nf_reject_ip6_tcphdr_put+0x60c/0x6c0 net/ipv6/netfilter/nf_reject_ipv6.c:249 nf_send_reset6+0xd84/0x15b0 net/ipv6/netfilter/nf_reject_ipv6.c:344 nft_reject_inet_eval+0x3c1/0x880 net/netfilter/nft_reject_inet.c:48 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x438/0x22a0 net/netfilter/nf_tables_core.c:288 nft_do_chain_inet+0x41a/0x4f0 net/netfilter/nft_chain_filter.c:161 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] NF_HOOK include/linux/netfilter.h:312 [inline] ipv6_rcv+0x29b/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x1da/0xa00 net/core/dev.c:5775 process_backlog+0x4ad/0xa50 net/core/dev.c:6108 __napi_poll+0xe7/0x980 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0xa5a/0x19b0 net/core/dev.c:6963 handle_softirqs+0x1ce/0x800 kernel/softirq.c:554 __do_softirq+0x14/0x1a kernel/softirq.c:588 Uninit was stored to memory at: nf_reject_ip6_tcphdr_put+0x2ca/0x6c0 net/ipv6/netfilter/nf_reject_ipv6.c:231 nf_send_reset6+0xd84/0x15b0 net/ipv6/netfilter/nf_reject_ipv6.c:344 nft_reject_inet_eval+0x3c1/0x880 net/netfilter/nft_reject_inet.c:48 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x438/0x22a0 net/netfilter/nf_tables_core.c:288 nft_do_chain_inet+0x41a/0x4f0 net/netfilter/nft_chain_filter.c:161 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] NF_HOOK include/linux/netfilter.h:312 [inline] ipv6_rcv+0x29b/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x1da/0xa00 net/core/dev.c:5775 process_backlog+0x4ad/0xa50 net/core/dev.c:6108 __napi_poll+0xe7/0x980 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0xa5a/0x19b0 net/core/dev.c:6963 handle_softirqs+0x1ce/0x800 kernel/softirq.c:554 __do_softirq+0x14/0x1a kernel/softirq.c:588 Uninit was created at: slab_post_alloc_hook mm/slub.c:3998 [inline] slab_alloc_node mm/slub.c:4041 [inline] kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4084 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:583 __alloc_skb+0x363/0x7b0 net/core/skbuff.c:674 alloc_skb include/linux/skbuff.h:1320 [inline] nf_send_reset6+0x98d/0x15b0 net/ipv6/netfilter/nf_reject_ipv6.c:327 nft_reject_inet_eval+0x3c1/0x880 net/netfilter/nft_reject_inet.c:48 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x438/0x22a0 net/netfilter/nf_tables_core.c:288 nft_do_chain_inet+0x41a/0x4f0 net/netfilter/nft_chain_filter.c:161 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] NF_HOOK include/linux/netfilter.h:312 [inline] ipv6_rcv+0x29b/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x1da/0xa00 net/core/dev.c:5775 process_backlog+0x4ad/0xa50 net/core/dev.c:6108 __napi_poll+0xe7/0x980 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0xa5a/0x19b0 net/core/dev.c:6963 handle_softirqs+0x1ce/0x800 kernel/softirq.c:554 __do_softirq+0x14/0x1a kernel/softirq.c:588 Fixes: c8d7b98bec43 ("netfilter: move nf_send_resetX() code to nf_reject_ipvX modules") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20240913170615.3670897-1-edumazet@google.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/netfilter/nf_reject_ipv6.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 71d692728230..690d1c047691 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -223,33 +223,23 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, const struct tcphdr *oth, unsigned int otcplen) { struct tcphdr *tcph; - int needs_ack; skb_reset_transport_header(nskb); - tcph = skb_put(nskb, sizeof(struct tcphdr)); + tcph = skb_put_zero(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; tcph->source = oth->dest; tcph->dest = oth->source; if (oth->ack) { - needs_ack = 0; tcph->seq = oth->ack_seq; - tcph->ack_seq = 0; } else { - needs_ack = 1; tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + otcplen - (oth->doff<<2)); - tcph->seq = 0; + tcph->ack = 1; } - /* Reset flags */ - ((u_int8_t *)tcph)[13] = 0; tcph->rst = 1; - tcph->ack = needs_ack; - tcph->window = 0; - tcph->urg_ptr = 0; - tcph->check = 0; /* Adjust TCP checksum */ tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, From d2abc379071881798d20e2ac1d332ad855ae22f3 Mon Sep 17 00:00:00 2001 From: Kaixin Wang Date: Sun, 15 Sep 2024 22:40:46 +0800 Subject: [PATCH 366/534] net: seeq: Fix use after free vulnerability in ether3 Driver Due to Race Condition [ Upstream commit b5109b60ee4fcb2f2bb24f589575e10cc5283ad4 ] In the ether3_probe function, a timer is initialized with a callback function ether3_ledoff, bound to &prev(dev)->timer. Once the timer is started, there is a risk of a race condition if the module or device is removed, triggering the ether3_remove function to perform cleanup. The sequence of operations that may lead to a UAF bug is as follows: CPU0 CPU1 | ether3_ledoff ether3_remove | free_netdev(dev); | put_devic | kfree(dev); | | ether3_outw(priv(dev)->regs.config2 |= CFG2_CTRLO, REG_CONFIG2); | // use dev Fix it by ensuring that the timer is canceled before proceeding with the cleanup in ether3_remove. Fixes: 6fd9c53f7186 ("net: seeq: Convert timers to use timer_setup()") Signed-off-by: Kaixin Wang Link: https://patch.msgid.link/20240915144045.451-1-kxwang23@m.fudan.edu.cn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/seeq/ether3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index c672f92d65e9..9319a2675e7b 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -847,9 +847,11 @@ static void ether3_remove(struct expansion_card *ec) { struct net_device *dev = ecard_get_drvdata(ec); + ether3_outw(priv(dev)->regs.config2 |= CFG2_CTRLO, REG_CONFIG2); ecard_set_drvdata(ec, NULL); unregister_netdev(dev); + del_timer_sync(&priv(dev)->timer); free_netdev(dev); ecard_release_resources(ec); } From 88297d3c1a71234e3d1533f776a39e1b71553121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Sep 2024 20:57:13 +0200 Subject: [PATCH 367/534] net: ipv6: select DST_CACHE from IPV6_RPL_LWTUNNEL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 93c21077bb9ba08807c459982d440dbbee4c7af3 ] The rpl sr tunnel code contains calls to dst_cache_*() which are only present when the dst cache is built. Select DST_CACHE to build the dst cache, similar to other kconfig options in the same file. Compiling the rpl sr tunnel without DST_CACHE will lead to linker errors. Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel") Signed-off-by: Thomas Weißschuh Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 08d4b7132d4c..1c9c686d9522 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -323,6 +323,7 @@ config IPV6_RPL_LWTUNNEL bool "IPv6: RPL Source Routing Header support" depends on IPV6 select LWTUNNEL + select DST_CACHE help Support for RFC6554 RPL Source Routing Header using the lightweight tunnels mechanism. From 570f7d8c9bf14f041152ba8353d4330ef7575915 Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Tue, 10 Sep 2024 15:08:22 -0400 Subject: [PATCH 368/534] tcp: check skb is non-NULL in tcp_rto_delta_us() [ Upstream commit c8770db2d54437a5f49417ae7b46f7de23d14db6 ] We have some machines running stock Ubuntu 20.04.6 which is their 5.4.0-174-generic kernel that are running ceph and recently hit a null ptr dereference in tcp_rearm_rto(). Initially hitting it from the TLP path, but then later we also saw it getting hit from the RACK case as well. Here are examples of the oops messages we saw in each of those cases: Jul 26 15:05:02 rx [11061395.780353] BUG: kernel NULL pointer dereference, address: 0000000000000020 Jul 26 15:05:02 rx [11061395.787572] #PF: supervisor read access in kernel mode Jul 26 15:05:02 rx [11061395.792971] #PF: error_code(0x0000) - not-present page Jul 26 15:05:02 rx [11061395.798362] PGD 0 P4D 0 Jul 26 15:05:02 rx [11061395.801164] Oops: 0000 [#1] SMP NOPTI Jul 26 15:05:02 rx [11061395.805091] CPU: 0 PID: 9180 Comm: msgr-worker-1 Tainted: G W 5.4.0-174-generic #193-Ubuntu Jul 26 15:05:02 rx [11061395.814996] Hardware name: Supermicro SMC 2x26 os-gen8 64C NVME-Y 256G/H12SSW-NTR, BIOS 2.5.V1.2U.NVMe.UEFI 05/09/2023 Jul 26 15:05:02 rx [11061395.825952] RIP: 0010:tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061395.830656] Code: 87 ca 04 00 00 00 5b 41 5c 41 5d 5d c3 c3 49 8b bc 24 40 06 00 00 eb 8d 48 bb cf f7 53 e3 a5 9b c4 20 4c 89 ef e8 0c fe 0e 00 <48> 8b 78 20 48 c1 ef 03 48 89 f8 41 8b bc 24 80 04 00 00 48 f7 e3 Jul 26 15:05:02 rx [11061395.849665] RSP: 0018:ffffb75d40003e08 EFLAGS: 00010246 Jul 26 15:05:02 rx [11061395.855149] RAX: 0000000000000000 RBX: 20c49ba5e353f7cf RCX: 0000000000000000 Jul 26 15:05:02 rx [11061395.862542] RDX: 0000000062177c30 RSI: 000000000000231c RDI: ffff9874ad283a60 Jul 26 15:05:02 rx [11061395.869933] RBP: ffffb75d40003e20 R08: 0000000000000000 R09: ffff987605e20aa8 Jul 26 15:05:02 rx [11061395.877318] R10: ffffb75d40003f00 R11: ffffb75d4460f740 R12: ffff9874ad283900 Jul 26 15:05:02 rx [11061395.884710] R13: ffff9874ad283a60 R14: ffff9874ad283980 R15: ffff9874ad283d30 Jul 26 15:05:02 rx [11061395.892095] FS: 00007f1ef4a2e700(0000) GS:ffff987605e00000(0000) knlGS:0000000000000000 Jul 26 15:05:02 rx [11061395.900438] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Jul 26 15:05:02 rx [11061395.906435] CR2: 0000000000000020 CR3: 0000003e450ba003 CR4: 0000000000760ef0 Jul 26 15:05:02 rx [11061395.913822] PKRU: 55555554 Jul 26 15:05:02 rx [11061395.916786] Call Trace: Jul 26 15:05:02 rx [11061395.919488] Jul 26 15:05:02 rx [11061395.921765] ? show_regs.cold+0x1a/0x1f Jul 26 15:05:02 rx [11061395.925859] ? __die+0x90/0xd9 Jul 26 15:05:02 rx [11061395.929169] ? no_context+0x196/0x380 Jul 26 15:05:02 rx [11061395.933088] ? ip6_protocol_deliver_rcu+0x4e0/0x4e0 Jul 26 15:05:02 rx [11061395.938216] ? ip6_sublist_rcv_finish+0x3d/0x50 Jul 26 15:05:02 rx [11061395.943000] ? __bad_area_nosemaphore+0x50/0x1a0 Jul 26 15:05:02 rx [11061395.947873] ? bad_area_nosemaphore+0x16/0x20 Jul 26 15:05:02 rx [11061395.952486] ? do_user_addr_fault+0x267/0x450 Jul 26 15:05:02 rx [11061395.957104] ? ipv6_list_rcv+0x112/0x140 Jul 26 15:05:02 rx [11061395.961279] ? __do_page_fault+0x58/0x90 Jul 26 15:05:02 rx [11061395.965458] ? do_page_fault+0x2c/0xe0 Jul 26 15:05:02 rx [11061395.969465] ? page_fault+0x34/0x40 Jul 26 15:05:02 rx [11061395.973217] ? tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061395.977313] ? tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061395.981408] tcp_send_loss_probe+0x10b/0x220 Jul 26 15:05:02 rx [11061395.985937] tcp_write_timer_handler+0x1b4/0x240 Jul 26 15:05:02 rx [11061395.990809] tcp_write_timer+0x9e/0xe0 Jul 26 15:05:02 rx [11061395.994814] ? tcp_write_timer_handler+0x240/0x240 Jul 26 15:05:02 rx [11061395.999866] call_timer_fn+0x32/0x130 Jul 26 15:05:02 rx [11061396.003782] __run_timers.part.0+0x180/0x280 Jul 26 15:05:02 rx [11061396.008309] ? recalibrate_cpu_khz+0x10/0x10 Jul 26 15:05:02 rx [11061396.012841] ? native_x2apic_icr_write+0x30/0x30 Jul 26 15:05:02 rx [11061396.017718] ? lapic_next_event+0x21/0x30 Jul 26 15:05:02 rx [11061396.021984] ? clockevents_program_event+0x8f/0xe0 Jul 26 15:05:02 rx [11061396.027035] run_timer_softirq+0x2a/0x50 Jul 26 15:05:02 rx [11061396.031212] __do_softirq+0xd1/0x2c1 Jul 26 15:05:02 rx [11061396.035044] do_softirq_own_stack+0x2a/0x40 Jul 26 15:05:02 rx [11061396.039480] Jul 26 15:05:02 rx [11061396.041840] do_softirq.part.0+0x46/0x50 Jul 26 15:05:02 rx [11061396.046022] __local_bh_enable_ip+0x50/0x60 Jul 26 15:05:02 rx [11061396.050460] _raw_spin_unlock_bh+0x1e/0x20 Jul 26 15:05:02 rx [11061396.054817] nf_conntrack_tcp_packet+0x29e/0xbe0 [nf_conntrack] Jul 26 15:05:02 rx [11061396.060994] ? get_l4proto+0xe7/0x190 [nf_conntrack] Jul 26 15:05:02 rx [11061396.066220] nf_conntrack_in+0xe9/0x670 [nf_conntrack] Jul 26 15:05:02 rx [11061396.071618] ipv6_conntrack_local+0x14/0x20 [nf_conntrack] Jul 26 15:05:02 rx [11061396.077356] nf_hook_slow+0x45/0xb0 Jul 26 15:05:02 rx [11061396.081098] ip6_xmit+0x3f0/0x5d0 Jul 26 15:05:02 rx [11061396.084670] ? ipv6_anycast_cleanup+0x50/0x50 Jul 26 15:05:02 rx [11061396.089282] ? __sk_dst_check+0x38/0x70 Jul 26 15:05:02 rx [11061396.093381] ? inet6_csk_route_socket+0x13b/0x200 Jul 26 15:05:02 rx [11061396.098346] inet6_csk_xmit+0xa7/0xf0 Jul 26 15:05:02 rx [11061396.102263] __tcp_transmit_skb+0x550/0xb30 Jul 26 15:05:02 rx [11061396.106701] tcp_write_xmit+0x3c6/0xc20 Jul 26 15:05:02 rx [11061396.110792] ? __alloc_skb+0x98/0x1d0 Jul 26 15:05:02 rx [11061396.114708] __tcp_push_pending_frames+0x37/0x100 Jul 26 15:05:02 rx [11061396.119667] tcp_push+0xfd/0x100 Jul 26 15:05:02 rx [11061396.123150] tcp_sendmsg_locked+0xc70/0xdd0 Jul 26 15:05:02 rx [11061396.127588] tcp_sendmsg+0x2d/0x50 Jul 26 15:05:02 rx [11061396.131245] inet6_sendmsg+0x43/0x70 Jul 26 15:05:02 rx [11061396.135075] __sock_sendmsg+0x48/0x70 Jul 26 15:05:02 rx [11061396.138994] ____sys_sendmsg+0x212/0x280 Jul 26 15:05:02 rx [11061396.143172] ___sys_sendmsg+0x88/0xd0 Jul 26 15:05:02 rx [11061396.147098] ? __seccomp_filter+0x7e/0x6b0 Jul 26 15:05:02 rx [11061396.151446] ? __switch_to+0x39c/0x460 Jul 26 15:05:02 rx [11061396.155453] ? __switch_to_asm+0x42/0x80 Jul 26 15:05:02 rx [11061396.159636] ? __switch_to_asm+0x5a/0x80 Jul 26 15:05:02 rx [11061396.163816] __sys_sendmsg+0x5c/0xa0 Jul 26 15:05:02 rx [11061396.167647] __x64_sys_sendmsg+0x1f/0x30 Jul 26 15:05:02 rx [11061396.171832] do_syscall_64+0x57/0x190 Jul 26 15:05:02 rx [11061396.175748] entry_SYSCALL_64_after_hwframe+0x5c/0xc1 Jul 26 15:05:02 rx [11061396.181055] RIP: 0033:0x7f1ef692618d Jul 26 15:05:02 rx [11061396.184893] Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 ca ee ff ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 2f 44 89 c7 48 89 44 24 08 e8 fe ee ff ff 48 Jul 26 15:05:02 rx [11061396.203889] RSP: 002b:00007f1ef4a26aa0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e Jul 26 15:05:02 rx [11061396.211708] RAX: ffffffffffffffda RBX: 000000000000084b RCX: 00007f1ef692618d Jul 26 15:05:02 rx [11061396.219091] RDX: 0000000000004000 RSI: 00007f1ef4a26b10 RDI: 0000000000000275 Jul 26 15:05:02 rx [11061396.226475] RBP: 0000000000004000 R08: 0000000000000000 R09: 0000000000000020 Jul 26 15:05:02 rx [11061396.233859] R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000084b Jul 26 15:05:02 rx [11061396.241243] R13: 00007f1ef4a26b10 R14: 0000000000000275 R15: 000055592030f1e8 Jul 26 15:05:02 rx [11061396.248628] Modules linked in: vrf bridge stp llc vxlan ip6_udp_tunnel udp_tunnel nls_iso8859_1 amd64_edac_mod edac_mce_amd kvm_amd kvm crct10dif_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper wmi_bmof ipmi_ssif input_leds joydev rndis_host cdc_ether usbnet mii ast drm_vram_helper ttm drm_kms_helper i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt ccp mac_hid ipmi_si ipmi_devintf ipmi_msghandler nft_ct sch_fq_codel nf_tables_set nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink ramoops reed_solomon efi_pstore drm ip_tables x_tables autofs4 raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid0 multipath linear mlx5_ib ib_uverbs ib_core raid1 mlx5_core hid_generic pci_hyperv_intf crc32_pclmul tls usbhid ahci mlxfw bnxt_en libahci hid nvme i2c_piix4 nvme_core wmi Jul 26 15:05:02 rx [11061396.324334] CR2: 0000000000000020 Jul 26 15:05:02 rx [11061396.327944] ---[ end trace 68a2b679d1cfb4f1 ]--- Jul 26 15:05:02 rx [11061396.433435] RIP: 0010:tcp_rearm_rto+0xe4/0x160 Jul 26 15:05:02 rx [11061396.438137] Code: 87 ca 04 00 00 00 5b 41 5c 41 5d 5d c3 c3 49 8b bc 24 40 06 00 00 eb 8d 48 bb cf f7 53 e3 a5 9b c4 20 4c 89 ef e8 0c fe 0e 00 <48> 8b 78 20 48 c1 ef 03 48 89 f8 41 8b bc 24 80 04 00 00 48 f7 e3 Jul 26 15:05:02 rx [11061396.457144] RSP: 0018:ffffb75d40003e08 EFLAGS: 00010246 Jul 26 15:05:02 rx [11061396.462629] RAX: 0000000000000000 RBX: 20c49ba5e353f7cf RCX: 0000000000000000 Jul 26 15:05:02 rx [11061396.470012] RDX: 0000000062177c30 RSI: 000000000000231c RDI: ffff9874ad283a60 Jul 26 15:05:02 rx [11061396.477396] RBP: ffffb75d40003e20 R08: 0000000000000000 R09: ffff987605e20aa8 Jul 26 15:05:02 rx [11061396.484779] R10: ffffb75d40003f00 R11: ffffb75d4460f740 R12: ffff9874ad283900 Jul 26 15:05:02 rx [11061396.492164] R13: ffff9874ad283a60 R14: ffff9874ad283980 R15: ffff9874ad283d30 Jul 26 15:05:02 rx [11061396.499547] FS: 00007f1ef4a2e700(0000) GS:ffff987605e00000(0000) knlGS:0000000000000000 Jul 26 15:05:02 rx [11061396.507886] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Jul 26 15:05:02 rx [11061396.513884] CR2: 0000000000000020 CR3: 0000003e450ba003 CR4: 0000000000760ef0 Jul 26 15:05:02 rx [11061396.521267] PKRU: 55555554 Jul 26 15:05:02 rx [11061396.524230] Kernel panic - not syncing: Fatal exception in interrupt Jul 26 15:05:02 rx [11061396.530885] Kernel Offset: 0x1b200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Jul 26 15:05:03 rx [11061396.660181] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- After we hit this we disabled TLP by setting tcp_early_retrans to 0 and then hit the crash in the RACK case: Aug 7 07:26:16 rx [1006006.265582] BUG: kernel NULL pointer dereference, address: 0000000000000020 Aug 7 07:26:16 rx [1006006.272719] #PF: supervisor read access in kernel mode Aug 7 07:26:16 rx [1006006.278030] #PF: error_code(0x0000) - not-present page Aug 7 07:26:16 rx [1006006.283343] PGD 0 P4D 0 Aug 7 07:26:16 rx [1006006.286057] Oops: 0000 [#1] SMP NOPTI Aug 7 07:26:16 rx [1006006.289896] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.4.0-174-generic #193-Ubuntu Aug 7 07:26:16 rx [1006006.299107] Hardware name: Supermicro SMC 2x26 os-gen8 64C NVME-Y 256G/H12SSW-NTR, BIOS 2.5.V1.2U.NVMe.UEFI 05/09/2023 Aug 7 07:26:16 rx [1006006.309970] RIP: 0010:tcp_rearm_rto+0xe4/0x160 Aug 7 07:26:16 rx [1006006.314584] Code: 87 ca 04 00 00 00 5b 41 5c 41 5d 5d c3 c3 49 8b bc 24 40 06 00 00 eb 8d 48 bb cf f7 53 e3 a5 9b c4 20 4c 89 ef e8 0c fe 0e 00 <48> 8b 78 20 48 c1 ef 03 48 89 f8 41 8b bc 24 80 04 00 00 48 f7 e3 Aug 7 07:26:16 rx [1006006.333499] RSP: 0018:ffffb42600a50960 EFLAGS: 00010246 Aug 7 07:26:16 rx [1006006.338895] RAX: 0000000000000000 RBX: 20c49ba5e353f7cf RCX: 0000000000000000 Aug 7 07:26:16 rx [1006006.346193] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff92d687ed8160 Aug 7 07:26:16 rx [1006006.353489] RBP: ffffb42600a50978 R08: 0000000000000000 R09: 00000000cd896dcc Aug 7 07:26:16 rx [1006006.360786] R10: ffff92dc3404f400 R11: 0000000000000001 R12: ffff92d687ed8000 Aug 7 07:26:16 rx [1006006.368084] R13: ffff92d687ed8160 R14: 00000000cd896dcc R15: 00000000cd8fca81 Aug 7 07:26:16 rx [1006006.375381] FS: 0000000000000000(0000) GS:ffff93158ad40000(0000) knlGS:0000000000000000 Aug 7 07:26:16 rx [1006006.383632] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Aug 7 07:26:16 rx [1006006.389544] CR2: 0000000000000020 CR3: 0000003e775ce006 CR4: 0000000000760ee0 Aug 7 07:26:16 rx [1006006.396839] PKRU: 55555554 Aug 7 07:26:16 rx [1006006.399717] Call Trace: Aug 7 07:26:16 rx [1006006.402335] Aug 7 07:26:16 rx [1006006.404525] ? show_regs.cold+0x1a/0x1f Aug 7 07:26:16 rx [1006006.408532] ? __die+0x90/0xd9 Aug 7 07:26:16 rx [1006006.411760] ? no_context+0x196/0x380 Aug 7 07:26:16 rx [1006006.415599] ? __bad_area_nosemaphore+0x50/0x1a0 Aug 7 07:26:16 rx [1006006.420392] ? _raw_spin_lock+0x1e/0x30 Aug 7 07:26:16 rx [1006006.424401] ? bad_area_nosemaphore+0x16/0x20 Aug 7 07:26:16 rx [1006006.428927] ? do_user_addr_fault+0x267/0x450 Aug 7 07:26:16 rx [1006006.433450] ? __do_page_fault+0x58/0x90 Aug 7 07:26:16 rx [1006006.437542] ? do_page_fault+0x2c/0xe0 Aug 7 07:26:16 rx [1006006.441470] ? page_fault+0x34/0x40 Aug 7 07:26:16 rx [1006006.445134] ? tcp_rearm_rto+0xe4/0x160 Aug 7 07:26:16 rx [1006006.449145] tcp_ack+0xa32/0xb30 Aug 7 07:26:16 rx [1006006.452542] tcp_rcv_established+0x13c/0x670 Aug 7 07:26:16 rx [1006006.456981] ? sk_filter_trim_cap+0x48/0x220 Aug 7 07:26:16 rx [1006006.461419] tcp_v6_do_rcv+0xdb/0x450 Aug 7 07:26:16 rx [1006006.465257] tcp_v6_rcv+0xc2b/0xd10 Aug 7 07:26:16 rx [1006006.468918] ip6_protocol_deliver_rcu+0xd3/0x4e0 Aug 7 07:26:16 rx [1006006.473706] ip6_input_finish+0x15/0x20 Aug 7 07:26:16 rx [1006006.477710] ip6_input+0xa2/0xb0 Aug 7 07:26:16 rx [1006006.481109] ? ip6_protocol_deliver_rcu+0x4e0/0x4e0 Aug 7 07:26:16 rx [1006006.486151] ip6_sublist_rcv_finish+0x3d/0x50 Aug 7 07:26:16 rx [1006006.490679] ip6_sublist_rcv+0x1aa/0x250 Aug 7 07:26:16 rx [1006006.494779] ? ip6_rcv_finish_core.isra.0+0xa0/0xa0 Aug 7 07:26:16 rx [1006006.499828] ipv6_list_rcv+0x112/0x140 Aug 7 07:26:16 rx [1006006.503748] __netif_receive_skb_list_core+0x1a4/0x250 Aug 7 07:26:16 rx [1006006.509057] netif_receive_skb_list_internal+0x1a1/0x2b0 Aug 7 07:26:16 rx [1006006.514538] gro_normal_list.part.0+0x1e/0x40 Aug 7 07:26:16 rx [1006006.519068] napi_complete_done+0x91/0x130 Aug 7 07:26:16 rx [1006006.523352] mlx5e_napi_poll+0x18e/0x610 [mlx5_core] Aug 7 07:26:16 rx [1006006.528481] net_rx_action+0x142/0x390 Aug 7 07:26:16 rx [1006006.532398] __do_softirq+0xd1/0x2c1 Aug 7 07:26:16 rx [1006006.536142] irq_exit+0xae/0xb0 Aug 7 07:26:16 rx [1006006.539452] do_IRQ+0x5a/0xf0 Aug 7 07:26:16 rx [1006006.542590] common_interrupt+0xf/0xf Aug 7 07:26:16 rx [1006006.546421] Aug 7 07:26:16 rx [1006006.548695] RIP: 0010:native_safe_halt+0xe/0x10 Aug 7 07:26:16 rx [1006006.553399] Code: 7b ff ff ff eb bd 90 90 90 90 90 90 e9 07 00 00 00 0f 00 2d 36 2c 50 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 26 2c 50 00 fb f4 90 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 e8 dd 5e 61 ff 65 Aug 7 07:26:16 rx [1006006.572309] RSP: 0018:ffffb42600177e70 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffc2 Aug 7 07:26:16 rx [1006006.580040] RAX: ffffffff8ed08b20 RBX: 0000000000000005 RCX: 0000000000000001 Aug 7 07:26:16 rx [1006006.587337] RDX: 00000000f48eeca2 RSI: 0000000000000082 RDI: 0000000000000082 Aug 7 07:26:16 rx [1006006.594635] RBP: ffffb42600177e90 R08: 0000000000000000 R09: 000000000000020f Aug 7 07:26:16 rx [1006006.601931] R10: 0000000000100000 R11: 0000000000000000 R12: 0000000000000005 Aug 7 07:26:16 rx [1006006.609229] R13: ffff93157deb5f00 R14: 0000000000000000 R15: 0000000000000000 Aug 7 07:26:16 rx [1006006.616530] ? __cpuidle_text_start+0x8/0x8 Aug 7 07:26:16 rx [1006006.620886] ? default_idle+0x20/0x140 Aug 7 07:26:16 rx [1006006.624804] arch_cpu_idle+0x15/0x20 Aug 7 07:26:16 rx [1006006.628545] default_idle_call+0x23/0x30 Aug 7 07:26:16 rx [1006006.632640] do_idle+0x1fb/0x270 Aug 7 07:26:16 rx [1006006.636035] cpu_startup_entry+0x20/0x30 Aug 7 07:26:16 rx [1006006.640126] start_secondary+0x178/0x1d0 Aug 7 07:26:16 rx [1006006.644218] secondary_startup_64+0xa4/0xb0 Aug 7 07:26:17 rx [1006006.648568] Modules linked in: vrf bridge stp llc vxlan ip6_udp_tunnel udp_tunnel nls_iso8859_1 nft_ct amd64_edac_mod edac_mce_amd kvm_amd kvm crct10dif_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper wmi_bmof ipmi_ssif input_leds joydev rndis_host cdc_ether usbnet ast mii drm_vram_helper ttm drm_kms_helper i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt ccp mac_hid ipmi_si ipmi_devintf ipmi_msghandler sch_fq_codel nf_tables_set nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink ramoops reed_solomon efi_pstore drm ip_tables x_tables autofs4 raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid0 multipath linear mlx5_ib ib_uverbs ib_core raid1 hid_generic mlx5_core pci_hyperv_intf crc32_pclmul usbhid ahci tls mlxfw bnxt_en hid libahci nvme i2c_piix4 nvme_core wmi [last unloaded: cpuid] Aug 7 07:26:17 rx [1006006.726180] CR2: 0000000000000020 Aug 7 07:26:17 rx [1006006.729718] ---[ end trace e0e2e37e4e612984 ]--- Prior to seeing the first crash and on other machines we also see the warning in tcp_send_loss_probe() where packets_out is non-zero, but both transmit and retrans queues are empty so we know the box is seeing some accounting issue in this area: Jul 26 09:15:27 kernel: ------------[ cut here ]------------ Jul 26 09:15:27 kernel: invalid inflight: 2 state 1 cwnd 68 mss 8988 Jul 26 09:15:27 kernel: WARNING: CPU: 16 PID: 0 at net/ipv4/tcp_output.c:2605 tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: Modules linked in: vrf bridge stp llc vxlan ip6_udp_tunnel udp_tunnel nls_iso8859_1 nft_ct amd64_edac_mod edac_mce_amd kvm_amd kvm crct10dif_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper wmi_bmof ipmi_ssif joydev input_leds rndis_host cdc_ether usbnet mii ast drm_vram_helper ttm drm_kms_he> Jul 26 09:15:27 kernel: CPU: 16 PID: 0 Comm: swapper/16 Not tainted 5.4.0-174-generic #193-Ubuntu Jul 26 09:15:27 kernel: Hardware name: Supermicro SMC 2x26 os-gen8 64C NVME-Y 256G/H12SSW-NTR, BIOS 2.5.V1.2U.NVMe.UEFI 05/09/2023 Jul 26 09:15:27 kernel: RIP: 0010:tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: Code: 08 26 01 00 75 e2 41 0f b6 54 24 12 41 8b 8c 24 c0 06 00 00 45 89 f0 48 c7 c7 e0 b4 20 a7 c6 05 8d 08 26 01 01 e8 4a c0 0f 00 <0f> 0b eb ba 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 Jul 26 09:15:27 kernel: RSP: 0018:ffffb7838088ce00 EFLAGS: 00010286 Jul 26 09:15:27 kernel: RAX: 0000000000000000 RBX: ffff9b84b5630430 RCX: 0000000000000006 Jul 26 09:15:27 kernel: RDX: 0000000000000007 RSI: 0000000000000096 RDI: ffff9b8e4621c8c0 Jul 26 09:15:27 kernel: RBP: ffffb7838088ce18 R08: 0000000000000927 R09: 0000000000000004 Jul 26 09:15:27 kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff9b84b5630000 Jul 26 09:15:27 kernel: R13: 0000000000000000 R14: 000000000000231c R15: ffff9b84b5630430 Jul 26 09:15:27 kernel: FS: 0000000000000000(0000) GS:ffff9b8e46200000(0000) knlGS:0000000000000000 Jul 26 09:15:27 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Jul 26 09:15:27 kernel: CR2: 000056238cec2380 CR3: 0000003e49ede005 CR4: 0000000000760ee0 Jul 26 09:15:27 kernel: PKRU: 55555554 Jul 26 09:15:27 kernel: Call Trace: Jul 26 09:15:27 kernel: Jul 26 09:15:27 kernel: ? show_regs.cold+0x1a/0x1f Jul 26 09:15:27 kernel: ? __warn+0x98/0xe0 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: ? report_bug+0xd1/0x100 Jul 26 09:15:27 kernel: ? do_error_trap+0x9b/0xc0 Jul 26 09:15:27 kernel: ? do_invalid_op+0x3c/0x50 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: ? invalid_op+0x1e/0x30 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: tcp_write_timer_handler+0x1b4/0x240 Jul 26 09:15:27 kernel: tcp_write_timer+0x9e/0xe0 Jul 26 09:15:27 kernel: ? tcp_write_timer_handler+0x240/0x240 Jul 26 09:15:27 kernel: call_timer_fn+0x32/0x130 Jul 26 09:15:27 kernel: __run_timers.part.0+0x180/0x280 Jul 26 09:15:27 kernel: ? timerqueue_add+0x9b/0xb0 Jul 26 09:15:27 kernel: ? enqueue_hrtimer+0x3d/0x90 Jul 26 09:15:27 kernel: ? do_error_trap+0x9b/0xc0 Jul 26 09:15:27 kernel: ? do_invalid_op+0x3c/0x50 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: ? invalid_op+0x1e/0x30 Jul 26 09:15:27 kernel: ? tcp_send_loss_probe+0x214/0x220 Jul 26 09:15:27 kernel: tcp_write_timer_handler+0x1b4/0x240 Jul 26 09:15:27 kernel: tcp_write_timer+0x9e/0xe0 Jul 26 09:15:27 kernel: ? tcp_write_timer_handler+0x240/0x240 Jul 26 09:15:27 kernel: call_timer_fn+0x32/0x130 Jul 26 09:15:27 kernel: __run_timers.part.0+0x180/0x280 Jul 26 09:15:27 kernel: ? timerqueue_add+0x9b/0xb0 Jul 26 09:15:27 kernel: ? enqueue_hrtimer+0x3d/0x90 Jul 26 09:15:27 kernel: ? recalibrate_cpu_khz+0x10/0x10 Jul 26 09:15:27 kernel: ? ktime_get+0x3e/0xa0 Jul 26 09:15:27 kernel: ? native_x2apic_icr_write+0x30/0x30 Jul 26 09:15:27 kernel: run_timer_softirq+0x2a/0x50 Jul 26 09:15:27 kernel: __do_softirq+0xd1/0x2c1 Jul 26 09:15:27 kernel: irq_exit+0xae/0xb0 Jul 26 09:15:27 kernel: smp_apic_timer_interrupt+0x7b/0x140 Jul 26 09:15:27 kernel: apic_timer_interrupt+0xf/0x20 Jul 26 09:15:27 kernel: Jul 26 09:15:27 kernel: RIP: 0010:native_safe_halt+0xe/0x10 Jul 26 09:15:27 kernel: Code: 7b ff ff ff eb bd 90 90 90 90 90 90 e9 07 00 00 00 0f 00 2d 36 2c 50 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 26 2c 50 00 fb f4 90 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 e8 dd 5e 61 ff 65 Jul 26 09:15:27 kernel: RSP: 0018:ffffb783801cfe70 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 Jul 26 09:15:27 kernel: RAX: ffffffffa6908b20 RBX: 0000000000000010 RCX: 0000000000000001 Jul 26 09:15:27 kernel: RDX: 000000006fc0c97e RSI: 0000000000000082 RDI: 0000000000000082 Jul 26 09:15:27 kernel: RBP: ffffb783801cfe90 R08: 0000000000000000 R09: 0000000000000225 Jul 26 09:15:27 kernel: R10: 0000000000100000 R11: 0000000000000000 R12: 0000000000000010 Jul 26 09:15:27 kernel: R13: ffff9b8e390b0000 R14: 0000000000000000 R15: 0000000000000000 Jul 26 09:15:27 kernel: ? __cpuidle_text_start+0x8/0x8 Jul 26 09:15:27 kernel: ? default_idle+0x20/0x140 Jul 26 09:15:27 kernel: arch_cpu_idle+0x15/0x20 Jul 26 09:15:27 kernel: default_idle_call+0x23/0x30 Jul 26 09:15:27 kernel: do_idle+0x1fb/0x270 Jul 26 09:15:27 kernel: cpu_startup_entry+0x20/0x30 Jul 26 09:15:27 kernel: start_secondary+0x178/0x1d0 Jul 26 09:15:27 kernel: secondary_startup_64+0xa4/0xb0 Jul 26 09:15:27 kernel: ---[ end trace e7ac822987e33be1 ]--- The NULL ptr deref is coming from tcp_rto_delta_us() attempting to pull an skb off the head of the retransmit queue and then dereferencing that skb to get the skb_mstamp_ns value via tcp_skb_timestamp_us(skb). The crash is the same one that was reported a # of years ago here: https://lore.kernel.org/netdev/86c0f836-9a7c-438b-d81a-839be45f1f58@gmail.com/T/#t and the kernel we're running has the fix which was added to resolve this issue. Unfortunately we've been unsuccessful so far in reproducing this problem in the lab and do not have the luxury of pushing out a new kernel to try and test if newer kernels resolve this issue at the moment. I realize this is a report against both an Ubuntu kernel and also an older 5.4 kernel. I have reported this issue to Ubuntu here: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2077657 however I feel like since this issue has possibly cropped up again it makes sense to build in some protection in this path (even on the latest kernel versions) since the code in question just blindly assumes there's a valid skb without testing if it's NULL b/f it looks at the timestamp. Given we have seen crashes in this path before and now this case it seems like we should protect ourselves for when packets_out accounting is incorrect. While we should fix that root cause we should also just make sure the skb is not NULL before dereferencing it. Also add a warn once here to capture some information if/when the problem case is hit again. Fixes: e1a10ef7fa87 ("tcp: introduce tcp_rto_delta_us() helper for xmit timer fix") Signed-off-by: Josh Hunt Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/tcp.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index c206ffaa8ed7..b3917af309e0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2230,9 +2230,26 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk) { const struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 rto = inet_csk(sk)->icsk_rto; - u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto); - return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; + if (likely(skb)) { + u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto); + + return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; + } else { + WARN_ONCE(1, + "rtx queue emtpy: " + "out:%u sacked:%u lost:%u retrans:%u " + "tlp_high_seq:%u sk_state:%u ca_state:%u " + "advmss:%u mss_cache:%u pmtu:%u\n", + tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out, + tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out, + tcp_sk(sk)->tlp_high_seq, sk->sk_state, + inet_csk(sk)->icsk_ca_state, + tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache, + inet_csk(sk)->icsk_pmtu_cookie); + return jiffies_to_usecs(rto); + } + } /* From 00a0c2d49bb59151e50ca8f9eed62497e0e98aa9 Mon Sep 17 00:00:00 2001 From: Youssef Samir Date: Mon, 16 Sep 2024 19:08:58 +0200 Subject: [PATCH 369/534] net: qrtr: Update packets cloning when broadcasting [ Upstream commit f011b313e8ebd5b7abd8521b5119aecef403de45 ] When broadcasting data to multiple nodes via MHI, using skb_clone() causes all nodes to receive the same header data. This can result in packets being discarded by endpoints, leading to lost data. This issue occurs when a socket is closed, and a QRTR_TYPE_DEL_CLIENT packet is broadcasted. All nodes receive the same destination node ID, causing the node connected to the client to discard the packet and remain unaware of the client's deletion. Replace skb_clone() with pskb_copy(), to create a separate copy of the header for each sk_buff. Fixes: bdabad3e363d ("net: Add Qualcomm IPC router") Signed-off-by: Youssef Samir Reviewed-by: Jeffery Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Chris Lew Link: https://patch.msgid.link/20240916170858.2382247-1-quic_yabdulra@quicinc.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/qrtr/af_qrtr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 41ece61eb57a..00c51cf693f3 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -884,7 +884,7 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, mutex_lock(&qrtr_node_lock); list_for_each_entry(node, &qrtr_all_nodes, item) { - skbn = skb_clone(skb, GFP_KERNEL); + skbn = pskb_copy(skb, GFP_KERNEL); if (!skbn) break; skb_set_owner_w(skbn, skb->sk); From ccd3e6ff05e5236d1b9535f23f3e6622e0bb32b8 Mon Sep 17 00:00:00 2001 From: Jiwon Kim Date: Wed, 18 Sep 2024 14:06:02 +0000 Subject: [PATCH 370/534] bonding: Fix unnecessary warnings and logs from bond_xdp_get_xmit_slave() [ Upstream commit 0cbfd45fbcf0cb26d85c981b91c62fe73cdee01c ] syzbot reported a WARNING in bond_xdp_get_xmit_slave. To reproduce this[1], one bond device (bond1) has xdpdrv, which increases bpf_master_redirect_enabled_key. Another bond device (bond0) which is unsupported by XDP but its slave (veth3) has xdpgeneric that returns XDP_TX. This triggers WARN_ON_ONCE() from the xdp_master_redirect(). To reduce unnecessary warnings and improve log management, we need to delete the WARN_ON_ONCE() and add ratelimit to the netdev_err(). [1] Steps to reproduce: # Needs tx_xdp with return XDP_TX; ip l add veth0 type veth peer veth1 ip l add veth3 type veth peer veth4 ip l add bond0 type bond mode 6 # BOND_MODE_ALB, unsupported by XDP ip l add bond1 type bond # BOND_MODE_ROUNDROBIN by default ip l set veth0 master bond1 ip l set bond1 up # Increases bpf_master_redirect_enabled_key ip l set dev bond1 xdpdrv object tx_xdp.o section xdp_tx ip l set veth3 master bond0 ip l set bond0 up ip l set veth4 up # Triggers WARN_ON_ONCE() from the xdp_master_redirect() ip l set veth3 xdpgeneric object tx_xdp.o section xdp_tx Reported-by: syzbot+c187823a52ed505b2257@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c187823a52ed505b2257 Fixes: 9e2ee5c7e7c3 ("net, bonding: Add XDP support to the bonding driver") Signed-off-by: Jiwon Kim Signed-off-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20240918140602.18644-1-jiwonaid0@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 53a7b53618d9..14b4780b73c7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5534,9 +5534,9 @@ bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp) break; default: - /* Should never happen. Mode guarded by bond_xdp_check() */ - netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond)); - WARN_ON_ONCE(1); + if (net_ratelimit()) + netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", + BOND_MODE(bond)); return NULL; } From e9e3424d6d4b34d2c15071c71ec249a9f78ff341 Mon Sep 17 00:00:00 2001 From: Wenbo Li Date: Thu, 19 Sep 2024 16:13:51 +0800 Subject: [PATCH 371/534] virtio_net: Fix mismatched buf address when unmapping for small packets [ Upstream commit c11a49d58ad229a1be1ebe08a2b68fedf83db6c8 ] Currently, the virtio-net driver will perform a pre-dma-mapping for small or mergeable RX buffer. But for small packets, a mismatched address without VIRTNET_RX_PAD and xdp_headroom is used for unmapping. That will result in unsynchronized buffers when SWIOTLB is enabled, for example, when running as a TDX guest. This patch unifies the address passed to the virtio core as the address of the virtnet header and fixes the mismatched buffer address. Changes from v2: unify the buf that passed to the virtio core in small and merge mode. Changes from v1: Use ctx to get xdp_headroom. Fixes: 295525e29a5b ("virtio_net: merge dma operations when filling mergeable buffers") Signed-off-by: Wenbo Li Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20240919081351.51772-1-liwenbo.martin@bytedance.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index bc01f2dafa94..2da3be3fb942 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1269,6 +1269,11 @@ static struct sk_buff *receive_small(struct net_device *dev, struct page *page = virt_to_head_page(buf); struct sk_buff *skb; + /* We passed the address of virtnet header to virtio-core, + * so truncate the padding. + */ + buf -= VIRTNET_RX_PAD + xdp_headroom; + len -= vi->hdr_len; u64_stats_add(&stats->bytes, len); @@ -1859,8 +1864,9 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, if (unlikely(!buf)) return -ENOMEM; - virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom, - vi->hdr_len + GOOD_PACKET_LEN); + buf += VIRTNET_RX_PAD + xdp_headroom; + + virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) { From 3e8ac2743d487c34af79d20a7615190b4e59e29d Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Thu, 19 Sep 2024 20:10:28 +0800 Subject: [PATCH 372/534] net: stmmac: set PP_FLAG_DMA_SYNC_DEV only if XDP is enabled [ Upstream commit b514c47ebf41a6536551ed28a05758036e6eca7c ] Commit 5fabb01207a2 ("net: stmmac: Add initial XDP support") sets PP_FLAG_DMA_SYNC_DEV flag for page_pool unconditionally, page_pool_recycle_direct() will call page_pool_dma_sync_for_device() on every page even the page is not going to be reused by XDP program. When XDP is not enabled, the page which holds the received buffer will be recycled once the buffer is copied into new SKB by skb_copy_to_linear_data(), then the MAC core will never reuse this page any longer. Always setting PP_FLAG_DMA_SYNC_DEV wastes CPU cycles on unnecessary calling of page_pool_dma_sync_for_device(). After this patch, up to 9% noticeable performance improvement was observed on certain platforms. Fixes: 5fabb01207a2 ("net: stmmac: Add initial XDP support") Signed-off-by: Furong Xu <0x1207@gmail.com> Link: https://patch.msgid.link/20240919121028.1348023-1-0x1207@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d6167a7b19f2..89a80e3e8bb8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2018,7 +2018,7 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, rx_q->queue_index = queue; rx_q->priv_data = priv; - pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.flags = PP_FLAG_DMA_MAP | (xdp_prog ? PP_FLAG_DMA_SYNC_DEV : 0); pp_params.pool_size = dma_conf->dma_rx_size; num_pages = DIV_ROUND_UP(dma_conf->dma_buf_sz, PAGE_SIZE); pp_params.order = ilog2(num_pages); From 668f4df6d6df17330e35f434b1f0633709f5fb33 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 12 Sep 2024 14:21:33 +0200 Subject: [PATCH 373/534] netfilter: nf_tables: Keep deleted flowtable hooks until after RCU [ Upstream commit 642c89c475419b4d0c0d90e29d9c1a0e4351f379 ] Documentation of list_del_rcu() warns callers to not immediately free the deleted list item. While it seems not necessary to use the RCU-variant of list_del() here in the first place, doing so seems to require calling kfree_rcu() on the deleted item as well. Fixes: 3f0465a9ef02 ("netfilter: nf_tables: dynamically allocate hooks per net_device in flowtables") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6b6c16c5fc9a..935e953b8e1b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8943,7 +8943,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) flowtable->data.type->setup(&flowtable->data, hook->ops.dev, FLOW_BLOCK_UNBIND); list_del_rcu(&hook->list); - kfree(hook); + kfree_rcu(hook, rcu); } kfree(flowtable->name); module_put(flowtable->data.type->owner); From b3f7607f20035a91e33e7973124d663f271ef371 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 16 Sep 2024 16:14:41 +0100 Subject: [PATCH 374/534] netfilter: ctnetlink: compile ctnetlink_label_size with CONFIG_NF_CONNTRACK_EVENTS [ Upstream commit e1f1ee0e9ad8cbe660f5c104e791c5f1a7cf4c31 ] Only provide ctnetlink_label_size when it is used, which is when CONFIG_NF_CONNTRACK_EVENTS is configured. Flagged by clang-18 W=1 builds as: .../nf_conntrack_netlink.c:385:19: warning: unused function 'ctnetlink_label_size' [-Wunused-function] 385 | static inline int ctnetlink_label_size(const struct nf_conn *ct) | ^~~~~~~~~~~~~~~~~~~~ The condition on CONFIG_NF_CONNTRACK_LABELS being removed by this patch guards compilation of non-trivial implementations of ctnetlink_dump_labels() and ctnetlink_label_size(). However, this is not necessary as each of these functions will always return 0 if CONFIG_NF_CONNTRACK_LABELS is not defined as each function starts with the equivalent of: struct nf_conn_labels *labels = nf_ct_labels_find(ct); if (!labels) return 0; And nf_ct_labels_find always returns NULL if CONFIG_NF_CONNTRACK_LABELS is not enabled. So I believe that the compiler optimises the code away in such cases anyway. Found by inspection. Compile tested only. Originally splitted in two patches, Pablo Neira Ayuso collapsed them and added Fixes: tag. Fixes: 0ceabd83875b ("netfilter: ctnetlink: deliver labels to userspace") Link: https://lore.kernel.org/netfilter-devel/20240909151712.GZ2097826@kernel.org/ Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_netlink.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4dab45039f34..282e9644f6fd 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -381,7 +381,7 @@ nla_put_failure: #define ctnetlink_dump_secctx(a, b) (0) #endif -#ifdef CONFIG_NF_CONNTRACK_LABELS +#ifdef CONFIG_NF_CONNTRACK_EVENTS static inline int ctnetlink_label_size(const struct nf_conn *ct) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); @@ -390,6 +390,7 @@ static inline int ctnetlink_label_size(const struct nf_conn *ct) return 0; return nla_total_size(sizeof(labels->bits)); } +#endif static int ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) @@ -410,10 +411,6 @@ ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) return 0; } -#else -#define ctnetlink_dump_labels(a, b) (0) -#define ctnetlink_label_size(a) (0) -#endif #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) From da2bb8e177a46f23ca5a0586173b2240fef86078 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 17 Sep 2024 23:07:46 +0200 Subject: [PATCH 375/534] netfilter: nf_tables: use rcu chain hook list iterator from netlink dump path [ Upstream commit 4ffcf5ca81c3b83180473eb0d3c010a1a7c6c4de ] Lockless iteration over hook list is possible from netlink dump path, use rcu variant to iterate over the hook list as is done with flowtable hooks. Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Reported-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 935e953b8e1b..da7fd3919ce4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1778,7 +1778,7 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family, if (!hook_list) hook_list = &basechain->hook_list; - list_for_each_entry(hook, hook_list, list) { + list_for_each_entry_rcu(hook, hook_list, list) { if (!first) first = hook; From 358124ba2cea9fc4d6c8d40607cac4a22f62ffe4 Mon Sep 17 00:00:00 2001 From: Felix Moessbauer Date: Mon, 9 Sep 2024 17:00:36 +0200 Subject: [PATCH 376/534] io_uring/sqpoll: do not allow pinning outside of cpuset commit f011c9cf04c06f16b24f583d313d3c012e589e50 upstream. The submit queue polling threads are userland threads that just never exit to the userland. When creating the thread with IORING_SETUP_SQ_AFF, the affinity of the poller thread is set to the cpu specified in sq_thread_cpu. However, this CPU can be outside of the cpuset defined by the cgroup cpuset controller. This violates the rules defined by the cpuset controller and is a potential issue for realtime applications. In b7ed6d8ffd6 we fixed the default affinity of the poller thread, in case no explicit pinning is required by inheriting the one of the creating task. In case of explicit pinning, the check is more complicated, as also a cpu outside of the parent cpumask is allowed. We implemented this by using cpuset_cpus_allowed (that has support for cgroup cpusets) and testing if the requested cpu is in the set. Fixes: 37d1e2e3642e ("io_uring: move SQPOLL thread io-wq forked worker") Cc: stable@vger.kernel.org # 6.1+ Signed-off-by: Felix Moessbauer Link: https://lore.kernel.org/r/20240909150036.55921-1-felix.moessbauer@siemens.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/sqpoll.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 350436e55aaf..824db6f6ba23 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -401,10 +402,12 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, return 0; if (p->flags & IORING_SETUP_SQ_AFF) { + struct cpumask allowed_mask; int cpu = p->sq_thread_cpu; ret = -EINVAL; - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + cpuset_cpus_allowed(current, &allowed_mask); + if (!cpumask_test_cpu(cpu, &allowed_mask)) goto err_sqpoll; sqd->sq_cpu = cpu; } else { From 4bdf75c2ef3311342d35dae23f439439ec7a6886 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 18 Sep 2024 11:58:19 -0600 Subject: [PATCH 377/534] io_uring: check for presence of task_work rather than TIF_NOTIFY_SIGNAL commit 04beb6e0e08c30c6f845f50afb7d7953603d7a6f upstream. If some part of the kernel adds task_work that needs executing, in terms of signaling it'll generally use TWA_SIGNAL or TWA_RESUME. Those two directly translate to TIF_NOTIFY_SIGNAL or TIF_NOTIFY_RESUME, and can be used for a variety of use case outside of task_work. However, io_cqring_wait_schedule() only tests explicitly for TIF_NOTIFY_SIGNAL. This means it can miss if task_work got added for the task, but used a different kind of signaling mechanism (or none at all). Normally this doesn't matter as any task_work will be run once the task exits to userspace, except if: 1) The ring is setup with DEFER_TASKRUN 2) The local work item may generate normal task_work For condition 2, this can happen when closing a file and it's the final put of that file, for example. This can cause stalls where a task is waiting to make progress inside io_cqring_wait(), but there's nothing else that will wake it up. Hence change the "should we schedule or loop around" check to check for the presence of task_work explicitly, rather than just TIF_NOTIFY_SIGNAL as the mechanism. While in there, also change the ordering of what type of task_work first in terms of ordering, to both make it consistent with other task_work runs in io_uring, but also to better handle the case of defer task_work generating normal task_work, like in the above example. Reported-by: Jan Hendrik Farr Link: https://github.com/axboe/liburing/issues/1235 Cc: stable@vger.kernel.org Fixes: 846072f16eed ("io_uring: mimimise io_cqring_wait_schedule") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 68504709f75c..7b0a100e1139 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2514,7 +2514,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, return 1; if (unlikely(!llist_empty(&ctx->work_llist))) return 1; - if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) + if (unlikely(task_work_pending(current))) return 1; if (unlikely(task_sigpending(current))) return -EINTR; @@ -2610,9 +2610,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, * If we got woken because of task_work being processed, run it * now rather than let the caller do another wait loop. */ - io_run_task_work(); if (!llist_empty(&ctx->work_llist)) io_run_local_work(ctx, nr_wait); + io_run_task_work(); /* * Non-local task_work will be run on exit to userspace, but From 49d3a4ad57c57227c3b0fd6cd4188b2a5ebd6178 Mon Sep 17 00:00:00 2001 From: Shu Han Date: Tue, 17 Sep 2024 17:41:04 +0800 Subject: [PATCH 378/534] mm: call the security_mmap_file() LSM hook in remap_file_pages() commit ea7e2d5e49c05e5db1922387b09ca74aa40f46e2 upstream. The remap_file_pages syscall handler calls do_mmap() directly, which doesn't contain the LSM security check. And if the process has called personality(READ_IMPLIES_EXEC) before and remap_file_pages() is called for RW pages, this will actually result in remapping the pages to RWX, bypassing a W^X policy enforced by SELinux. So we should check prot by security_mmap_file LSM hook in the remap_file_pages syscall handler before do_mmap() is called. Otherwise, it potentially permits an attacker to bypass a W^X policy enforced by SELinux. The bypass is similar to CVE-2016-10044, which bypass the same thing via AIO and can be found in [1]. The PoC: $ cat > test.c int main(void) { size_t pagesz = sysconf(_SC_PAGE_SIZE); int mfd = syscall(SYS_memfd_create, "test", 0); const char *buf = mmap(NULL, 4 * pagesz, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); unsigned int old = syscall(SYS_personality, 0xffffffff); syscall(SYS_personality, READ_IMPLIES_EXEC | old); syscall(SYS_remap_file_pages, buf, pagesz, 0, 2, 0); syscall(SYS_personality, old); // show the RWX page exists even if W^X policy is enforced int fd = open("/proc/self/maps", O_RDONLY); unsigned char buf2[1024]; while (1) { int ret = read(fd, buf2, 1024); if (ret <= 0) break; write(1, buf2, ret); } close(fd); } $ gcc test.c -o test $ ./test | grep rwx 7f1836c34000-7f1836c35000 rwxs 00002000 00:01 2050 /memfd:test (deleted) Link: https://project-zero.issues.chromium.org/issues/42452389 [1] Cc: stable@vger.kernel.org Signed-off-by: Shu Han Acked-by: Stephen Smalley [PM: subject line tweaks] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- mm/mmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 84e49e8bf010..6530e9cac458 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3025,8 +3025,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, flags |= MAP_LOCKED; file = get_file(vma->vm_file); + ret = security_mmap_file(vma->vm_file, prot, flags); + if (ret) + goto out_fput; ret = do_mmap(vma->vm_file, start, size, prot, flags, 0, pgoff, &populate, NULL); +out_fput: fput(file); out: mmap_write_unlock(mm); From b5d38f1d4acb3a0aa0763d4d70bec6c9cf51300c Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 12 Aug 2024 12:13:44 -0400 Subject: [PATCH 379/534] drm/amd/display: Fix Synaptics Cascaded Panamera DSC Determination commit 4437936c6b696b98f3fe1d8679a2788c41b4df77 upstream. Synaptics Cascaded Panamera topology needs to unconditionally acquire root aux for dsc decoding. Reviewed-by: Roman Li Signed-off-by: Fangzhi Zuo Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 3880ddf1c820..43173a1f99d0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -246,7 +246,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto aconnector->dsc_aux = &aconnector->mst_root->dm_dp_aux.aux; /* synaptics cascaded MST hub case */ - if (!aconnector->dsc_aux && is_synaptics_cascaded_panamera(aconnector->dc_link, port)) + if (is_synaptics_cascaded_panamera(aconnector->dc_link, port)) aconnector->dsc_aux = port->mgr->aux; if (!aconnector->dsc_aux) From 0851b1ec650adadcaa23ec96daad95a55bf966f0 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 Aug 2024 14:32:05 -0400 Subject: [PATCH 380/534] drm/vmwgfx: Prevent unmapping active read buffers commit aba07b9a0587f50e5d3346eaa19019cf3f86c0ea upstream. The kms paths keep a persistent map active to read and compare the cursor buffer. These maps can race with each other in simple scenario where: a) buffer "a" mapped for update b) buffer "a" mapped for compare c) do the compare d) unmap "a" for compare e) update the cursor f) unmap "a" for update At step "e" the buffer has been unmapped and the read contents is bogus. Prevent unmapping of active read buffers by simply keeping a count of how many paths have currently active maps and unmap only when the count reaches 0. Fixes: 485d98d472d5 ("drm/vmwgfx: Add support for CursorMob and CursorBypass 4") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v5.19+ Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240816183332.31961-2-zack.rusin@broadcom.com Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala [Shivani: Modified to apply on v6.6.y] Signed-off-by: Shivani Agarwal Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 13 +++++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index ae796e0c64aa..fdc34283eeb9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -331,6 +331,8 @@ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) void *virtual; int ret; + atomic_inc(&vbo->map_count); + virtual = ttm_kmap_obj_virtual(&vbo->map, ¬_used); if (virtual) return virtual; @@ -353,11 +355,17 @@ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) */ void vmw_bo_unmap(struct vmw_bo *vbo) { + int map_count; + if (vbo->map.bo == NULL) return; - ttm_bo_kunmap(&vbo->map); - vbo->map.bo = NULL; + map_count = atomic_dec_return(&vbo->map_count); + + if (!map_count) { + ttm_bo_kunmap(&vbo->map); + vbo->map.bo = NULL; + } } @@ -390,6 +398,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv, BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->tbo.priority = 3; vmw_bo->res_tree = RB_ROOT; + atomic_set(&vmw_bo->map_count, 0); params->size = ALIGN(params->size, PAGE_SIZE); drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index f349642e6190..156ea612fc2a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -68,6 +68,8 @@ struct vmw_bo_params { * @map: Kmap object for semi-persistent mappings * @res_tree: RB tree of resources using this buffer object as a backing MOB * @res_prios: Eviction priority counts for attached resources + * @map_count: The number of currently active maps. Will differ from the + * cpu_writers because it includes kernel maps. * @cpu_writers: Number of synccpu write grabs. Protected by reservation when * increased. May be decreased without reservation. * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB @@ -86,6 +88,7 @@ struct vmw_bo { struct rb_root res_tree; u32 res_prios[TTM_MAX_BO_PRIORITY]; + atomic_t map_count; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; From 79fec62d0f9bba88a6f9e2194db3eecace266d65 Mon Sep 17 00:00:00 2001 From: Duanqiang Wen Date: Mon, 30 Sep 2024 15:33:27 +0800 Subject: [PATCH 381/534] Revert "net: libwx: fix alloc msix vectors failed" This reverts commit 69197dfc64007b5292cc960581548f41ccd44828. commit 937d46ecc5f9 ("net: wangxun: add ethtool_ops for channel number") changed NIC misc irq from most significant bit to least significant bit, the former condition is not required to apply this patch, because we only need to set irq affinity for NIC queue irq vectors. this patch is required after commit 937d46ecc5f9 ("net: wangxun: add ethtool_ops for channel number") was applied, so this is only relevant to 6.6.y branch. Signed-off-by: Duanqiang Wen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index bba44ff0e287..c37500aa0637 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1585,7 +1585,7 @@ static void wx_set_num_queues(struct wx *wx) */ static int wx_acquire_msix_vectors(struct wx *wx) { - struct irq_affinity affd = { .pre_vectors = 1 }; + struct irq_affinity affd = {0, }; int nvecs, i; nvecs = min_t(int, num_online_cpus(), wx->mac.max_msix_vectors); From 161fd69123b0360d27922e282f377067ab7f1dab Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 6 Aug 2024 09:56:48 +0200 Subject: [PATCH 382/534] xen: move checks for e820 conflicts further up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c4498ae316da5b5786ccd448fc555f3339b8e4ca upstream. Move the checks for e820 memory map conflicts using the xen_chk_is_e820_usable() helper further up in order to prepare resolving some of the possible conflicts by doing some e820 map modifications, which must happen before evaluating the RAM layout. Signed-off-by: Juergen Gross Tested-by: Marek Marczykowski-Górecki Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/setup.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 989c14b1ff6c..1a426d7c1d0d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -855,6 +855,28 @@ char * __init xen_memory_setup(void) /* Make sure the Xen-supplied memory map is well-ordered. */ e820__update_table(&xen_e820_table); + /* + * Check whether the kernel itself conflicts with the target E820 map. + * Failing now is better than running into weird problems later due + * to relocating (and even reusing) pages with kernel text or data. + */ + xen_chk_is_e820_usable(__pa_symbol(_text), + __pa_symbol(_end) - __pa_symbol(_text), + "kernel"); + + /* + * Check for a conflict of the xen_start_info memory with the target + * E820 map. + */ + xen_chk_is_e820_usable(__pa(xen_start_info), sizeof(*xen_start_info), + "xen_start_info"); + + /* + * Check for a conflict of the hypervisor supplied page tables with + * the target E820 map. + */ + xen_pt_check_e820(); + max_pages = xen_get_max_pages(); /* How many extra pages do we need due to remapping? */ @@ -927,28 +949,6 @@ char * __init xen_memory_setup(void) e820__update_table(e820_table); - /* - * Check whether the kernel itself conflicts with the target E820 map. - * Failing now is better than running into weird problems later due - * to relocating (and even reusing) pages with kernel text or data. - */ - xen_chk_is_e820_usable(__pa_symbol(_text), - __pa_symbol(_end) - __pa_symbol(_text), - "kernel"); - - /* - * Check for a conflict of the xen_start_info memory with the target - * E820 map. - */ - xen_chk_is_e820_usable(__pa(xen_start_info), sizeof(*xen_start_info), - "xen_start_info"); - - /* - * Check for a conflict of the hypervisor supplied page tables with - * the target E820 map. - */ - xen_pt_check_e820(); - xen_reserve_xen_mfnlist(); /* Check for a conflict of the initrd with the target E820 map. */ From adbb44539b56f15dc94701a43aa21ded41635c99 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 9 Aug 2024 17:52:55 +0200 Subject: [PATCH 383/534] xen: allow mapping ACPI data using a different physical address commit 9221222c717dbddac1e3c49906525475d87a3a44 upstream. When running as a Xen PV dom0 the system needs to map ACPI data of the host using host physical addresses, while those addresses can conflict with the guest physical addresses of the loaded linux kernel. The same problem might apply in case a PV guest is configured to use the host memory map. This conflict can be solved by mapping the ACPI data to a different guest physical address, but mapping the data via acpi_os_ioremap() must still be possible using the host physical address, as this address might be generated by AML when referencing some of the ACPI data. When configured to support running as a Xen PV domain, have an implementation of acpi_os_ioremap() being aware of the possibility to need above mentioned translation of a host physical address to the guest physical address. This modification requires to #include linux/acpi.h in some sources which need to include asm/acpi.h directly. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/acpi.h | 8 +++++++ arch/x86/kernel/acpi/boot.c | 11 ++++++++++ arch/x86/kernel/jailhouse.c | 1 + arch/x86/kernel/mmconf-fam10h_64.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/x86_init.c | 1 + arch/x86/xen/p2m.c | 35 ++++++++++++++++++++++++++++++ arch/x86/xen/setup.c | 2 +- 8 files changed, 59 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index f896eed4516c..529c36a98d9e 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -165,6 +165,14 @@ void acpi_generic_reduced_hw_init(void); void x86_default_set_root_pointer(u64 addr); u64 x86_default_get_root_pointer(void); +#ifdef CONFIG_XEN_PV +/* A Xen PV domain needs a special acpi_os_ioremap() handling. */ +extern void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys, + acpi_size size); +void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +#define acpi_os_ioremap acpi_os_ioremap +#endif + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c55c0ef47a18..49c39f5dc1c9 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1901,3 +1901,14 @@ u64 x86_default_get_root_pointer(void) { return boot_params.acpi_rsdp_addr; } + +#ifdef CONFIG_XEN_PV +void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +{ + return ioremap_cache(phys, size); +} + +void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys, acpi_size size) = + x86_acpi_os_ioremap; +EXPORT_SYMBOL_GPL(acpi_os_ioremap); +#endif diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 578d16fc040f..5481c7c5db30 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index c94dec6a1834..1f54eedc3015 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2a187c0cbd5b..ce77dac9a020 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 3f0718b4a7d2..268627a17cf0 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 6cb8dae768fa..11b5c042d4fa 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -836,6 +837,34 @@ void __init xen_do_remap_nonram(void) pr_info("Remapped %u non-RAM page(s)\n", remapped); } +#ifdef CONFIG_ACPI +/* + * Xen variant of acpi_os_ioremap() taking potentially remapped non-RAM + * regions into account. + * Any attempt to map an area crossing a remap boundary will produce a + * WARN() splat. + * phys is related to remap->maddr on input and will be rebased to remap->paddr. + */ +static void __iomem *xen_acpi_os_ioremap(acpi_physical_address phys, + acpi_size size) +{ + unsigned int i; + const struct nonram_remap *remap = xen_nonram_remap; + + for (i = 0; i < nr_nonram_remap; i++) { + if (phys + size > remap->maddr && + phys < remap->maddr + remap->size) { + WARN_ON(phys < remap->maddr || + phys + size > remap->maddr + remap->size); + phys += remap->paddr - remap->maddr; + break; + } + } + + return x86_acpi_os_ioremap(phys, size); +} +#endif /* CONFIG_ACPI */ + /* * Add a new non-RAM remap entry. * In case of no free entry found, just crash the system. @@ -850,6 +879,12 @@ void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr, BUG(); } +#ifdef CONFIG_ACPI + /* Switch to the Xen acpi_os_ioremap() variant. */ + if (nr_nonram_remap == 0) + acpi_os_ioremap = xen_acpi_os_ioremap; +#endif + xen_nonram_remap[nr_nonram_remap].maddr = maddr; xen_nonram_remap[nr_nonram_remap].paddr = paddr; xen_nonram_remap[nr_nonram_remap].size = size; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1a426d7c1d0d..dc822124cacb 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -15,12 +15,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include From 859f62a2f904d916488bbc62fe8d1b13e749b4e8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 16 Sep 2024 02:58:06 -0600 Subject: [PATCH 384/534] io_uring/sqpoll: retain test for whether the CPU is valid commit a09c17240bdf2e9fa6d0591afa9448b59785f7d4 upstream. A recent commit ensured that SQPOLL cannot be setup with a CPU that isn't in the current tasks cpuset, but it also dropped testing whether the CPU is valid in the first place. Without that, if a task passes in a CPU value that is too high, the following KASAN splat can get triggered: BUG: KASAN: stack-out-of-bounds in io_sq_offload_create+0x858/0xaa4 Read of size 8 at addr ffff800089bc7b90 by task wq-aff.t/1391 CPU: 4 UID: 1000 PID: 1391 Comm: wq-aff.t Not tainted 6.11.0-rc7-00227-g371c468f4db6 #7080 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace.part.0+0xcc/0xe0 show_stack+0x14/0x1c dump_stack_lvl+0x58/0x74 print_report+0x16c/0x4c8 kasan_report+0x9c/0xe4 __asan_report_load8_noabort+0x1c/0x24 io_sq_offload_create+0x858/0xaa4 io_uring_setup+0x1394/0x17c4 __arm64_sys_io_uring_setup+0x6c/0x180 invoke_syscall+0x6c/0x260 el0_svc_common.constprop.0+0x158/0x224 do_el0_svc+0x3c/0x5c el0_svc+0x34/0x70 el0t_64_sync_handler+0x118/0x124 el0t_64_sync+0x168/0x16c The buggy address belongs to stack of task wq-aff.t/1391 and is located at offset 48 in frame: io_sq_offload_create+0x0/0xaa4 This frame has 1 object: [32, 40) 'allowed_mask' The buggy address belongs to the virtual mapping at [ffff800089bc0000, ffff800089bc9000) created by: kernel_clone+0x124/0x7e0 The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff0000d740af80 pfn:0x11740a memcg:ffff0000c2706f02 flags: 0xbffe00000000000(node=0|zone=2|lastcpupid=0x1fff) raw: 0bffe00000000000 0000000000000000 dead000000000122 0000000000000000 raw: ffff0000d740af80 0000000000000000 00000001ffffffff ffff0000c2706f02 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff800089bc7a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff800089bc7b00: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 >ffff800089bc7b80: 00 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 ^ ffff800089bc7c00: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 ffff800089bc7c80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f3 Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202409161632.cbeeca0d-lkp@intel.com Fixes: f011c9cf04c0 ("io_uring/sqpoll: do not allow pinning outside of cpuset") Tested-by: Felix Moessbauer Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/sqpoll.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 824db6f6ba23..545789386d52 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -406,6 +406,8 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, int cpu = p->sq_thread_cpu; ret = -EINVAL; + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + goto err_sqpoll; cpuset_cpus_allowed(current, &allowed_mask); if (!cpumask_test_cpu(cpu, &allowed_mask)) goto err_sqpoll; From 01ad0576f09210a8d1232077c4058e587e4e26cb Mon Sep 17 00:00:00 2001 From: Felix Moessbauer Date: Mon, 16 Sep 2024 13:11:50 +0200 Subject: [PATCH 385/534] io_uring/sqpoll: do not put cpumask on stack commit 7f44beadcc11adb98220556d2ddbe9c97aa6d42d upstream. Putting the cpumask on the stack is deprecated for a long time (since 2d3854a37e8), as these can be big. Given that, change the on-stack allocation of allowed_mask to be dynamically allocated. Fixes: f011c9cf04c0 ("io_uring/sqpoll: do not allow pinning outside of cpuset") Signed-off-by: Felix Moessbauer Link: https://lore.kernel.org/r/20240916111150.1266191-1-felix.moessbauer@siemens.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/sqpoll.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 545789386d52..cdf8b567cb94 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -402,15 +402,22 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, return 0; if (p->flags & IORING_SETUP_SQ_AFF) { - struct cpumask allowed_mask; + cpumask_var_t allowed_mask; int cpu = p->sq_thread_cpu; ret = -EINVAL; if (cpu >= nr_cpu_ids || !cpu_online(cpu)) goto err_sqpoll; - cpuset_cpus_allowed(current, &allowed_mask); - if (!cpumask_test_cpu(cpu, &allowed_mask)) + ret = -ENOMEM; + if (!alloc_cpumask_var(&allowed_mask, GFP_KERNEL)) goto err_sqpoll; + ret = -EINVAL; + cpuset_cpus_allowed(current, allowed_mask); + if (!cpumask_test_cpu(cpu, allowed_mask)) { + free_cpumask_var(allowed_mask); + goto err_sqpoll; + } + free_cpumask_var(allowed_mask); sqd->sq_cpu = cpu; } else { sqd->sq_cpu = -1; From 38dee6edb700d381d5ade626c67a7d63aca97a5e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 29 Jul 2024 18:57:38 +0300 Subject: [PATCH 386/534] Remove *.orig pattern from .gitignore commit 76be4f5a784533c71afbbb1b8f2963ef9e2ee258 upstream. Commit 3f1b0e1f2875 (".gitignore update") added *.orig and *.rej patterns to .gitignore in v2.6.23. The commit message didn't give a rationale. Later on, commit 1f5d3a6b6532 ("Remove *.rej pattern from .gitignore") removed the *.rej pattern in v2.6.26, on the rationale that *.rej files indicated something went really wrong and should not be ignored. The *.rej files are now shown by `git status`, which helps located conflicts when applying patches and lowers the probability that they will go unnoticed. It is however still easy to overlook the *.orig files which slowly polute the source tree. That's not as big of a deal as not noticing a conflict, but it's still not nice. Drop the *.orig pattern from .gitignore to avoid this and help keep the source tree clean. Signed-off-by: Laurent Pinchart [masahiroy@kernel.org: I do not have a strong opinion about this. Perhaps some people may have a different opinion. If you are someone who wants to ignore *.orig, it is likely you would want to do so across all projects. Then, $XDG_CONFIG_HOME/git/ignore would be more suitable for your needs. gitignore(5) suggests, "Patterns which a user wants Git to ignore in all situations generally go into a file specified by core.excludesFile in the user's ~/.gitconfig". Please note that you cannot do the opposite; if *.orig is ignored by the project's .gitignore, you cannot override the decision because $XDG_CONFIG_HOME/git/ignore has a lower priority. If *.orig is sitting on the fence, I'd leave it to the users. ] Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0bbae167bf93..d1a8ab3f98aa 100644 --- a/.gitignore +++ b/.gitignore @@ -135,7 +135,6 @@ GTAGS # id-utils files ID -*.orig *~ \#*# From fb17695735743c35437969d0072a17fd6af68ea9 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 9 Aug 2024 14:24:51 +0100 Subject: [PATCH 387/534] PCI: Revert to the original speed after PCIe failed link retraining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f68dea13405c94381d08f42dbf0416261622bdad upstream. When `pcie_failed_link_retrain' has failed to retrain the link by hand it leaves the link speed restricted to 2.5GT/s, which will then affect any device that has been plugged in later on, which may not suffer from the problem that caused the speed restriction to have been attempted. Consequently such a downstream device will suffer from an unnecessary communication throughput limitation and therefore performance loss. Remove the speed restriction then and revert the Link Control 2 register to its original state if link retraining with the speed restriction in place has failed. Retrain the link again afterwards so as to remove any residual state, waiting on LT rather than DLLLA to avoid an excessive delay and ignoring the result as this training is supposed to fail anyway. Fixes: a89c82249c37 ("PCI: Work around PCIe link training failures") Link: https://lore.kernel.org/linux-pci/alpine.DEB.2.21.2408251412590.30766@angie.orcam.me.uk Reported-by: Matthew W Carlis Link: https://lore.kernel.org/r/20240806000659.30859-1-mattc@purestorage.com/ Link: https://lore.kernel.org/r/20240722193407.23255-1-mattc@purestorage.com/ Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Reviewed-by: Ilpo Järvinen Cc: # v6.5+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ec4277d7835b..e3d57065f777 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -66,7 +66,7 @@ * apply this erratum workaround to any downstream ports as long as they * support Link Active reporting and have the Link Control 2 register. * Restrict the speed to 2.5GT/s then with the Target Link Speed field, - * request a retrain and wait 200ms for the data link to go up. + * request a retrain and check the result. * * If this turns out successful and we know by the Vendor:Device ID it is * safe to do so, then lift the restriction, letting the devices negotiate @@ -74,6 +74,10 @@ * firmware may have already arranged and lift it with ports that already * report their data link being up. * + * Otherwise revert the speed to the original setting and request a retrain + * again to remove any residual state, ignoring the result as it's supposed + * to fail anyway. + * * Return TRUE if the link has been successfully retrained, otherwise FALSE. */ bool pcie_failed_link_retrain(struct pci_dev *dev) @@ -92,6 +96,8 @@ bool pcie_failed_link_retrain(struct pci_dev *dev) pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); if ((lnksta & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_DLLLA)) == PCI_EXP_LNKSTA_LBMS) { + u16 oldlnkctl2 = lnkctl2; + pci_info(dev, "broken device, retraining non-functional downstream link at 2.5GT/s\n"); lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS; @@ -100,6 +106,9 @@ bool pcie_failed_link_retrain(struct pci_dev *dev) if (pcie_retrain_link(dev, false)) { pci_info(dev, "retraining failed\n"); + pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, + oldlnkctl2); + pcie_retrain_link(dev, true); return false; } From 894f21117f638f9afc6021c383c751dc96b6e138 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 9 Aug 2024 14:24:46 +0100 Subject: [PATCH 388/534] PCI: Clear the LBMS bit after a link retrain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8037ac08c2bbb3186f83a5a924f52d1048dbaec5 upstream. The LBMS bit, where implemented, is set by hardware either in response to the completion of retraining caused by writing 1 to the Retrain Link bit or whenever hardware has changed the link speed or width in attempt to correct unreliable link operation. It is never cleared by hardware other than by software writing 1 to the bit position in the Link Status register and we never do such a write. We currently have two places, namely apply_bad_link_workaround() and pcie_failed_link_retrain() in drivers/pci/controller/dwc/pcie-tegra194.c and drivers/pci/quirks.c respectively where we check the state of the LBMS bit and neither is interested in the state of the bit resulting from the completion of retraining, both check for a link fault. And in particular pcie_failed_link_retrain() causes issues consequently, by trying to retrain a link where there's no downstream device anymore and the state of 1 in the LBMS bit has been retained from when there was a device downstream that has since been removed. Clear the LBMS bit then at the conclusion of pcie_retrain_link(), so that we have a single place that controls it and that our code can track link speed or width changes resulting from unreliable link operation. Fixes: a89c82249c37 ("PCI: Work around PCIe link training failures") Link: https://lore.kernel.org/r/alpine.DEB.2.21.2408091133140.61955@angie.orcam.me.uk Reported-by: Matthew W Carlis Link: https://lore.kernel.org/r/20240806000659.30859-1-mattc@purestorage.com/ Link: https://lore.kernel.org/r/20240722193407.23255-1-mattc@purestorage.com/ Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Cc: # v6.5+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index da52f98d8f7f..68a1e9fac661 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5017,7 +5017,15 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt) pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_RL); } - return pcie_wait_for_link_status(pdev, use_lt, !use_lt); + rc = pcie_wait_for_link_status(pdev, use_lt, !use_lt); + + /* + * Clear LBMS after a manual retrain so that the bit can be used + * to track link speed or width changes made by hardware itself + * in attempt to correct unreliable link operation. + */ + pcie_capability_write_word(pdev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); + return rc; } /** From b91d041e0756590fe5cdd9b2ec81f34ae5c024fc Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Tue, 27 Aug 2024 17:54:21 +0530 Subject: [PATCH 389/534] PCI: dra7xx: Fix threaded IRQ request for "dra7xx-pcie-main" IRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 03f84b3baba7836bdfc162c19288d5ce1aa92890 upstream. Commit da87d35a6e51 ("PCI: dra7xx: Use threaded IRQ handler for "dra7xx-pcie-main" IRQ") switched from devm_request_irq() to devm_request_threaded_irq() for the "dra7xx-pcie-main" interrupt. Since the primary handler was set to NULL, the "IRQF_ONESHOT" flag should have also been set. Fix this. Fixes: da87d35a6e51 ("PCI: dra7xx: Use threaded IRQ handler for "dra7xx-pcie-main" IRQ") Suggested-by: Vignesh Raghavendra Link: https://lore.kernel.org/linux-pci/20240827122422.985547-2-s-vadapalli@ti.com Reported-by: Udit Kumar Signed-off-by: Siddharth Vadapalli Signed-off-by: Krzysztof Wilczyński Reviewed-by: Kevin Hilman Reviewed-by: Manivannan Sadhasivam Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pci-dra7xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c index b445ffe95e3f..0e29a76ca530 100644 --- a/drivers/pci/controller/dwc/pci-dra7xx.c +++ b/drivers/pci/controller/dwc/pci-dra7xx.c @@ -841,7 +841,8 @@ static int dra7xx_pcie_probe(struct platform_device *pdev) dra7xx->mode = mode; ret = devm_request_threaded_irq(dev, irq, NULL, dra7xx_pcie_irq_handler, - IRQF_SHARED, "dra7xx-pcie-main", dra7xx); + IRQF_SHARED | IRQF_ONESHOT, + "dra7xx-pcie-main", dra7xx); if (ret) { dev_err(dev, "failed to request irq\n"); goto err_gpio; From f23785c6e7d30c941e5844c370f58169b518fd49 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 29 Jul 2024 16:18:10 -0400 Subject: [PATCH 390/534] PCI: imx6: Fix missing call to phy_power_off() in error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5b04d44d5c74e4d8aab1678496b84700b4b343fe upstream. Fix missing call to phy_power_off() in the error path of imx6_pcie_host_init(). Remove unnecessary check for imx6_pcie->phy as the PHY API already handles NULL pointers. Fixes: cbcf8722b523 ("phy: freescale: imx8m-pcie: Fix the wrong order of phy_init() and phy_power_on()") Link: https://lore.kernel.org/linux-pci/20240729-pci2_upstream-v8-3-b68ee5ef2b4d@nxp.com Signed-off-by: Frank Li [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam Cc: # 6.1+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pci-imx6.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 74703362aeec..86b09b5d7f24 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -997,7 +997,7 @@ static int imx6_pcie_host_init(struct dw_pcie_rp *pp) ret = phy_power_on(imx6_pcie->phy); if (ret) { dev_err(dev, "waiting for PHY ready timeout!\n"); - goto err_phy_off; + goto err_phy_exit; } } @@ -1012,8 +1012,9 @@ static int imx6_pcie_host_init(struct dw_pcie_rp *pp) return 0; err_phy_off: - if (imx6_pcie->phy) - phy_exit(imx6_pcie->phy); + phy_power_off(imx6_pcie->phy); +err_phy_exit: + phy_exit(imx6_pcie->phy); err_clk_disable: imx6_pcie_clk_disable(imx6_pcie); err_reg_disable: From a200897dc704164fabdcea8205334f5cbb97051f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 9 Aug 2024 14:24:56 +0100 Subject: [PATCH 391/534] PCI: Correct error reporting with PCIe failed link retraining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 712e49c967064a3a7a5738c6f65ac540a3f6a1df upstream. Only return successful completion status from pcie_failed_link_retrain() if retraining has actually been done, preventing excessive delays from being triggered at call sites in a hope that communication will finally be established with the downstream device where in fact nothing has been done about the link in question that would justify such a hope. Fixes: a89c82249c37 ("PCI: Work around PCIe link training failures") Link: https://lore.kernel.org/r/alpine.DEB.2.21.2408091133260.61955@angie.orcam.me.uk Reported-by: Ilpo Järvinen Link: https://lore.kernel.org/r/aa2d1c4e-9961-d54a-00c7-ddf8e858a9b0@linux.intel.com/ Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Reviewed-by: Ilpo Järvinen Cc: # v6.5+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e3d57065f777..ced48e955b2c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -78,7 +78,8 @@ * again to remove any residual state, ignoring the result as it's supposed * to fail anyway. * - * Return TRUE if the link has been successfully retrained, otherwise FALSE. + * Return TRUE if the link has been successfully retrained. Return FALSE + * if retraining was not needed or we attempted a retrain and it failed. */ bool pcie_failed_link_retrain(struct pci_dev *dev) { @@ -87,6 +88,7 @@ bool pcie_failed_link_retrain(struct pci_dev *dev) {} }; u16 lnksta, lnkctl2; + bool ret = false; if (!pci_is_pcie(dev) || !pcie_downstream_port(dev) || !pcie_cap_has_lnkctl2(dev) || !dev->link_active_reporting) @@ -104,7 +106,8 @@ bool pcie_failed_link_retrain(struct pci_dev *dev) lnkctl2 |= PCI_EXP_LNKCTL2_TLS_2_5GT; pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); - if (pcie_retrain_link(dev, false)) { + ret = pcie_retrain_link(dev, false) == 0; + if (!ret) { pci_info(dev, "retraining failed\n"); pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, oldlnkctl2); @@ -126,13 +129,14 @@ bool pcie_failed_link_retrain(struct pci_dev *dev) lnkctl2 |= lnkcap & PCI_EXP_LNKCAP_SLS; pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); - if (pcie_retrain_link(dev, false)) { + ret = pcie_retrain_link(dev, false) == 0; + if (!ret) { pci_info(dev, "retraining failed\n"); return false; } } - return true; + return ret; } static ktime_t fixup_debug_start(struct pci_dev *dev, From 3d8573abdc65edd367d25df159f6be82d9b7acbf Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 9 Aug 2024 14:25:02 +0100 Subject: [PATCH 392/534] PCI: Use an error code with PCIe failed link retraining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 59100eb248c0b15585affa546c7f6834b30eb5a4 upstream. Given how the call place in pcie_wait_for_link_delay() got structured now, and that pcie_retrain_link() returns a potentially useful error code, convert pcie_failed_link_retrain() to return an error code rather than a boolean status, fixing handling at the call site mentioned. Update the other call site accordingly. Fixes: 1abb47390350 ("Merge branch 'pci/enumeration'") Link: https://lore.kernel.org/r/alpine.DEB.2.21.2408091156530.61955@angie.orcam.me.uk Reported-by: Ilpo Järvinen Link: https://lore.kernel.org/r/aa2d1c4e-9961-d54a-00c7-ddf8e858a9b0@linux.intel.com/ Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Reviewed-by: Ilpo Järvinen Cc: # v6.5+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 2 +- drivers/pci/pci.h | 6 +++--- drivers/pci/quirks.c | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 68a1e9fac661..93f2f4dcf6d6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1208,7 +1208,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout) if (delay > PCI_RESET_WAIT) { if (retrain) { retrain = false; - if (pcie_failed_link_retrain(bridge)) { + if (pcie_failed_link_retrain(bridge) == 0) { delay = 1; continue; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 2cc032e8cbb9..d5e9010a135a 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -530,7 +530,7 @@ void pci_acs_init(struct pci_dev *dev); int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags); int pci_dev_specific_enable_acs(struct pci_dev *dev); int pci_dev_specific_disable_acs_redir(struct pci_dev *dev); -bool pcie_failed_link_retrain(struct pci_dev *dev); +int pcie_failed_link_retrain(struct pci_dev *dev); #else static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags) @@ -545,9 +545,9 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) { return -ENOTTY; } -static inline bool pcie_failed_link_retrain(struct pci_dev *dev) +static inline int pcie_failed_link_retrain(struct pci_dev *dev) { - return false; + return -ENOTTY; } #endif diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ced48e955b2c..0b08ac45effb 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -78,21 +78,21 @@ * again to remove any residual state, ignoring the result as it's supposed * to fail anyway. * - * Return TRUE if the link has been successfully retrained. Return FALSE + * Return 0 if the link has been successfully retrained. Return an error * if retraining was not needed or we attempted a retrain and it failed. */ -bool pcie_failed_link_retrain(struct pci_dev *dev) +int pcie_failed_link_retrain(struct pci_dev *dev) { static const struct pci_device_id ids[] = { { PCI_VDEVICE(ASMEDIA, 0x2824) }, /* ASMedia ASM2824 */ {} }; u16 lnksta, lnkctl2; - bool ret = false; + int ret = -ENOTTY; if (!pci_is_pcie(dev) || !pcie_downstream_port(dev) || !pcie_cap_has_lnkctl2(dev) || !dev->link_active_reporting) - return false; + return ret; pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &lnkctl2); pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); @@ -106,13 +106,13 @@ bool pcie_failed_link_retrain(struct pci_dev *dev) lnkctl2 |= PCI_EXP_LNKCTL2_TLS_2_5GT; pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); - ret = pcie_retrain_link(dev, false) == 0; - if (!ret) { + ret = pcie_retrain_link(dev, false); + if (ret) { pci_info(dev, "retraining failed\n"); pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, oldlnkctl2); pcie_retrain_link(dev, true); - return false; + return ret; } pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); @@ -129,10 +129,10 @@ bool pcie_failed_link_retrain(struct pci_dev *dev) lnkctl2 |= lnkcap & PCI_EXP_LNKCAP_SLS; pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); - ret = pcie_retrain_link(dev, false) == 0; - if (!ret) { + ret = pcie_retrain_link(dev, false); + if (ret) { pci_info(dev, "retraining failed\n"); - return false; + return ret; } } From 02a370c4fc0f1b70ca32302d6689a708c0dac781 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 31 May 2024 12:13:32 -0400 Subject: [PATCH 393/534] PCI: xilinx-nwl: Fix off-by-one in INTx IRQ handler commit 0199d2f2bd8cd97b310f7ed82a067247d7456029 upstream. MSGF_LEG_MASK is laid out with INTA in bit 0, INTB in bit 1, INTC in bit 2, and INTD in bit 3. Hardware IRQ numbers start at 0, and we register PCI_NUM_INTX IRQs. So to enable INTA (aka hwirq 0) we should set bit 0. Remove the subtraction of one. This bug would cause INTx interrupts not to be delivered, as enabling INTB would actually enable INTA, and enabling INTA wouldn't enable anything at all. It is likely that this got overlooked for so long since most PCIe hardware uses MSIs. This fixes the following UBSAN error: UBSAN: shift-out-of-bounds in ../drivers/pci/controller/pcie-xilinx-nwl.c:389:11 shift exponent 18446744073709551615 is too large for 32-bit type 'int' CPU: 1 PID: 61 Comm: kworker/u10:1 Not tainted 6.6.20+ #268 Hardware name: xlnx,zynqmp (DT) Workqueue: events_unbound deferred_probe_work_func Call trace: dump_backtrace (arch/arm64/kernel/stacktrace.c:235) show_stack (arch/arm64/kernel/stacktrace.c:242) dump_stack_lvl (lib/dump_stack.c:107) dump_stack (lib/dump_stack.c:114) __ubsan_handle_shift_out_of_bounds (lib/ubsan.c:218 lib/ubsan.c:387) nwl_unmask_leg_irq (drivers/pci/controller/pcie-xilinx-nwl.c:389 (discriminator 1)) irq_enable (kernel/irq/internals.h:234 kernel/irq/chip.c:170 kernel/irq/chip.c:439 kernel/irq/chip.c:432 kernel/irq/chip.c:345) __irq_startup (kernel/irq/internals.h:239 kernel/irq/chip.c:180 kernel/irq/chip.c:250) irq_startup (kernel/irq/chip.c:270) __setup_irq (kernel/irq/manage.c:1800) request_threaded_irq (kernel/irq/manage.c:2206) pcie_pme_probe (include/linux/interrupt.h:168 drivers/pci/pcie/pme.c:348) Fixes: 9a181e1093af ("PCI: xilinx-nwl: Modify IRQ chip for legacy interrupts") Link: https://lore.kernel.org/r/20240531161337.864994-3-sean.anderson@linux.dev Signed-off-by: Sean Anderson Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-xilinx-nwl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 60afbb51511c..5b82098f32b7 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -373,7 +373,7 @@ static void nwl_mask_leg_irq(struct irq_data *data) u32 mask; u32 val; - mask = 1 << (data->hwirq - 1); + mask = 1 << data->hwirq; raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags); val = nwl_bridge_readl(pcie, MSGF_LEG_MASK); nwl_bridge_writel(pcie, (val & (~mask)), MSGF_LEG_MASK); @@ -387,7 +387,7 @@ static void nwl_unmask_leg_irq(struct irq_data *data) u32 mask; u32 val; - mask = 1 << (data->hwirq - 1); + mask = 1 << data->hwirq; raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags); val = nwl_bridge_readl(pcie, MSGF_LEG_MASK); nwl_bridge_writel(pcie, (val | mask), MSGF_LEG_MASK); From 513d60f419452f433c507f26e3056faabfcb3441 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 29 Jul 2024 22:52:14 +0300 Subject: [PATCH 394/534] Revert "soc: qcom: smd-rpm: Match rpmsg channel instead of compatible" commit b17155133391d7f6dd18d3fb94a7d492fdec18fa upstream. The rpm_requests device nodes have the compatible node. As such the rpmsg core uses OF modalias instead of a native rpmsg modalias. Thus if smd-rpm is built as a module, it doesn't get autoloaded for the device. Revert the commit bcabe1e09135 ("soc: qcom: smd-rpm: Match rpmsg channel instead of compatible") Fixes: bcabe1e09135 ("soc: qcom: smd-rpm: Match rpmsg channel instead of compatible") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240729-fix-smd-rpm-v2-1-0776408a94c5@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/soc/qcom/smd-rpm.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/soc/qcom/smd-rpm.c b/drivers/soc/qcom/smd-rpm.c index f9fd6177118c..577f1f25ab10 100644 --- a/drivers/soc/qcom/smd-rpm.c +++ b/drivers/soc/qcom/smd-rpm.c @@ -196,9 +196,6 @@ static int qcom_smd_rpm_probe(struct rpmsg_device *rpdev) { struct qcom_smd_rpm *rpm; - if (!rpdev->dev.of_node) - return -EINVAL; - rpm = devm_kzalloc(&rpdev->dev, sizeof(*rpm), GFP_KERNEL); if (!rpm) return -ENOMEM; @@ -218,18 +215,38 @@ static void qcom_smd_rpm_remove(struct rpmsg_device *rpdev) of_platform_depopulate(&rpdev->dev); } -static const struct rpmsg_device_id qcom_smd_rpm_id_table[] = { - { .name = "rpm_requests", }, - { /* sentinel */ } +static const struct of_device_id qcom_smd_rpm_of_match[] = { + { .compatible = "qcom,rpm-apq8084" }, + { .compatible = "qcom,rpm-ipq6018" }, + { .compatible = "qcom,rpm-ipq9574" }, + { .compatible = "qcom,rpm-msm8226" }, + { .compatible = "qcom,rpm-msm8909" }, + { .compatible = "qcom,rpm-msm8916" }, + { .compatible = "qcom,rpm-msm8936" }, + { .compatible = "qcom,rpm-msm8953" }, + { .compatible = "qcom,rpm-msm8974" }, + { .compatible = "qcom,rpm-msm8976" }, + { .compatible = "qcom,rpm-msm8994" }, + { .compatible = "qcom,rpm-msm8996" }, + { .compatible = "qcom,rpm-msm8998" }, + { .compatible = "qcom,rpm-sdm660" }, + { .compatible = "qcom,rpm-sm6115" }, + { .compatible = "qcom,rpm-sm6125" }, + { .compatible = "qcom,rpm-sm6375" }, + { .compatible = "qcom,rpm-qcm2290" }, + { .compatible = "qcom,rpm-qcs404" }, + {} }; -MODULE_DEVICE_TABLE(rpmsg, qcom_smd_rpm_id_table); +MODULE_DEVICE_TABLE(of, qcom_smd_rpm_of_match); static struct rpmsg_driver qcom_smd_rpm_driver = { .probe = qcom_smd_rpm_probe, .remove = qcom_smd_rpm_remove, .callback = qcom_smd_rpm_callback, - .id_table = qcom_smd_rpm_id_table, - .drv.name = "qcom_smd_rpm", + .drv = { + .name = "qcom_smd_rpm", + .of_match_table = qcom_smd_rpm_of_match, + }, }; static int __init qcom_smd_rpm_init(void) From 543a3c7dbd5b90970ee14abd3db545146e37b109 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Fri, 30 Aug 2024 22:31:54 +0800 Subject: [PATCH 395/534] ASoC: rt5682: Return devm_of_clk_add_hw_provider to transfer the error commit fcca6d05ef49d5650514ea1dcfd12e4ae3ff2be6 upstream. Return devm_of_clk_add_hw_provider() in order to transfer the error, if it fails due to resource allocation failure or device tree clock provider registration failure. Cc: stable@vger.kernel.org Fixes: ebbfabc16d23 ("ASoC: rt5682: Add CCF usage for providing I2S clks") Signed-off-by: Ma Ke Link: https://patch.msgid.link/20240830143154.3448004-1-make24@iscas.ac.cn Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5682.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index e3aca9c785a0..aa163ec40862 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -2903,8 +2903,10 @@ int rt5682_register_dai_clks(struct rt5682_priv *rt5682) } if (dev->of_node) { - devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, dai_clk_hw); + if (ret) + return ret; } else { ret = devm_clk_hw_register_clkdev(dev, dai_clk_hw, init.name, From c3533bf2ed1d89fa9354ced84d6abf8ab1135388 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 8 Aug 2024 09:10:56 +0200 Subject: [PATCH 396/534] soc: fsl: cpm1: tsa: Fix tsa_write8() commit 47a347bae9a491b467ab3543e4725a3e4fbe30f5 upstream. The tsa_write8() parameter is an u32 value. This is not consistent with the function itself. Indeed, tsa_write8() writes an 8bits value. Be consistent and use an u8 parameter value. Fixes: 1d4ba0b81c1c ("soc: fsl: cpm1: Add support for TSA") Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20240808071132.149251-4-herve.codina@bootlin.com Signed-off-by: Christophe Leroy Signed-off-by: Greg Kroah-Hartman --- drivers/soc/fsl/qe/tsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/fsl/qe/tsa.c b/drivers/soc/fsl/qe/tsa.c index e0527b9efd05..1bbc9af1e50b 100644 --- a/drivers/soc/fsl/qe/tsa.c +++ b/drivers/soc/fsl/qe/tsa.c @@ -140,7 +140,7 @@ static inline void tsa_write32(void __iomem *addr, u32 val) iowrite32be(val, addr); } -static inline void tsa_write8(void __iomem *addr, u32 val) +static inline void tsa_write8(void __iomem *addr, u8 val) { iowrite8(val, addr); } From a4c2fbed2037ec0acddee3ca0425f175c5c7a3aa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 Aug 2024 20:05:22 +0200 Subject: [PATCH 397/534] soc: versatile: integrator: fix OF node leak in probe() error path commit 874c5b601856adbfda10846b9770a6c66c41e229 upstream. Driver is leaking OF node reference obtained from of_find_matching_node(). Fixes: f956a785a282 ("soc: move SoC driver for the ARM Integrator") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20240825-soc-dev-fixes-v1-1-ff4b35abed83@linaro.org Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/soc/versatile/soc-integrator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/versatile/soc-integrator.c b/drivers/soc/versatile/soc-integrator.c index bab4ad87aa75..d5099a3386b4 100644 --- a/drivers/soc/versatile/soc-integrator.c +++ b/drivers/soc/versatile/soc-integrator.c @@ -113,6 +113,7 @@ static int __init integrator_soc_init(void) return -ENODEV; syscon_regmap = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(syscon_regmap)) return PTR_ERR(syscon_regmap); From e48edd4762910d6ab0bc5edf00af4397d7b28253 Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Tue, 16 Jul 2024 12:10:40 +0300 Subject: [PATCH 398/534] Revert "media: tuners: fix error return code of hybrid_tuner_request_state()" commit e25cc4be4616fcf5689622b3226d648aab253cdb upstream. This reverts commit b9302fa7ed979e84b454e4ca92192cf485a4ed41. As Fedor Pchelkin pointed out, this commit violates the convention of using the macro return value, which causes errors. For example, in functions tda18271_attach(), xc5000_attach(), simple_tuner_attach(). Link: https://lore.kernel.org/linux-media/20240424202031.syigrtrtipbq5f2l@fpc/ Suggested-by: Fedor Pchelkin Signed-off-by: Roman Smirnov Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/tuners/tuner-i2c.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/media/tuners/tuner-i2c.h b/drivers/media/tuners/tuner-i2c.h index 07aeead0644a..724952e001cd 100644 --- a/drivers/media/tuners/tuner-i2c.h +++ b/drivers/media/tuners/tuner-i2c.h @@ -133,10 +133,8 @@ static inline int tuner_i2c_xfer_send_recv(struct tuner_i2c_props *props, } \ if (0 == __ret) { \ state = kzalloc(sizeof(type), GFP_KERNEL); \ - if (!state) { \ - __ret = -ENOMEM; \ + if (NULL == state) \ goto __fail; \ - } \ state->i2c_props.addr = i2caddr; \ state->i2c_props.adap = i2cadap; \ state->i2c_props.name = devname; \ From cd6dd564ae7d99967ef50078216929418160b30e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 27 Aug 2024 13:46:45 -0300 Subject: [PATCH 399/534] iommufd: Protect against overflow of ALIGN() during iova allocation commit 8f6887349b2f829a4121c518aeb064fc922714e4 upstream. Userspace can supply an iova and uptr such that the target iova alignment becomes really big and ALIGN() overflows which corrupts the selected area range during allocation. CONFIG_IOMMUFD_TEST can detect this: WARNING: CPU: 1 PID: 5092 at drivers/iommu/iommufd/io_pagetable.c:268 iopt_alloc_area_pages drivers/iommu/iommufd/io_pagetable.c:268 [inline] WARNING: CPU: 1 PID: 5092 at drivers/iommu/iommufd/io_pagetable.c:268 iopt_map_pages+0xf95/0x1050 drivers/iommu/iommufd/io_pagetable.c:352 Modules linked in: CPU: 1 PID: 5092 Comm: syz-executor294 Not tainted 6.10.0-rc5-syzkaller-00294-g3ffea9a7a6f7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024 RIP: 0010:iopt_alloc_area_pages drivers/iommu/iommufd/io_pagetable.c:268 [inline] RIP: 0010:iopt_map_pages+0xf95/0x1050 drivers/iommu/iommufd/io_pagetable.c:352 Code: fc e9 a4 f3 ff ff e8 1a 8b 4c fc 41 be e4 ff ff ff e9 8a f3 ff ff e8 0a 8b 4c fc 90 0f 0b 90 e9 37 f5 ff ff e8 fc 8a 4c fc 90 <0f> 0b 90 e9 68 f3 ff ff 48 c7 c1 ec 82 ad 8f 80 e1 07 80 c1 03 38 RSP: 0018:ffffc90003ebf9e0 EFLAGS: 00010293 RAX: ffffffff85499fa4 RBX: 00000000ffffffef RCX: ffff888079b49e00 RDX: 0000000000000000 RSI: 00000000ffffffef RDI: 0000000000000000 RBP: ffffc90003ebfc50 R08: ffffffff85499b30 R09: ffffffff85499942 R10: 0000000000000002 R11: ffff888079b49e00 R12: ffff8880228e0010 R13: 0000000000000000 R14: 1ffff920007d7f68 R15: ffffc90003ebfd00 FS: 000055557d760380(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000005fdeb8 CR3: 000000007404a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: iommufd_ioas_copy+0x610/0x7b0 drivers/iommu/iommufd/ioas.c:274 iommufd_fops_ioctl+0x4d9/0x5a0 drivers/iommu/iommufd/main.c:421 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Cap the automatic alignment to the huge page size, which is probably a better idea overall. Huge automatic alignments can fragment and chew up the available IOVA space without any reason. Link: https://patch.msgid.link/r/0-v1-8009738b9891+1f7-iommufd_align_overflow_jgg@nvidia.com Cc: stable@vger.kernel.org Fixes: 51fe6141f0f6 ("iommufd: Data structure to provide IOVA to PFN mapping") Reviewed-by: Nicolin Chen Reported-by: syzbot+16073ebbc4c64b819b47@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/000000000000388410061a74f014@google.com Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommufd/io_pagetable.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 2d22c027aa59..e76b22939994 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -111,6 +111,7 @@ static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova, unsigned long page_offset = uptr % PAGE_SIZE; struct interval_tree_double_span_iter used_span; struct interval_tree_span_iter allowed_span; + unsigned long max_alignment = PAGE_SIZE; unsigned long iova_alignment; lockdep_assert_held(&iopt->iova_rwsem); @@ -130,6 +131,13 @@ static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova, roundup_pow_of_two(length), 1UL << __ffs64(uptr)); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + max_alignment = HPAGE_SIZE; +#endif + /* Protect against ALIGN() overflow */ + if (iova_alignment >= max_alignment) + iova_alignment = max_alignment; + if (iova_alignment < iopt->iova_alignment) return -EINVAL; From 2a26c3122d01704450e755d022fd06d1f002cf88 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 20 Sep 2024 09:22:52 +0200 Subject: [PATCH 400/534] Input: adp5588-keys - fix check on return code commit eb017f4ea13b1a5ad7f4332279f2e4c67b44bdea upstream. During adp5588_setup(), we read all the events to clear the event FIFO. However, adp5588_read() just calls i2c_smbus_read_byte_data() which returns the byte read in case everything goes well. Hence, we need to explicitly check for a negative error code instead of checking for something different than 0. Fixes: e960309ce318 ("Input: adp5588-keys - bail out on returned error") Cc: stable@vger.kernel.org Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240920-fix-adp5588-err-check-v1-1-81f6e957ef24@analog.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/adp5588-keys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index 61e8e43e9c2b..489813274401 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -627,7 +627,7 @@ static int adp5588_setup(struct adp5588_kpad *kpad) for (i = 0; i < KEYP_MAX_EVENT; i++) { ret = adp5588_read(client, KEY_EVENTA); - if (ret) + if (ret < 0) return ret; } From c18dca92da2a1813d8bfd16a3e3a00108baab76a Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 5 Sep 2024 18:48:51 +0200 Subject: [PATCH 401/534] Input: i8042 - add TUXEDO Stellaris 16 Gen5 AMD to i8042 quirk table commit e06edf96dea065dd1d9df695bf8b92784992333e upstream. Some TongFang barebones have touchpad and/or keyboard issues after suspend, fixable with nomux + reset + noloop + nopnp. Luckily, none of them have an external PS/2 port so this can safely be set for all of them. I'm not entirely sure if every device listed really needs all four quirks, but after testing and production use, no negative effects could be observed when setting all four. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240905164851.771578-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-acpipnpio.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index bad238f69a7a..1d73391f127b 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1120,6 +1120,29 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOLOOP) }, + /* + * Some TongFang barebones have touchpad and/or keyboard issues after + * suspend fixable with nomux + reset + noloop + nopnp. Luckily, none of + * them have an external PS/2 port so this can safely be set for all of + * them. + * TongFang barebones come with board_vendor and/or system_vendor set to + * a different value for each individual reseller. The only somewhat + * universal way to identify them is by board_name. + */ + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM6XGxX"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxXGxX"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, /* * A lot of modern Clevo barebones have touchpad and/or keyboard issues * after suspend fixable with nomux + reset + noloop + nopnp. Luckily, From 09d94ac8b25fd0de6accb6c56916f8cbe48ecbc5 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 10 Sep 2024 11:40:08 +0200 Subject: [PATCH 402/534] Input: i8042 - add TUXEDO Stellaris 15 Slim Gen6 AMD to i8042 quirk table commit 3870e2850b56306d1d1e435c5a1ccbccd7c59291 upstream. The Gen6 devices have the same problem and the same Solution as the Gen5 ones. Some TongFang barebones have touchpad and/or keyboard issues after suspend, fixable with nomux + reset + noloop + nopnp. Luckily, none of them have an external PS/2 port so this can safely be set for all of them. I'm not entirely sure if every device listed really needs all four quirks, but after testing and production use, no negative effects could be observed when setting all four. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240910094008.1601230-3-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-acpipnpio.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 1d73391f127b..6f54e651adfe 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1143,6 +1143,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxHGxx"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, /* * A lot of modern Clevo barebones have touchpad and/or keyboard issues * after suspend fixable with nomux + reset + noloop + nopnp. Luckily, From 0188ea5facba868195e1a749c450598d78f25ff4 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 10 Sep 2024 11:40:07 +0200 Subject: [PATCH 403/534] Input: i8042 - add another board name for TUXEDO Stellaris Gen5 AMD line commit 01eed86d50af9fab27d876fd677b86259ebe9de3 upstream. There might be devices out in the wild where the board name is GMxXGxx instead of GMxXGxX. Adding both to be on the safe side. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240910094008.1601230-2-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-acpipnpio.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 6f54e651adfe..34d1f07ea4c3 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1136,6 +1136,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxXGxx"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "GMxXGxX"), From d5d6489b9211788faa40b45a6c06f615f823f301 Mon Sep 17 00:00:00 2001 From: Snehal Koukuntla Date: Mon, 9 Sep 2024 18:01:54 +0000 Subject: [PATCH 404/534] KVM: arm64: Add memory length checks and remove inline in do_ffa_mem_xfer commit f26a525b77e040d584e967369af1e018d2d59112 upstream. When we share memory through FF-A and the description of the buffers exceeds the size of the mapped buffer, the fragmentation API is used. The fragmentation API allows specifying chunks of descriptors in subsequent FF-A fragment calls and no upper limit has been established for this. The entire memory region transferred is identified by a handle which can be used to reclaim the transferred memory. To be able to reclaim the memory, the description of the buffers has to fit in the ffa_desc_buf. Add a bounds check on the FF-A sharing path to prevent the memory reclaim from failing. Also do_ffa_mem_xfer() does not need __always_inline, except for the BUILD_BUG_ON() aspect, which gets moved to a macro. [maz: fixed the BUILD_BUG_ON() breakage with LLVM, thanks to Wei-Lin Chang for the timely report] Fixes: 634d90cf0ac65 ("KVM: arm64: Handle FFA_MEM_LEND calls from the host") Cc: stable@vger.kernel.org Reviewed-by: Sebastian Ene Signed-off-by: Snehal Koukuntla Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240909180154.3267939-1-snehalreddy@google.com Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/nvhe/ffa.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 6e4dba9eadef..8d21ab904f1a 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -415,9 +415,9 @@ out: return; } -static __always_inline void do_ffa_mem_xfer(const u64 func_id, - struct arm_smccc_res *res, - struct kvm_cpu_context *ctxt) +static void __do_ffa_mem_xfer(const u64 func_id, + struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, len, ctxt, 1); DECLARE_REG(u32, fraglen, ctxt, 2); @@ -428,9 +428,6 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id, u32 offset, nr_ranges; int ret = 0; - BUILD_BUG_ON(func_id != FFA_FN64_MEM_SHARE && - func_id != FFA_FN64_MEM_LEND); - if (addr_mbz || npages_mbz || fraglen > len || fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) { ret = FFA_RET_INVALID_PARAMETERS; @@ -449,6 +446,11 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id, goto out_unlock; } + if (len > ffa_desc_buf.len) { + ret = FFA_RET_NO_MEMORY; + goto out_unlock; + } + buf = hyp_buffers.tx; memcpy(buf, host_buffers.tx, fraglen); @@ -498,6 +500,13 @@ err_unshare: goto out_unlock; } +#define do_ffa_mem_xfer(fid, res, ctxt) \ + do { \ + BUILD_BUG_ON((fid) != FFA_FN64_MEM_SHARE && \ + (fid) != FFA_FN64_MEM_LEND); \ + __do_ffa_mem_xfer((fid), (res), (ctxt)); \ + } while (0); + static void do_ffa_mem_reclaim(struct arm_smccc_res *res, struct kvm_cpu_context *ctxt) { From 7eae461dc35763e8d1d4c7fd34f4a6ed41b83603 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:50:58 -0700 Subject: [PATCH 405/534] KVM: x86: Enforce x2APIC's must-be-zero reserved ICR bits commit 71bf395a276f0578d19e0ae137a7d1d816d08e0e upstream. Inject a #GP on a WRMSR(ICR) that attempts to set any reserved bits that are must-be-zero on both Intel and AMD, i.e. any reserved bits other than the BUSY bit, which Intel ignores and basically says is undefined. KVM's xapic_state_test selftest has been fudging the bug since commit 4b88b1a518b3 ("KVM: selftests: Enhance handling WRMSR ICR register in x2APIC mode"), which essentially removed the testcase instead of fixing the bug. WARN if the nodecode path triggers a #GP, as the CPU is supposed to check reserved bits for ICR when it's partially virtualized. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240719235107.3023592-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 66a2c4c0ae10..ec08fbee0cfa 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2460,7 +2460,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) * maybe-unecessary write, and both are in the noise anyways. */ if (apic_x2apic_mode(apic) && offset == APIC_ICR) - kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)); + WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR))); else kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); } @@ -3153,8 +3153,21 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) return 0; } +#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) + int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) { + if (data & X2APIC_ICR_RESERVED_BITS) + return 1; + + /* + * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but + * only AMD requires it to be zero, Intel essentially just ignores the + * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled, + * the CPU performs the reserved bits checks, i.e. the underlying CPU + * behavior will "win". Arbitrarily clear the BUSY bit, as there is no + * sane way to provide consistent behavior with respect to hardware. + */ data &= ~APIC_ICR_BUSY; kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); From beef3353c601d1c6093328be6c938ccf5592ec5d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2024 16:50:59 -0700 Subject: [PATCH 406/534] KVM: x86: Move x2APIC ICR helper above kvm_apic_write_nodecode() commit d33234342f8b468e719e05649fd26549fb37ef8a upstream. Hoist kvm_x2apic_icr_write() above kvm_apic_write_nodecode() so that a local helper to _read_ the x2APIC ICR can be added and used in the nodecode path without needing a forward declaration. No functional change intended. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240719235107.3023592-3-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 46 ++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ec08fbee0cfa..1380f3489777 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2443,6 +2443,29 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); +#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) + +int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) +{ + if (data & X2APIC_ICR_RESERVED_BITS) + return 1; + + /* + * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but + * only AMD requires it to be zero, Intel essentially just ignores the + * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled, + * the CPU performs the reserved bits checks, i.e. the underlying CPU + * behavior will "win". Arbitrarily clear the BUSY bit, as there is no + * sane way to provide consistent behavior with respect to hardware. + */ + data &= ~APIC_ICR_BUSY; + + kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); + kvm_lapic_set_reg64(apic, APIC_ICR, data); + trace_kvm_apic_write(APIC_ICR, data); + return 0; +} + /* emulate APIC access in a trap manner */ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) { @@ -3153,29 +3176,6 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) return 0; } -#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) - -int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) -{ - if (data & X2APIC_ICR_RESERVED_BITS) - return 1; - - /* - * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but - * only AMD requires it to be zero, Intel essentially just ignores the - * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled, - * the CPU performs the reserved bits checks, i.e. the underlying CPU - * behavior will "win". Arbitrarily clear the BUSY bit, as there is no - * sane way to provide consistent behavior with respect to hardware. - */ - data &= ~APIC_ICR_BUSY; - - kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); - kvm_lapic_set_reg64(apic, APIC_ICR, data); - trace_kvm_apic_write(APIC_ICR, data); - return 0; -} - static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data) { u32 low; From 4777225ec89f52bb9ca16a33cfb44c189f1b7b47 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:51 -0700 Subject: [PATCH 407/534] KVM: Use dedicated mutex to protect kvm_usage_count to avoid deadlock commit 44d17459626052a2390457e550a12cb973506b2f upstream. Use a dedicated mutex to guard kvm_usage_count to fix a potential deadlock on x86 due to a chain of locks and SRCU synchronizations. Translating the below lockdep splat, CPU1 #6 will wait on CPU0 #1, CPU0 #8 will wait on CPU2 #3, and CPU2 #7 will wait on CPU1 #4 (if there's a writer, due to the fairness of r/w semaphores). CPU0 CPU1 CPU2 1 lock(&kvm->slots_lock); 2 lock(&vcpu->mutex); 3 lock(&kvm->srcu); 4 lock(cpu_hotplug_lock); 5 lock(kvm_lock); 6 lock(&kvm->slots_lock); 7 lock(cpu_hotplug_lock); 8 sync(&kvm->srcu); Note, there are likely more potential deadlocks in KVM x86, e.g. the same pattern of taking cpu_hotplug_lock outside of kvm_lock likely exists with __kvmclock_cpufreq_notifier(): cpuhp_cpufreq_online() | -> cpufreq_online() | -> cpufreq_gov_performance_limits() | -> __cpufreq_driver_target() | -> __target_index() | -> cpufreq_freq_transition_begin() | -> cpufreq_notify_transition() | -> ... __kvmclock_cpufreq_notifier() But, actually triggering such deadlocks is beyond rare due to the combination of dependencies and timings involved. E.g. the cpufreq notifier is only used on older CPUs without a constant TSC, mucking with the NX hugepage mitigation while VMs are running is very uncommon, and doing so while also onlining/offlining a CPU (necessary to generate contention on cpu_hotplug_lock) would be even more unusual. The most robust solution to the general cpu_hotplug_lock issue is likely to switch vm_list to be an RCU-protected list, e.g. so that x86's cpufreq notifier doesn't to take kvm_lock. For now, settle for fixing the most blatant deadlock, as switching to an RCU-protected list is a much more involved change, but add a comment in locking.rst to call out that care needs to be taken when walking holding kvm_lock and walking vm_list. ====================================================== WARNING: possible circular locking dependency detected 6.10.0-smp--c257535a0c9d-pip #330 Tainted: G S O ------------------------------------------------------ tee/35048 is trying to acquire lock: ff6a80eced71e0a8 (&kvm->slots_lock){+.+.}-{3:3}, at: set_nx_huge_pages+0x179/0x1e0 [kvm] but task is already holding lock: ffffffffc07abb08 (kvm_lock){+.+.}-{3:3}, at: set_nx_huge_pages+0x14a/0x1e0 [kvm] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (kvm_lock){+.+.}-{3:3}: __mutex_lock+0x6a/0xb40 mutex_lock_nested+0x1f/0x30 kvm_dev_ioctl+0x4fb/0xe50 [kvm] __se_sys_ioctl+0x7b/0xd0 __x64_sys_ioctl+0x21/0x30 x64_sys_call+0x15d0/0x2e60 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #2 (cpu_hotplug_lock){++++}-{0:0}: cpus_read_lock+0x2e/0xb0 static_key_slow_inc+0x16/0x30 kvm_lapic_set_base+0x6a/0x1c0 [kvm] kvm_set_apic_base+0x8f/0xe0 [kvm] kvm_set_msr_common+0x9ae/0xf80 [kvm] vmx_set_msr+0xa54/0xbe0 [kvm_intel] __kvm_set_msr+0xb6/0x1a0 [kvm] kvm_arch_vcpu_ioctl+0xeca/0x10c0 [kvm] kvm_vcpu_ioctl+0x485/0x5b0 [kvm] __se_sys_ioctl+0x7b/0xd0 __x64_sys_ioctl+0x21/0x30 x64_sys_call+0x15d0/0x2e60 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (&kvm->srcu){.+.+}-{0:0}: __synchronize_srcu+0x44/0x1a0 synchronize_srcu_expedited+0x21/0x30 kvm_swap_active_memslots+0x110/0x1c0 [kvm] kvm_set_memslot+0x360/0x620 [kvm] __kvm_set_memory_region+0x27b/0x300 [kvm] kvm_vm_ioctl_set_memory_region+0x43/0x60 [kvm] kvm_vm_ioctl+0x295/0x650 [kvm] __se_sys_ioctl+0x7b/0xd0 __x64_sys_ioctl+0x21/0x30 x64_sys_call+0x15d0/0x2e60 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (&kvm->slots_lock){+.+.}-{3:3}: __lock_acquire+0x15ef/0x2e30 lock_acquire+0xe0/0x260 __mutex_lock+0x6a/0xb40 mutex_lock_nested+0x1f/0x30 set_nx_huge_pages+0x179/0x1e0 [kvm] param_attr_store+0x93/0x100 module_attr_store+0x22/0x40 sysfs_kf_write+0x81/0xb0 kernfs_fop_write_iter+0x133/0x1d0 vfs_write+0x28d/0x380 ksys_write+0x70/0xe0 __x64_sys_write+0x1f/0x30 x64_sys_call+0x281b/0x2e60 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e Cc: Chao Gao Fixes: 0bf50497f03b ("KVM: Drop kvm_count_lock and instead protect kvm_usage_count with kvm_lock") Cc: stable@vger.kernel.org Reviewed-by: Kai Huang Acked-by: Kai Huang Tested-by: Farrah Chen Signed-off-by: Sean Christopherson Message-ID: <20240830043600.127750-2-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- Documentation/virt/kvm/locking.rst | 32 +++++++++++++++++++++--------- virt/kvm/kvm_main.c | 31 +++++++++++++++-------------- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index 3a034db5e55f..703d3781c775 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -9,7 +9,7 @@ KVM Lock Overview The acquisition orders for mutexes are as follows: -- cpus_read_lock() is taken outside kvm_lock +- cpus_read_lock() is taken outside kvm_lock and kvm_usage_lock - kvm->lock is taken outside vcpu->mutex @@ -24,6 +24,12 @@ The acquisition orders for mutexes are as follows: are taken on the waiting side when modifying memslots, so MMU notifiers must not take either kvm->slots_lock or kvm->slots_arch_lock. +cpus_read_lock() vs kvm_lock: +- Taking cpus_read_lock() outside of kvm_lock is problematic, despite that + being the official ordering, as it is quite easy to unknowingly trigger + cpus_read_lock() while holding kvm_lock. Use caution when walking vm_list, + e.g. avoid complex operations when possible. + For SRCU: - ``synchronize_srcu(&kvm->srcu)`` is called inside critical sections @@ -228,10 +234,17 @@ time it will be set using the Dirty tracking mechanism described above. :Type: mutex :Arch: any :Protects: - vm_list - - kvm_usage_count + +``kvm_usage_lock`` +^^^^^^^^^^^^^^^^^^ + +:Type: mutex +:Arch: any +:Protects: - kvm_usage_count - hardware virtualization enable/disable -:Comment: KVM also disables CPU hotplug via cpus_read_lock() during - enable/disable. +:Comment: Exists because using kvm_lock leads to deadlock (see earlier comment + on cpus_read_lock() vs kvm_lock). Note, KVM also disables CPU hotplug via + cpus_read_lock() when enabling/disabling virtualization. ``kvm->mn_invalidate_lock`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -291,11 +304,12 @@ time it will be set using the Dirty tracking mechanism described above. wakeup. ``vendor_module_lock`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^ :Type: mutex :Arch: x86 :Protects: loading a vendor module (kvm_amd or kvm_intel) -:Comment: Exists because using kvm_lock leads to deadlock. cpu_hotplug_lock is - taken outside of kvm_lock, e.g. in KVM's CPU online/offline callbacks, and - many operations need to take cpu_hotplug_lock when loading a vendor module, - e.g. updating static calls. +:Comment: Exists because using kvm_lock leads to deadlock. kvm_lock is taken + in notifiers, e.g. __kvmclock_cpufreq_notifier(), that may be invoked while + cpu_hotplug_lock is held, e.g. from cpufreq_boost_trigger_state(), and many + operations need to take cpu_hotplug_lock when loading a vendor module, e.g. + updating static calls. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0af1546cc223..44c228bcd699 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5174,6 +5174,7 @@ __visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); static DEFINE_PER_CPU(bool, hardware_enabled); +static DEFINE_MUTEX(kvm_usage_lock); static int kvm_usage_count; static int __hardware_enable_nolock(void) @@ -5206,10 +5207,10 @@ static int kvm_online_cpu(unsigned int cpu) * be enabled. Otherwise running VMs would encounter unrecoverable * errors when scheduled to this CPU. */ - mutex_lock(&kvm_lock); + mutex_lock(&kvm_usage_lock); if (kvm_usage_count) ret = __hardware_enable_nolock(); - mutex_unlock(&kvm_lock); + mutex_unlock(&kvm_usage_lock); return ret; } @@ -5229,10 +5230,10 @@ static void hardware_disable_nolock(void *junk) static int kvm_offline_cpu(unsigned int cpu) { - mutex_lock(&kvm_lock); + mutex_lock(&kvm_usage_lock); if (kvm_usage_count) hardware_disable_nolock(NULL); - mutex_unlock(&kvm_lock); + mutex_unlock(&kvm_usage_lock); return 0; } @@ -5248,9 +5249,9 @@ static void hardware_disable_all_nolock(void) static void hardware_disable_all(void) { cpus_read_lock(); - mutex_lock(&kvm_lock); + mutex_lock(&kvm_usage_lock); hardware_disable_all_nolock(); - mutex_unlock(&kvm_lock); + mutex_unlock(&kvm_usage_lock); cpus_read_unlock(); } @@ -5281,7 +5282,7 @@ static int hardware_enable_all(void) * enable hardware multiple times. */ cpus_read_lock(); - mutex_lock(&kvm_lock); + mutex_lock(&kvm_usage_lock); r = 0; @@ -5295,7 +5296,7 @@ static int hardware_enable_all(void) } } - mutex_unlock(&kvm_lock); + mutex_unlock(&kvm_usage_lock); cpus_read_unlock(); return r; @@ -5323,13 +5324,13 @@ static int kvm_suspend(void) { /* * Secondary CPUs and CPU hotplug are disabled across the suspend/resume - * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count - * is stable. Assert that kvm_lock is not held to ensure the system - * isn't suspended while KVM is enabling hardware. Hardware enabling - * can be preempted, but the task cannot be frozen until it has dropped - * all locks (userspace tasks are frozen via a fake signal). + * callbacks, i.e. no need to acquire kvm_usage_lock to ensure the usage + * count is stable. Assert that kvm_usage_lock is not held to ensure + * the system isn't suspended while KVM is enabling hardware. Hardware + * enabling can be preempted, but the task cannot be frozen until it has + * dropped all locks (userspace tasks are frozen via a fake signal). */ - lockdep_assert_not_held(&kvm_lock); + lockdep_assert_not_held(&kvm_usage_lock); lockdep_assert_irqs_disabled(); if (kvm_usage_count) @@ -5339,7 +5340,7 @@ static int kvm_suspend(void) static void kvm_resume(void) { - lockdep_assert_not_held(&kvm_lock); + lockdep_assert_not_held(&kvm_usage_lock); lockdep_assert_irqs_disabled(); if (kvm_usage_count) From a53841b074cc196c3caaa37e1f15d6bc90943b97 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Fri, 12 Jul 2024 16:30:03 -0400 Subject: [PATCH 408/534] drm/amd/display: Skip Recompute DSC Params if no Stream on Link commit 8151a6c13111b465dbabe07c19f572f7cbd16fef upstream. [why] Encounter NULL pointer dereference uner mst + dsc setup. BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2 Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022 RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? plist_add+0xbe/0x100 ? exc_page_fault+0x7c/0x180 ? asm_exc_page_fault+0x26/0x30 ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] drm_atomic_check_only+0x5c5/0xa40 drm_mode_atomic_ioctl+0x76e/0xbc0 [how] dsc recompute should be skipped if no mode change detected on the new request. If detected, keep checking whether the stream is already on current state or not. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Siqueira Signed-off-by: Fangzhi Zuo Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 43173a1f99d0..16654d02e387 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1266,6 +1266,9 @@ static bool is_dsc_need_re_compute( } } + if (new_stream_on_link_num == 0) + return false; + if (new_stream_on_link_num == 0) return false; From 55fcbe5f60865717479a9d6a8d8d076c8f443759 Mon Sep 17 00:00:00 2001 From: Leo Ma Date: Mon, 19 Aug 2024 13:25:27 -0400 Subject: [PATCH 409/534] drm/amd/display: Add HDMI DSC native YCbCr422 support commit 07bfa9cdbf3cd2daadfaaba0601f126f45951ffa upstream. [WHY && HOW] For some HDMI OVT timing, YCbCr422 encoding fails at the DSC bandwidth check. The root cause is our DSC policy for timing doesn't account for HDMI YCbCr422 native support. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Chris Park Signed-off-by: Leo Ma Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dc_dsc.h | 3 ++- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 16654d02e387..7fb11445a28f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1115,7 +1115,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, params[count].num_slices_v = aconnector->dsc_settings.dsc_num_slices_v; params[count].bpp_overwrite = aconnector->dsc_settings.dsc_bits_per_pixel; params[count].compression_possible = stream->sink->dsc_caps.dsc_dec_caps.is_dsc_supported; - dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy); + dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); if (!dc_dsc_compute_bandwidth_range( stream->sink->ctx->dc->res_pool->dscs[0], stream->sink->ctx->dc->debug.dsc_min_slice_height_override, @@ -1586,7 +1586,7 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, { struct dc_dsc_policy dsc_policy = {0}; - dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy); + dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], stream->sink->ctx->dc->debug.dsc_min_slice_height_override, dsc_policy.min_target_bpp * 16, diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index fe3078b8789e..01c07545ef6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -100,7 +100,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( */ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, - struct dc_dsc_policy *policy); + struct dc_dsc_policy *policy, + const enum dc_link_encoding_format link_encoding); void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit); diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 230be292ff35..9edc9b0e3f08 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -861,7 +861,7 @@ static bool setup_dsc_config( memset(dsc_cfg, 0, sizeof(struct dc_dsc_config)); - dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy); + dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy, link_encoding); pic_width = timing->h_addressable + timing->h_border_left + timing->h_border_right; pic_height = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; @@ -1134,7 +1134,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, - struct dc_dsc_policy *policy) + struct dc_dsc_policy *policy, + const enum dc_link_encoding_format link_encoding) { uint32_t bpc = 0; From 9ce1ee22dc6829175ec2dfd74d78d79eb20e0732 Mon Sep 17 00:00:00 2001 From: Robin Chen Date: Fri, 23 Aug 2024 15:00:28 +0800 Subject: [PATCH 410/534] drm/amd/display: Round calculated vtotal commit c03fca619fc687338a3b6511fdbed94096abdf79 upstream. [WHY] The calculated vtotal may has 1 line deviation. To get precisely vtotal number, round the vtotal result. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Anthony Koo Signed-off-by: Robin Chen Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index ef3a67409021..803586f4267a 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -133,7 +133,7 @@ unsigned int mod_freesync_calc_v_total_from_refresh( v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), - stream->timing.h_total), 1000000); + stream->timing.h_total) + 500000, 1000000); /* v_total cannot be less than nominal */ if (v_total < stream->timing.v_total) { From c886061bbdd1f37190939b11926055413e86a56c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 13 Sep 2024 13:00:39 -0500 Subject: [PATCH 411/534] drm/amd/display: Validate backlight caps are sane commit 327e62f47eb57ae5ff63de82b0815557104e439a upstream. Currently amdgpu takes backlight caps provided by the ACPI tables on systems as is. If the firmware sets maximums that are too low this means that users don't get a good experience. To avoid having to maintain a quirk list of such systems, do a sanity check on the values. Check that the spread is at least half of the values that amdgpu would use if no ACPI table was found and if not use the amdgpu defaults. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3020 Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f0ebf686b06f..7731ce7762b7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4037,6 +4037,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) #define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12 #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255 +#define AMDGPU_DM_MIN_SPREAD ((AMDGPU_DM_DEFAULT_MAX_BACKLIGHT - AMDGPU_DM_DEFAULT_MIN_BACKLIGHT) / 2) #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50 static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, @@ -4051,6 +4052,21 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, return; amdgpu_acpi_get_backlight_caps(&caps); + + /* validate the firmware value is sane */ + if (caps.caps_valid) { + int spread = caps.max_input_signal - caps.min_input_signal; + + if (caps.max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || + caps.min_input_signal < AMDGPU_DM_DEFAULT_MIN_BACKLIGHT || + spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || + spread < AMDGPU_DM_MIN_SPREAD) { + DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n", + caps.min_input_signal, caps.max_input_signal); + caps.caps_valid = false; + } + } + if (caps.caps_valid) { dm->backlight_caps[bl_idx].caps_valid = true; if (caps.aux_support) From a3765b497a4f5224cb2f7a6a2d3357d3066214ee Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Tue, 17 Sep 2024 18:54:53 +0300 Subject: [PATCH 412/534] KEYS: prevent NULL pointer dereference in find_asymmetric_key() commit 70fd1966c93bf3bfe3fe6d753eb3d83a76597eef upstream. In find_asymmetric_key(), if all NULLs are passed in the id_{0,1,2} arguments, the kernel will first emit WARN but then have an oops because id_2 gets dereferenced anyway. Add the missing id_2 check and move WARN_ON() to the final else branch to avoid duplicate NULL checks. Found by Linux Verification Center (linuxtesting.org) with Svace static analysis tool. Cc: stable@vger.kernel.org # v5.17+ Fixes: 7d30198ee24f ("keys: X.509 public key issuer lookup without AKID") Suggested-by: Sergey Shtylyov Signed-off-by: Roman Smirnov Reviewed-by: Sergey Shtylyov Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/asymmetric_type.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index a5da8ccd353e..43af5fa510c0 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -60,17 +60,18 @@ struct key *find_asymmetric_key(struct key *keyring, char *req, *p; int len; - WARN_ON(!id_0 && !id_1 && !id_2); - if (id_0) { lookup = id_0->data; len = id_0->len; } else if (id_1) { lookup = id_1->data; len = id_1->len; - } else { + } else if (id_2) { lookup = id_2->data; len = id_2->len; + } else { + WARN_ON(1); + return ERR_PTR(-EINVAL); } /* Construct an identifier "id:". */ From ae619de5000b73562a1ef42ce03d10e7a6fff6a7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 16 Sep 2024 22:05:10 +1000 Subject: [PATCH 413/534] powerpc/atomic: Use YZ constraints for DS-form instructions commit 39190ac7cff1fd15135fa8e658030d9646fdb5f2 upstream. The 'ld' and 'std' instructions require a 4-byte aligned displacement because they are DS-form instructions. But the "m" asm constraint doesn't enforce that. That can lead to build errors if the compiler chooses a non-aligned displacement, as seen with GCC 14: /tmp/ccuSzwiR.s: Assembler messages: /tmp/ccuSzwiR.s:2579: Error: operand out of domain (39 is not a multiple of 4) make[5]: *** [scripts/Makefile.build:229: net/core/page_pool.o] Error 1 Dumping the generated assembler shows: ld 8,39(8) # MEM[(const struct atomic64_t *)_29].counter, t Use the YZ constraints to tell the compiler either to generate a DS-form displacement, or use an X-form instruction, either of which prevents the build error. See commit 2d43cc701b96 ("powerpc/uaccess: Fix build errors seen with GCC 13/14") for more details on the constraint letters. Fixes: 9f0cbea0d8cc ("[POWERPC] Implement atomic{, 64}_{read, write}() without volatile") Cc: stable@vger.kernel.org # v2.6.24+ Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20240913125302.0a06b4c7@canb.auug.org.au Tested-by: Mina Almasry Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://msgid.link/20240916120510.2017749-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/asm-compat.h | 6 ++++++ arch/powerpc/include/asm/atomic.h | 5 +++-- arch/powerpc/include/asm/uaccess.h | 7 +------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 2bc53c646ccd..83848b534cb1 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -39,6 +39,12 @@ #define STDX_BE stringify_in_c(stdbrx) #endif +#ifdef CONFIG_CC_IS_CLANG +#define DS_FORM_CONSTRAINT "Z<>" +#else +#define DS_FORM_CONSTRAINT "YZ<>" +#endif + #else /* 32-bit */ /* operations for longs and pointers */ diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 5bf6a4d49268..d1ea554c33ed 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with @@ -197,7 +198,7 @@ static __inline__ s64 arch_atomic64_read(const atomic64_t *v) if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED)) __asm__ __volatile__("ld %0,0(%1)" : "=r"(t) : "b"(&v->counter)); else - __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter)); + __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : DS_FORM_CONSTRAINT (v->counter)); return t; } @@ -208,7 +209,7 @@ static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i) if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED)) __asm__ __volatile__("std %1,0(%2)" : "=m"(v->counter) : "r"(i), "b"(&v->counter)); else - __asm__ __volatile__("std%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i)); + __asm__ __volatile__("std%U0%X0 %1,%0" : "=" DS_FORM_CONSTRAINT (v->counter) : "r"(i)); } #define ATOMIC64_OP(op, asm_op) \ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index ccc91bf9b034..a81bd825087c 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef __powerpc64__ /* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */ @@ -92,12 +93,6 @@ __pu_failed: \ : label) #endif -#ifdef CONFIG_CC_IS_CLANG -#define DS_FORM_CONSTRAINT "Z<>" -#else -#define DS_FORM_CONSTRAINT "YZ<>" -#endif - #ifdef __powerpc64__ #ifdef CONFIG_PPC_KERNEL_PREFIXED #define __put_user_asm2_goto(x, ptr, label) \ From ef83620438d7b08cd66269c2262fcc2007cd5454 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 30 Dec 2023 19:46:00 -0500 Subject: [PATCH 414/534] fs: Create a generic is_dot_dotdot() utility commit 42c3732fa8073717dd7d924472f1c0bc5b452fdc upstream. De-duplicate the same functionality in several places by hoisting the is_dot_dotdot() utility function into linux/fs.h. Suggested-by: Amir Goldstein Reviewed-by: Jeff Layton Reviewed-by: Amir Goldstein Acked-by: Christian Brauner Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/crypto/fname.c | 8 +------- fs/ecryptfs/crypto.c | 10 ---------- fs/f2fs/f2fs.h | 11 ----------- fs/namei.c | 6 ++---- include/linux/fs.h | 11 +++++++++++ 5 files changed, 14 insertions(+), 32 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 6eae3f12ad50..553af738bb3e 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -74,13 +74,7 @@ struct fscrypt_nokey_name { static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; + return is_dot_dotdot(str->name, str->len); } /** diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 03bd55069d86..2fe0f3af1a08 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1949,16 +1949,6 @@ out: return rc; } -static bool is_dot_dotdot(const char *name, size_t name_size) -{ - if (name_size == 1 && name[0] == '.') - return true; - else if (name_size == 2 && name[0] == '.' && name[1] == '.') - return true; - - return false; -} - /** * ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext * @plaintext_name: The plaintext name diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fcc2f9e6dc85..7faf9446ea5d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3370,17 +3370,6 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) return is_set_ckpt_flags(sbi, CP_ERROR_FLAG); } -static inline bool is_dot_dotdot(const u8 *name, size_t len) -{ - if (len == 1 && name[0] == '.') - return true; - - if (len == 2 && name[0] == '.' && name[1] == '.') - return true; - - return false; -} - static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { diff --git a/fs/namei.c b/fs/namei.c index e728ba085ebe..beffbb02a24e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2667,10 +2667,8 @@ static int lookup_one_common(struct mnt_idmap *idmap, if (!len) return -EACCES; - if (unlikely(name[0] == '.')) { - if (len < 2 || (len == 2 && name[1] == '.')) - return -EACCES; - } + if (is_dot_dotdot(name, len)) + return -EACCES; while (len--) { unsigned int c = *(const unsigned char *)name++; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a554e4045a9..6c3d86532e3f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2844,6 +2844,17 @@ extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); +/** + * is_dot_dotdot - returns true only if @name is "." or ".." + * @name: file name to check + * @len: length of file name, in bytes + */ +static inline bool is_dot_dotdot(const char *name, size_t len) +{ + return len && unlikely(name[0] == '.') && + (len == 1 || (len == 2 && name[1] == '.')); +} + #include /* needed for stackable file system support */ From 3c1fd66a191485d9ad4c5a975695fb1bba2a1566 Mon Sep 17 00:00:00 2001 From: Hobin Woo Date: Wed, 4 Sep 2024 13:36:35 +0900 Subject: [PATCH 415/534] ksmbd: make __dir_empty() compatible with POSIX commit ca4974ca954561e79f8871d220bb08f14f64f57c upstream. Some file systems may not provide dot (.) and dot-dot (..) as they are optional in POSIX. ksmbd can misjudge emptiness of a directory in those file systems, since it assumes there are always at least two entries: dot and dot-dot. Just don't count dot and dot-dot. Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Hobin Woo Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index dceb4bc76a66..3c190eba8200 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1110,9 +1110,10 @@ static bool __dir_empty(struct dir_context *ctx, const char *name, int namlen, struct ksmbd_readdir_data *buf; buf = container_of(ctx, struct ksmbd_readdir_data, ctx); - buf->dirent_count++; + if (!is_dot_dotdot(name, namlen)) + buf->dirent_count++; - return buf->dirent_count <= 2; + return !buf->dirent_count; } /** @@ -1132,7 +1133,7 @@ int ksmbd_vfs_empty_dir(struct ksmbd_file *fp) readdir_data.dirent_count = 0; err = iterate_dir(fp->filp, &readdir_data.ctx); - if (readdir_data.dirent_count > 2) + if (readdir_data.dirent_count) err = -ENOTEMPTY; else err = 0; From 30fe2a885c28a1c97343c324948f2aca88f8c268 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 3 Sep 2024 20:26:33 +0900 Subject: [PATCH 416/534] ksmbd: allow write with FILE_APPEND_DATA commit 2fb9b5dc80cabcee636a6ccd020740dd925b4580 upstream. Windows client write with FILE_APPEND_DATA when using git. ksmbd should allow write it with this flags. Z:\test>git commit -m "test" fatal: cannot update the ref 'HEAD': unable to append to '.git/logs/HEAD': Bad file descriptor Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 3c190eba8200..76d6ec542509 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -496,7 +496,7 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp, int err = 0; if (work->conn->connection_type) { - if (!(fp->daccess & FILE_WRITE_DATA_LE)) { + if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) { pr_err("no right to write(%pD)\n", fp->filp); err = -EACCES; goto out; From 6ab95e27b77730de3fa2d601db3764490c5eede2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 8 Sep 2024 15:23:48 +0900 Subject: [PATCH 417/534] ksmbd: handle caseless file creation commit c5a709f08d40b1a082e44ffcde1aea4d2822ddd5 upstream. Ray Zhang reported ksmbd can not create file if parent filename is caseless. Y:\>mkdir A Y:\>echo 123 >a\b.txt The system cannot find the path specified. Y:\>echo 123 >A\b.txt This patch convert name obtained by caseless lookup to parent name. Cc: stable@vger.kernel.org # v5.15+ Reported-by: Ray Zhang Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 76d6ec542509..2c548e8efef0 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1162,7 +1162,7 @@ static bool __caseless_lookup(struct dir_context *ctx, const char *name, if (cmp < 0) cmp = strncasecmp((char *)buf->private, name, namlen); if (!cmp) { - memcpy((char *)buf->private, name, namlen); + memcpy((char *)buf->private, name, buf->used); buf->dirent_count = 1; return false; } @@ -1230,10 +1230,7 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, char *filepath; size_t path_len, remain_len; - filepath = kstrdup(name, GFP_KERNEL); - if (!filepath) - return -ENOMEM; - + filepath = name; path_len = strlen(filepath); remain_len = path_len; @@ -1276,10 +1273,9 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, err = -EINVAL; out2: path_put(parent_path); -out1: - kfree(filepath); } +out1: if (!err) { err = mnt_want_write(parent_path->mnt); if (err) { From facf1e49a04a94ae4e1bd8267d766a2ee3dfd2ac Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 23 Sep 2024 18:14:36 +0900 Subject: [PATCH 418/534] ata: libata-scsi: Fix ata_msense_control() CDL page reporting commit 0e9a2990a93f27daa643b6fa73cfa47b128947a7 upstream. When the user requests the ALL_SUB_MPAGES mode sense page, ata_msense_control() adds the CDL_T2A_SUB_MPAGE twice instead of adding the CDL_T2A_SUB_MPAGE and CDL_T2B_SUB_MPAGE pages information. Correct the second call to ata_msense_control_spgt2() to report the CDL_T2B_SUB_MPAGE page. Fixes: 673b2fe6ff1d ("scsi: ata: libata-scsi: Add support for CDL pages mode sense") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7a71add807a3..5377d094bf75 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2390,7 +2390,7 @@ static unsigned int ata_msense_control(struct ata_device *dev, u8 *buf, case ALL_SUB_MPAGES: n = ata_msense_control_spg0(dev, buf, changeable); n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE); - n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE); + n += ata_msense_control_spgt2(dev, buf + n, CDL_T2B_SUB_MPAGE); n += ata_msense_control_ata_feature(dev, buf + n); return n; default: From 568c7c4c77eee6df7677bb861b7cee7398a3255d Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 12 Sep 2024 15:43:08 +0200 Subject: [PATCH 419/534] scsi: sd: Fix off-by-one error in sd_read_block_characteristics() commit f81eaf08385ddd474a2f41595a7757502870c0eb upstream. Ff the device returns page 0xb1 with length 8 (happens with qemu v2.x, for example), sd_read_block_characteristics() may attempt an out-of-bounds memory access when accessing the zoned field at offset 8. Fixes: 7fb019c46eee ("scsi: sd: Switch to using scsi_device VPD pages") Cc: stable@vger.kernel.org Signed-off-by: Martin Wilck Link: https://lore.kernel.org/r/20240912134308.282824-1-mwilck@suse.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 4fce7cc32cb2..b0a574c534c4 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3118,7 +3118,7 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb1); - if (!vpd || vpd->len < 8) { + if (!vpd || vpd->len <= 8) { rcu_read_unlock(); return; } From 09b06c2591fa8b024e2850de53137935e1ea0c47 Mon Sep 17 00:00:00 2001 From: Manish Pandey Date: Tue, 3 Sep 2024 12:07:09 +0530 Subject: [PATCH 420/534] scsi: ufs: qcom: Update MODE_MAX cfg_bw value commit 0c40f079f1c808e7e480c795a79009f200366eb1 upstream. Commit 8db8f6ce556a ("scsi: ufs: qcom: Add missing interconnect bandwidth values for Gear 5") updated the ufs_qcom_bw_table for Gear 5. However, it missed updating the cfg_bw value for the max mode. Hence update the cfg_bw value for the max mode for UFS 4.x devices. Fixes: 8db8f6ce556a ("scsi: ufs: qcom: Add missing interconnect bandwidth values for Gear 5") Cc: stable@vger.kernel.org Signed-off-by: Manish Pandey Link: https://lore.kernel.org/r/20240903063709.4335-1-quic_mapa@quicinc.com Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ufs/host/ufs-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 922ae1d76d90..643157a92c62 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -93,7 +93,7 @@ static const struct __ufs_qcom_bw_table { [MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 }, [MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 }, [MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 }, - [MODE_MAX][0][0] = { 7643136, 307200 }, + [MODE_MAX][0][0] = { 7643136, 819200 }, }; static struct ufs_qcom_host *ufs_qcom_hosts[MAX_UFS_QCOM_HOSTS]; From 6e8dc2050a4a701c4ce650ea7dab5d67f3c37820 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 7 Aug 2024 13:36:28 +1000 Subject: [PATCH 421/534] scsi: mac_scsi: Revise printk(KERN_DEBUG ...) messages commit 5ec4f820cb9766e4583df947150a6febce8da794 upstream. After a bus fault, capture and log the chip registers immediately, if the NDEBUG_PSEUDO_DMA macro is defined. Remove some printk(KERN_DEBUG ...) messages that aren't needed any more. Don't skip the debug message when bytes == 0. Show all of the byte counters in the debug messages. Cc: stable@vger.kernel.org # 5.15+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Link: https://lore.kernel.org/r/7573c79f4e488fc00af2b8a191e257ca945e0409.1723001788.git.fthain@linux-m68k.org Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mac_scsi.c | 42 +++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c index 2e511697fce3..d15d629f961c 100644 --- a/drivers/scsi/mac_scsi.c +++ b/drivers/scsi/mac_scsi.c @@ -286,13 +286,14 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata, while (!NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_DRQ | BASR_PHASE_MATCH, BASR_DRQ | BASR_PHASE_MATCH, 0)) { - int bytes; + int bytes, chunk_bytes; if (macintosh_config->ident == MAC_MODEL_IIFX) write_ctrl_reg(hostdata, CTRL_HANDSHAKE_MODE | CTRL_INTERRUPTS_ENABLE); - bytes = mac_pdma_recv(s, d, min(hostdata->pdma_residual, 512)); + chunk_bytes = min(hostdata->pdma_residual, 512); + bytes = mac_pdma_recv(s, d, chunk_bytes); if (bytes > 0) { d += bytes; @@ -302,23 +303,23 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata, if (hostdata->pdma_residual == 0) goto out; - if (NCR5380_poll_politely2(hostdata, STATUS_REG, SR_REQ, SR_REQ, - BUS_AND_STATUS_REG, BASR_ACK, - BASR_ACK, 0) < 0) - scmd_printk(KERN_DEBUG, hostdata->connected, - "%s: !REQ and !ACK\n", __func__); if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH)) goto out; if (bytes == 0) udelay(MAC_PDMA_DELAY); - if (bytes >= 0) + if (bytes > 0) continue; - dsprintk(NDEBUG_PSEUDO_DMA, hostdata->host, - "%s: bus error (%d/%d)\n", __func__, d - dst, len); NCR5380_dprint(NDEBUG_PSEUDO_DMA, hostdata->host); + dsprintk(NDEBUG_PSEUDO_DMA, hostdata->host, + "%s: bus error [%d/%d] (%d/%d)\n", + __func__, d - dst, len, bytes, chunk_bytes); + + if (bytes == 0) + continue; + result = -1; goto out; } @@ -345,13 +346,14 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, while (!NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_DRQ | BASR_PHASE_MATCH, BASR_DRQ | BASR_PHASE_MATCH, 0)) { - int bytes; + int bytes, chunk_bytes; if (macintosh_config->ident == MAC_MODEL_IIFX) write_ctrl_reg(hostdata, CTRL_HANDSHAKE_MODE | CTRL_INTERRUPTS_ENABLE); - bytes = mac_pdma_send(s, d, min(hostdata->pdma_residual, 512)); + chunk_bytes = min(hostdata->pdma_residual, 512); + bytes = mac_pdma_send(s, d, chunk_bytes); if (bytes > 0) { s += bytes; @@ -370,23 +372,23 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, goto out; } - if (NCR5380_poll_politely2(hostdata, STATUS_REG, SR_REQ, SR_REQ, - BUS_AND_STATUS_REG, BASR_ACK, - BASR_ACK, 0) < 0) - scmd_printk(KERN_DEBUG, hostdata->connected, - "%s: !REQ and !ACK\n", __func__); if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH)) goto out; if (bytes == 0) udelay(MAC_PDMA_DELAY); - if (bytes >= 0) + if (bytes > 0) continue; - dsprintk(NDEBUG_PSEUDO_DMA, hostdata->host, - "%s: bus error (%d/%d)\n", __func__, s - src, len); NCR5380_dprint(NDEBUG_PSEUDO_DMA, hostdata->host); + dsprintk(NDEBUG_PSEUDO_DMA, hostdata->host, + "%s: bus error [%d/%d] (%d/%d)\n", + __func__, s - src, len, bytes, chunk_bytes); + + if (bytes == 0) + continue; + result = -1; goto out; } From 0120c7762f259be8a8fbd5114a18cf13b3d93a9a Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 7 Aug 2024 13:36:28 +1000 Subject: [PATCH 422/534] scsi: mac_scsi: Refactor polling loop commit 5545c3165cbc98615fe65a44f41167cbb557e410 upstream. Before the error handling can be revised, some preparation is needed. Refactor the polling loop with a new function, macscsi_wait_for_drq(). This function will gain more call sites in the next patch. Cc: stable@vger.kernel.org # 5.15+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Link: https://lore.kernel.org/r/6a5ffabb4290c0d138c6d285fda8fa3902e926f0.1723001788.git.fthain@linux-m68k.org Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mac_scsi.c | 80 +++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c index d15d629f961c..0d1935ee6e15 100644 --- a/drivers/scsi/mac_scsi.c +++ b/drivers/scsi/mac_scsi.c @@ -208,8 +208,6 @@ __setup("mac5380=", mac_scsi_setup); ".previous \n" \ : "+a" (addr), "+r" (n), "+r" (result) : "a" (io)) -#define MAC_PDMA_DELAY 32 - static inline int mac_pdma_recv(void __iomem *io, unsigned char *start, int n) { unsigned char *addr = start; @@ -274,6 +272,36 @@ static inline void write_ctrl_reg(struct NCR5380_hostdata *hostdata, u32 value) out_be32(hostdata->io + (CTRL_REG << 4), value); } +static inline int macscsi_wait_for_drq(struct NCR5380_hostdata *hostdata) +{ + unsigned int n = 1; /* effectively multiplies NCR5380_REG_POLL_TIME */ + unsigned char basr; + +again: + basr = NCR5380_read(BUS_AND_STATUS_REG); + + if (!(basr & BASR_PHASE_MATCH)) + return 1; + + if (basr & BASR_IRQ) + return -1; + + if (basr & BASR_DRQ) + return 0; + + if (n-- == 0) { + NCR5380_dprint(NDEBUG_PSEUDO_DMA, hostdata->host); + dsprintk(NDEBUG_PSEUDO_DMA, hostdata->host, + "%s: DRQ timeout\n", __func__); + return -1; + } + + NCR5380_poll_politely2(hostdata, + BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ, + BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, 0); + goto again; +} + static inline int macscsi_pread(struct NCR5380_hostdata *hostdata, unsigned char *dst, int len) { @@ -283,9 +311,7 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata, hostdata->pdma_residual = len; - while (!NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, - BASR_DRQ | BASR_PHASE_MATCH, - BASR_DRQ | BASR_PHASE_MATCH, 0)) { + while (macscsi_wait_for_drq(hostdata) == 0) { int bytes, chunk_bytes; if (macintosh_config->ident == MAC_MODEL_IIFX) @@ -295,19 +321,16 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata, chunk_bytes = min(hostdata->pdma_residual, 512); bytes = mac_pdma_recv(s, d, chunk_bytes); + if (macintosh_config->ident == MAC_MODEL_IIFX) + write_ctrl_reg(hostdata, CTRL_INTERRUPTS_ENABLE); + if (bytes > 0) { d += bytes; hostdata->pdma_residual -= bytes; } if (hostdata->pdma_residual == 0) - goto out; - - if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH)) - goto out; - - if (bytes == 0) - udelay(MAC_PDMA_DELAY); + break; if (bytes > 0) continue; @@ -321,16 +344,9 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata, continue; result = -1; - goto out; + break; } - scmd_printk(KERN_ERR, hostdata->connected, - "%s: phase mismatch or !DRQ\n", __func__); - NCR5380_dprint(NDEBUG_PSEUDO_DMA, hostdata->host); - result = -1; -out: - if (macintosh_config->ident == MAC_MODEL_IIFX) - write_ctrl_reg(hostdata, CTRL_INTERRUPTS_ENABLE); return result; } @@ -343,9 +359,7 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, hostdata->pdma_residual = len; - while (!NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, - BASR_DRQ | BASR_PHASE_MATCH, - BASR_DRQ | BASR_PHASE_MATCH, 0)) { + while (macscsi_wait_for_drq(hostdata) == 0) { int bytes, chunk_bytes; if (macintosh_config->ident == MAC_MODEL_IIFX) @@ -355,6 +369,9 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, chunk_bytes = min(hostdata->pdma_residual, 512); bytes = mac_pdma_send(s, d, chunk_bytes); + if (macintosh_config->ident == MAC_MODEL_IIFX) + write_ctrl_reg(hostdata, CTRL_INTERRUPTS_ENABLE); + if (bytes > 0) { s += bytes; hostdata->pdma_residual -= bytes; @@ -369,15 +386,9 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, "%s: Last Byte Sent timeout\n", __func__); result = -1; } - goto out; + break; } - if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH)) - goto out; - - if (bytes == 0) - udelay(MAC_PDMA_DELAY); - if (bytes > 0) continue; @@ -390,16 +401,9 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, continue; result = -1; - goto out; + break; } - scmd_printk(KERN_ERR, hostdata->connected, - "%s: phase mismatch or !DRQ\n", __func__); - NCR5380_dprint(NDEBUG_PSEUDO_DMA, hostdata->host); - result = -1; -out: - if (macintosh_config->ident == MAC_MODEL_IIFX) - write_ctrl_reg(hostdata, CTRL_INTERRUPTS_ENABLE); return result; } From ccc87864b0978e1c96dff78377211cb10018fa3a Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 7 Aug 2024 13:36:28 +1000 Subject: [PATCH 423/534] scsi: mac_scsi: Disallow bus errors during PDMA send commit 5551bc30e4a69ad86d0d008e2f56cd59b6583476 upstream. SD cards can produce write latency spikes on the order of a hundred milliseconds. If the target firmware does not hide that latency during DATA IN and OUT phases it can cause the PDMA circuitry to raise a processor bus fault which in turn leads to an unreliable byte count and a DMA overrun. The Last Byte Sent flag is used to detect the overrun but this mechanism is unreliable on some systems. Instead, set a DID_ERROR result whenever there is a bus fault during a PDMA send, unless the cause was a phase mismatch. Cc: stable@vger.kernel.org # 5.15+ Reported-and-tested-by: Stan Johnson Fixes: 7c1f3e3447a1 ("scsi: mac_scsi: Treat Last Byte Sent time-out as failure") Signed-off-by: Finn Thain Link: https://lore.kernel.org/r/cc38df687ace2c4ffc375a683b2502fc476b600d.1723001788.git.fthain@linux-m68k.org Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mac_scsi.c | 44 ++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c index 0d1935ee6e15..2c88ce24d19a 100644 --- a/drivers/scsi/mac_scsi.c +++ b/drivers/scsi/mac_scsi.c @@ -102,11 +102,15 @@ __setup("mac5380=", mac_scsi_setup); * Linux SCSI drivers lack knowledge of the timing behaviour of SCSI targets * so bus errors are unavoidable. * - * If a MOVE.B instruction faults, we assume that zero bytes were transferred - * and simply retry. That assumption probably depends on target behaviour but - * seems to hold up okay. The NOP provides synchronization: without it the - * fault can sometimes occur after the program counter has moved past the - * offending instruction. Post-increment addressing can't be used. + * If a MOVE.B instruction faults during a receive operation, we assume the + * target sent nothing and try again. That assumption probably depends on + * target firmware but it seems to hold up okay. If a fault happens during a + * send operation, the target may or may not have seen /ACK and got the byte. + * It's uncertain so the whole SCSI command gets retried. + * + * The NOP is needed for synchronization because the fault address in the + * exception stack frame may or may not be the instruction that actually + * caused the bus error. Post-increment addressing can't be used. */ #define MOVE_BYTE(operands) \ @@ -243,22 +247,21 @@ static inline int mac_pdma_send(unsigned char *start, void __iomem *io, int n) if (n >= 1) { MOVE_BYTE("%0@,%3@"); if (result) - goto out; + return -1; } if (n >= 1 && ((unsigned long)addr & 1)) { MOVE_BYTE("%0@,%3@"); if (result) - goto out; + return -2; } while (n >= 32) MOVE_16_WORDS("%0@+,%3@"); while (n >= 2) MOVE_WORD("%0@+,%3@"); if (result) - return start - addr; /* Negated to indicate uncertain length */ + return start - addr - 1; /* Negated to indicate uncertain length */ if (n == 1) MOVE_BYTE("%0@,%3@"); -out: return addr - start; } @@ -307,7 +310,6 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata, { u8 __iomem *s = hostdata->pdma_io + (INPUT_DATA_REG << 4); unsigned char *d = dst; - int result = 0; hostdata->pdma_residual = len; @@ -343,11 +345,12 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata, if (bytes == 0) continue; - result = -1; + if (macscsi_wait_for_drq(hostdata) <= 0) + set_host_byte(hostdata->connected, DID_ERROR); break; } - return result; + return 0; } static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, @@ -355,7 +358,6 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, { unsigned char *s = src; u8 __iomem *d = hostdata->pdma_io + (OUTPUT_DATA_REG << 4); - int result = 0; hostdata->pdma_residual = len; @@ -377,17 +379,8 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, hostdata->pdma_residual -= bytes; } - if (hostdata->pdma_residual == 0) { - if (NCR5380_poll_politely(hostdata, TARGET_COMMAND_REG, - TCR_LAST_BYTE_SENT, - TCR_LAST_BYTE_SENT, - 0) < 0) { - scmd_printk(KERN_ERR, hostdata->connected, - "%s: Last Byte Sent timeout\n", __func__); - result = -1; - } + if (hostdata->pdma_residual == 0) break; - } if (bytes > 0) continue; @@ -400,11 +393,12 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata, if (bytes == 0) continue; - result = -1; + if (macscsi_wait_for_drq(hostdata) <= 0) + set_host_byte(hostdata->connected, DID_ERROR); break; } - return result; + return 0; } static int macscsi_dma_xfer_len(struct NCR5380_hostdata *hostdata, From 9ecd9d7ad7f08ec2a2cf16caa60ca2d9311b863f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20M=C3=A4tje?= Date: Thu, 5 Sep 2024 00:27:40 +0200 Subject: [PATCH 424/534] can: esd_usb: Remove CAN_CTRLMODE_3_SAMPLES for CAN-USB/3-FD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 75b3189540578f96b4996e4849b6649998f49455 upstream. Remove the CAN_CTRLMODE_3_SAMPLES announcement for CAN-USB/3-FD devices because these devices don't support it. The hardware has a Microchip SAM E70 microcontroller that uses a Bosch MCAN IP core as CAN FD controller. But this MCAN core doesn't support triple sampling. Fixes: 80662d943075 ("can: esd_usb: Add support for esd CAN-USB/3") Cc: stable@vger.kernel.org Signed-off-by: Stefan Mätje Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20240904222740.2985864-2-stefan.maetje@esd.eu Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/esd_usb.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c index 41a0e4261d15..03ad10b01867 100644 --- a/drivers/net/can/usb/esd_usb.c +++ b/drivers/net/can/usb/esd_usb.c @@ -3,7 +3,7 @@ * CAN driver for esd electronics gmbh CAN-USB/2, CAN-USB/3 and CAN-USB/Micro * * Copyright (C) 2010-2012 esd electronic system design gmbh, Matthias Fuchs - * Copyright (C) 2022-2023 esd electronics gmbh, Frank Jungclaus + * Copyright (C) 2022-2024 esd electronics gmbh, Frank Jungclaus */ #include @@ -1116,9 +1116,6 @@ static int esd_usb_3_set_bittiming(struct net_device *netdev) if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) flags |= ESD_USB_3_BAUDRATE_FLAG_LOM; - if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) - flags |= ESD_USB_3_BAUDRATE_FLAG_TRS; - baud_x->nom.brp = cpu_to_le16(nom_bt->brp & (nom_btc->brp_max - 1)); baud_x->nom.sjw = cpu_to_le16(nom_bt->sjw & (nom_btc->sjw_max - 1)); baud_x->nom.tseg1 = cpu_to_le16((nom_bt->prop_seg + nom_bt->phase_seg1) @@ -1219,7 +1216,6 @@ static int esd_usb_probe_one_net(struct usb_interface *intf, int index) switch (le16_to_cpu(dev->udev->descriptor.idProduct)) { case ESD_USB_CANUSB3_PRODUCT_ID: priv->can.clock.freq = ESD_USB_3_CAN_CLOCK; - priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; priv->can.bittiming_const = &esd_usb_3_nom_bittiming_const; priv->can.data_bittiming_const = &esd_usb_3_data_bittiming_const; From d71300d07f398f0db3b85dd3a59a3b5974a353a9 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Wed, 21 Aug 2024 16:11:03 +0300 Subject: [PATCH 425/534] wifi: rtw88: Fix USB/SDIO devices not transmitting beacons commit faa2e484b393c56bc1243dca6676a70bc485f775 upstream. All USB devices supported by rtw88 have the same problem: they don't transmit beacons in AP mode. (Some?) SDIO devices are also affected. The cause appears to be clearing BIT_EN_BCNQ_DL of REG_FWHW_TXQ_CTRL before uploading the beacon reserved page, so don't clear the bit for USB and SDIO devices. Tested with RTL8811CU and RTL8723DU. Cc: # 6.6.x Signed-off-by: Bitterblue Smith Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/49de73b5-698f-4865-ab63-100e28dfc4a1@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtw88/fw.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c index a1b674e3caaa..3596cf99c2ed 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.c +++ b/drivers/net/wireless/realtek/rtw88/fw.c @@ -1388,10 +1388,12 @@ int rtw_fw_write_data_rsvd_page(struct rtw_dev *rtwdev, u16 pg_addr, val |= BIT_ENSWBCN >> 8; rtw_write8(rtwdev, REG_CR + 1, val); - val = rtw_read8(rtwdev, REG_FWHW_TXQ_CTRL + 2); - bckp[1] = val; - val &= ~(BIT_EN_BCNQ_DL >> 16); - rtw_write8(rtwdev, REG_FWHW_TXQ_CTRL + 2, val); + if (rtw_hci_type(rtwdev) == RTW_HCI_TYPE_PCIE) { + val = rtw_read8(rtwdev, REG_FWHW_TXQ_CTRL + 2); + bckp[1] = val; + val &= ~(BIT_EN_BCNQ_DL >> 16); + rtw_write8(rtwdev, REG_FWHW_TXQ_CTRL + 2, val); + } ret = rtw_hci_write_data_rsvd_page(rtwdev, buf, size); if (ret) { @@ -1416,7 +1418,8 @@ restore: rsvd_pg_head = rtwdev->fifo.rsvd_boundary; rtw_write16(rtwdev, REG_FIFOPAGE_CTRL_2, rsvd_pg_head | BIT_BCN_VALID_V1); - rtw_write8(rtwdev, REG_FWHW_TXQ_CTRL + 2, bckp[1]); + if (rtw_hci_type(rtwdev) == RTW_HCI_TYPE_PCIE) + rtw_write8(rtwdev, REG_FWHW_TXQ_CTRL + 2, bckp[1]); rtw_write8(rtwdev, REG_CR + 1, bckp[0]); return ret; From 1e44ee6cdd123d6cfe78b4a94e1572e23bbb58ce Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 19 Sep 2024 14:33:42 +0200 Subject: [PATCH 426/534] usbnet: fix cyclical race on disconnect with work queue commit 04e906839a053f092ef53f4fb2d610983412b904 upstream. The work can submit URBs and the URBs can schedule the work. This cycle needs to be broken, when a device is to be stopped. Use a flag to do so. This is a design issue as old as the driver. Signed-off-by: Oliver Neukum Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") CC: stable@vger.kernel.org Link: https://patch.msgid.link/20240919123525.688065-1-oneukum@suse.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/usbnet.c | 37 ++++++++++++++++++++++++++++--------- include/linux/usb/usbnet.h | 15 +++++++++++++++ 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 6cc1b56ddde2..60c58dd6d253 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -464,10 +464,15 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb, void usbnet_defer_kevent (struct usbnet *dev, int work) { set_bit (work, &dev->flags); - if (!schedule_work (&dev->kevent)) - netdev_dbg(dev->net, "kevent %s may have been dropped\n", usbnet_event_names[work]); - else - netdev_dbg(dev->net, "kevent %s scheduled\n", usbnet_event_names[work]); + if (!usbnet_going_away(dev)) { + if (!schedule_work(&dev->kevent)) + netdev_dbg(dev->net, + "kevent %s may have been dropped\n", + usbnet_event_names[work]); + else + netdev_dbg(dev->net, + "kevent %s scheduled\n", usbnet_event_names[work]); + } } EXPORT_SYMBOL_GPL(usbnet_defer_kevent); @@ -535,7 +540,8 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) tasklet_schedule (&dev->bh); break; case 0: - __usbnet_queue_skb(&dev->rxq, skb, rx_start); + if (!usbnet_going_away(dev)) + __usbnet_queue_skb(&dev->rxq, skb, rx_start); } } else { netif_dbg(dev, ifdown, dev->net, "rx: stopped\n"); @@ -843,9 +849,18 @@ int usbnet_stop (struct net_device *net) /* deferred work (timer, softirq, task) must also stop */ dev->flags = 0; - del_timer_sync (&dev->delay); - tasklet_kill (&dev->bh); + del_timer_sync(&dev->delay); + tasklet_kill(&dev->bh); cancel_work_sync(&dev->kevent); + + /* We have cyclic dependencies. Those calls are needed + * to break a cycle. We cannot fall into the gaps because + * we have a flag + */ + tasklet_kill(&dev->bh); + del_timer_sync(&dev->delay); + cancel_work_sync(&dev->kevent); + if (!pm) usb_autopm_put_interface(dev->intf); @@ -1171,7 +1186,8 @@ fail_halt: status); } else { clear_bit (EVENT_RX_HALT, &dev->flags); - tasklet_schedule (&dev->bh); + if (!usbnet_going_away(dev)) + tasklet_schedule(&dev->bh); } } @@ -1196,7 +1212,8 @@ fail_halt: usb_autopm_put_interface(dev->intf); fail_lowmem: if (resched) - tasklet_schedule (&dev->bh); + if (!usbnet_going_away(dev)) + tasklet_schedule(&dev->bh); } } @@ -1559,6 +1576,7 @@ static void usbnet_bh (struct timer_list *t) } else if (netif_running (dev->net) && netif_device_present (dev->net) && netif_carrier_ok(dev->net) && + !usbnet_going_away(dev) && !timer_pending(&dev->delay) && !test_bit(EVENT_RX_PAUSED, &dev->flags) && !test_bit(EVENT_RX_HALT, &dev->flags)) { @@ -1606,6 +1624,7 @@ void usbnet_disconnect (struct usb_interface *intf) usb_set_intfdata(intf, NULL); if (!dev) return; + usbnet_mark_going_away(dev); xdev = interface_to_usbdev (intf); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 9f08a584d707..0b9f1e598e3a 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -76,8 +76,23 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 +/* This one is special, as it indicates that the device is going away + * there are cyclic dependencies between tasklet, timer and bh + * that must be broken + */ +# define EVENT_UNPLUG 31 }; +static inline bool usbnet_going_away(struct usbnet *ubn) +{ + return test_bit(EVENT_UNPLUG, &ubn->flags); +} + +static inline void usbnet_mark_going_away(struct usbnet *ubn) +{ + set_bit(EVENT_UNPLUG, &ubn->flags); +} + static inline struct usb_driver *driver_of(struct usb_interface *intf) { return to_usb_driver(intf->dev.driver); From 090386dbedbc2f099c44a0136eb8eb8713930072 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 31 Jul 2024 11:44:08 +0800 Subject: [PATCH 427/534] arm64: dts: mediatek: mt8195-cherry: Mark USB 3.0 on xhci1 as disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 09d385679487c58f0859c1ad4f404ba3df2f8830 upstream. USB 3.0 on xhci1 is not used, as the controller shares the same PHY as pcie1. The latter is enabled to support the M.2 PCIe WLAN card on this design. Mark USB 3.0 as disabled on this controller using the "mediatek,u3p-dis-msk" property. Reported-by: Nícolas F. R. A. Prado #KernelCI Closes: https://lore.kernel.org/all/9fce9838-ef87-4d1b-b3df-63e1ddb0ec51@notapiano/ Fixes: b6267a396e1c ("arm64: dts: mediatek: cherry: Enable T-PHYs and USB XHCI controllers") Cc: stable@vger.kernel.org Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240731034411.371178-2-wenst@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index b78f408110bf..34e18eb5d7f4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -1312,6 +1312,7 @@ usb2-lpm-disable; vusb33-supply = <&mt6359_vusb_ldo_reg>; vbus-supply = <&usb_vbus>; + mediatek,u3p-dis-msk = <1>; }; #include From ef08eb1605f5b9928c266ef2314478feb3d36aaa Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 12 Sep 2024 14:32:59 +0200 Subject: [PATCH 428/534] USB: appledisplay: close race between probe and completion handler commit 8265d06b7794493d82c5c21a12d7ba43eccc30cb upstream. There is a small window during probing when IO is running but the backlight is not registered. Processing events during that time will crash. The completion handler needs to check for a backlight before scheduling work. The bug is as old as the driver. Signed-off-by: Oliver Neukum CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240912123317.1026049-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/appledisplay.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index c8098e9b432e..62b5a30edc42 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -107,7 +107,12 @@ static void appledisplay_complete(struct urb *urb) case ACD_BTN_BRIGHT_UP: case ACD_BTN_BRIGHT_DOWN: pdata->button_pressed = 1; - schedule_delayed_work(&pdata->work, 0); + /* + * there is a window during which no device + * is registered + */ + if (pdata->bd ) + schedule_delayed_work(&pdata->work, 0); break; case ACD_BTN_NONE: default: @@ -202,6 +207,7 @@ static int appledisplay_probe(struct usb_interface *iface, const struct usb_device_id *id) { struct backlight_properties props; + struct backlight_device *backlight; struct appledisplay *pdata; struct usb_device *udev = interface_to_usbdev(iface); struct usb_endpoint_descriptor *endpoint; @@ -272,13 +278,14 @@ static int appledisplay_probe(struct usb_interface *iface, memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.max_brightness = 0xff; - pdata->bd = backlight_device_register(bl_name, NULL, pdata, + backlight = backlight_device_register(bl_name, NULL, pdata, &appledisplay_bl_data, &props); - if (IS_ERR(pdata->bd)) { + if (IS_ERR(backlight)) { dev_err(&iface->dev, "Backlight registration failed\n"); - retval = PTR_ERR(pdata->bd); + retval = PTR_ERR(backlight); goto error; } + pdata->bd = backlight; /* Try to get brightness */ brightness = appledisplay_bl_get_brightness(pdata->bd); From 7bcd961dcb5a3eaaf952e44f3d4595d7ca559baa Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 12 Sep 2024 14:54:43 +0200 Subject: [PATCH 429/534] USB: misc: cypress_cy7c63: check for short transfer commit 49cd2f4d747eeb3050b76245a7f72aa99dbd3310 upstream. As we process the second byte of a control transfer, transfers of less than 2 bytes must be discarded. This bug is as old as the driver. SIgned-off-by: Oliver Neukum CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240912125449.1030536-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/cypress_cy7c63.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/misc/cypress_cy7c63.c b/drivers/usb/misc/cypress_cy7c63.c index cecd7693b741..75f5a740cba3 100644 --- a/drivers/usb/misc/cypress_cy7c63.c +++ b/drivers/usb/misc/cypress_cy7c63.c @@ -88,6 +88,9 @@ static int vendor_command(struct cypress *dev, unsigned char request, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_OTHER, address, data, iobuf, CYPRESS_MAX_REQSIZE, USB_CTRL_GET_TIMEOUT); + /* we must not process garbage */ + if (retval < 2) + goto err_buf; /* store returned data (more READs to be added) */ switch (request) { @@ -107,6 +110,7 @@ static int vendor_command(struct cypress *dev, unsigned char request, break; } +err_buf: kfree(iobuf); error: return retval; From a0b4cbeb093620334ae42fc8e05331a632671fae Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 12 Sep 2024 16:19:06 +0200 Subject: [PATCH 430/534] USB: class: CDC-ACM: fix race between get_serial and set_serial commit b41c1fa155ba56d125885b0191aabaf3c508d0a3 upstream. TIOCGSERIAL is an ioctl. Thus it must be atomic. It returns two values. Racing with set_serial it can return an inconsistent result. The mutex must be taken. In terms of logic the bug is as old as the driver. In terms of code it goes back to the conversion to the get_serial and set_serial methods. Signed-off-by: Oliver Neukum Cc: stable Fixes: 99f75a1fcd865 ("cdc-acm: switch to ->[sg]et_serial()") Link: https://lore.kernel.org/r/20240912141916.1044393-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 0c1b69d944ca..605fea461102 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -962,10 +962,12 @@ static int get_serial_info(struct tty_struct *tty, struct serial_struct *ss) struct acm *acm = tty->driver_data; ss->line = acm->minor; + mutex_lock(&acm->port.mutex); ss->close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; ss->closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : jiffies_to_msecs(acm->port.closing_wait) / 10; + mutex_unlock(&acm->port.mutex); return 0; } From 19fb05d2e5c341be81f7a60fb404ef0d1d6ecab3 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 6 Sep 2024 06:48:54 +0000 Subject: [PATCH 431/534] usb: cdnsp: Fix incorrect usb_request status commit 1702bec4477cc7d31adb4a760d14d33fac928b7a upstream. Fix changes incorrect usb_request->status returned during disabling endpoints. Before fix the status returned during dequeuing requests while disabling endpoint was ECONNRESET. Patch change it to ESHUTDOWN. Patch fixes issue detected during testing UVC gadget. During stopping streaming the class starts dequeuing usb requests and controller driver returns the -ECONNRESET status. After completion requests the class or application "uvc-gadget" try to queue this request again. Changing this status to ESHUTDOWN cause that UVC assumes that endpoint is disabled, or device is disconnected and stops re-queuing usb requests. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: stable@vger.kernel.org Signed-off-by: Pawel Laszczak Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB9538E8CA7A2096AAF6A3718FDD9E2@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index e0e97a138666..1d18d5002ef0 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -718,7 +718,8 @@ int cdnsp_remove_request(struct cdnsp_device *pdev, seg = cdnsp_trb_in_td(pdev, cur_td->start_seg, cur_td->first_trb, cur_td->last_trb, hw_deq); - if (seg && (pep->ep_state & EP_ENABLED)) + if (seg && (pep->ep_state & EP_ENABLED) && + !(pep->ep_state & EP_DIS_IN_RROGRESS)) cdnsp_find_new_dequeue_state(pdev, pep, preq->request.stream_id, cur_td, &deq_state); else @@ -736,7 +737,8 @@ int cdnsp_remove_request(struct cdnsp_device *pdev, * During disconnecting all endpoint will be disabled so we don't * have to worry about updating dequeue pointer. */ - if (pdev->cdnsp_state & CDNSP_STATE_DISCONNECT_PENDING) { + if (pdev->cdnsp_state & CDNSP_STATE_DISCONNECT_PENDING || + pep->ep_state & EP_DIS_IN_RROGRESS) { status = -ESHUTDOWN; ret = cdnsp_cmd_set_deq(pdev, pep, &deq_state); } From 4f7908ebafd51f7eb3d2b0a8b058da2f983dc5f6 Mon Sep 17 00:00:00 2001 From: Tomas Marek Date: Fri, 6 Sep 2024 07:50:25 +0200 Subject: [PATCH 432/534] usb: dwc2: drd: fix clock gating on USB role switch commit 2c6b6afa59e78bebcb65bbc8a76b3459f139547c upstream. The dwc2_handle_usb_suspend_intr() function disables gadget clocks in USB peripheral mode when no other power-down mode is available (introduced by commit 0112b7ce68ea ("usb: dwc2: Update dwc2_handle_usb_suspend_intr function.")). However, the dwc2_drd_role_sw_set() USB role update handler attempts to read DWC2 registers if the USB role has changed while the USB is in suspend mode (when the clocks are gated). This causes the system to hang. Release the gadget clocks before handling the USB role update. Fixes: 0112b7ce68ea ("usb: dwc2: Update dwc2_handle_usb_suspend_intr function.") Cc: stable@vger.kernel.org Signed-off-by: Tomas Marek Link: https://lore.kernel.org/r/20240906055025.25057-1-tomas.marek@elrest.cz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/drd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/dwc2/drd.c b/drivers/usb/dwc2/drd.c index a8605b02115b..1ad8fa3f862a 100644 --- a/drivers/usb/dwc2/drd.c +++ b/drivers/usb/dwc2/drd.c @@ -127,6 +127,15 @@ static int dwc2_drd_role_sw_set(struct usb_role_switch *sw, enum usb_role role) role = USB_ROLE_DEVICE; } + if ((IS_ENABLED(CONFIG_USB_DWC2_PERIPHERAL) || + IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE)) && + dwc2_is_device_mode(hsotg) && + hsotg->lx_state == DWC2_L2 && + hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && + hsotg->bus_suspended && + !hsotg->params.no_clock_gating) + dwc2_gadget_exit_clock_gating(hsotg, 0); + if (role == USB_ROLE_HOST) { already = dwc2_ovr_avalid(hsotg, true); } else if (role == USB_ROLE_DEVICE) { From 3ae13d48686b19de89bc23527fd2f194ef6313c7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 26 Aug 2024 07:49:34 +0200 Subject: [PATCH 433/534] bus: integrator-lm: fix OF node leak in probe() commit 15a62b81175885b5adfcaf49870466e3603f06c7 upstream. Driver code is leaking OF node reference from of_find_matching_node() in probe(). Fixes: ccea5e8a5918 ("bus: Add driver for Integrator/AP logic modules") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Acked-by: Liviu Dudau Link: https://lore.kernel.org/20240826054934.10724-2-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-integrator-lm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/arm-integrator-lm.c b/drivers/bus/arm-integrator-lm.c index b715c8ab36e8..a65c79b08804 100644 --- a/drivers/bus/arm-integrator-lm.c +++ b/drivers/bus/arm-integrator-lm.c @@ -85,6 +85,7 @@ static int integrator_ap_lm_probe(struct platform_device *pdev) return -ENODEV; } map = syscon_node_to_regmap(syscon); + of_node_put(syscon); if (IS_ERR(map)) { dev_err(dev, "could not find Integrator/AP system controller\n"); From 18ed6a33188173b02a8b9b34304aced58d927d5f Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 20 Aug 2024 10:04:39 +0200 Subject: [PATCH 434/534] bus: mhi: host: pci_generic: Fix the name for the Telit FE990A commit bfc5ca0fd1ea7aceae0b682fa4bd8079c52f96c8 upstream. Add a mhi_pci_dev_info struct specific for the Telit FE990A modem in order to use the correct product name. Cc: stable@vger.kernel.org # 6.1+ Fixes: 0724869ede9c ("bus: mhi: host: pci_generic: add support for Telit FE990 modem") Signed-off-by: Fabio Porcedda Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240820080439.837666-1-fabio.porcedda@gmail.com Signed-off-by: Manivannan Sadhasivam Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/host/pci_generic.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index 08f3f039dbdd..154841916f56 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -578,6 +578,15 @@ static const struct mhi_pci_dev_info mhi_telit_fn990_info = { .mru_default = 32768, }; +static const struct mhi_pci_dev_info mhi_telit_fe990a_info = { + .name = "telit-fe990a", + .config = &modem_telit_fn990_config, + .bar_num = MHI_PCI_DEFAULT_BAR_NUM, + .dma_data_width = 32, + .sideband_wake = false, + .mru_default = 32768, +}; + /* Keep the list sorted based on the PID. New VID should be added as the last entry */ static const struct pci_device_id mhi_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0304), @@ -595,9 +604,9 @@ static const struct pci_device_id mhi_pci_id_table[] = { /* Telit FN990 */ { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0308, 0x1c5d, 0x2010), .driver_data = (kernel_ulong_t) &mhi_telit_fn990_info }, - /* Telit FE990 */ + /* Telit FE990A */ { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0308, 0x1c5d, 0x2015), - .driver_data = (kernel_ulong_t) &mhi_telit_fn990_info }, + .driver_data = (kernel_ulong_t) &mhi_telit_fe990a_info }, { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0308), .driver_data = (kernel_ulong_t) &mhi_qcom_sdx65_info }, { PCI_DEVICE(PCI_VENDOR_ID_QUECTEL, 0x1001), /* EM120R-GL (sdx24) */ From 7420c1bf7fc784e587b87329cc6dfa3dca537aa4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 28 Aug 2024 01:45:48 +0200 Subject: [PATCH 435/534] firmware_loader: Block path traversal commit f0e5311aa8022107d63c54e2f03684ec097d1394 upstream. Most firmware names are hardcoded strings, or are constructed from fairly constrained format strings where the dynamic parts are just some hex numbers or such. However, there are a couple codepaths in the kernel where firmware file names contain string components that are passed through from a device or semi-privileged userspace; the ones I could find (not counting interfaces that require root privileges) are: - lpfc_sli4_request_firmware_update() seems to construct the firmware filename from "ModelName", a string that was previously parsed out of some descriptor ("Vital Product Data") in lpfc_fill_vpd() - nfp_net_fw_find() seems to construct a firmware filename from a model name coming from nfp_hwinfo_lookup(pf->hwinfo, "nffw.partno"), which I think parses some descriptor that was read from the device. (But this case likely isn't exploitable because the format string looks like "netronome/nic_%s", and there shouldn't be any *folders* starting with "netronome/nic_". The previous case was different because there, the "%s" is *at the start* of the format string.) - module_flash_fw_schedule() is reachable from the ETHTOOL_MSG_MODULE_FW_FLASH_ACT netlink command, which is marked as GENL_UNS_ADMIN_PERM (meaning CAP_NET_ADMIN inside a user namespace is enough to pass the privilege check), and takes a userspace-provided firmware name. (But I think to reach this case, you need to have CAP_NET_ADMIN over a network namespace that a special kind of ethernet device is mapped into, so I think this is not a viable attack path in practice.) Fix it by rejecting any firmware names containing ".." path components. For what it's worth, I went looking and haven't found any USB device drivers that use the firmware loader dangerously. Cc: stable@vger.kernel.org Reviewed-by: Danilo Krummrich Fixes: abb139e75c2c ("firmware: teach the kernel to load firmware files directly from the filesystem") Signed-off-by: Jann Horn Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240828-firmware-traversal-v3-1-c76529c63b5f@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/main.c | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index b58c42f1b1ce..0b18c6b46e65 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -844,6 +844,26 @@ static void fw_log_firmware_info(const struct firmware *fw, const char *name, {} #endif +/* + * Reject firmware file names with ".." path components. + * There are drivers that construct firmware file names from device-supplied + * strings, and we don't want some device to be able to tell us "I would like to + * be sent my firmware from ../../../etc/shadow, please". + * + * Search for ".." surrounded by either '/' or start/end of string. + * + * This intentionally only looks at the firmware name, not at the firmware base + * directory or at symlink contents. + */ +static bool name_contains_dotdot(const char *name) +{ + size_t name_len = strlen(name); + + return strcmp(name, "..") == 0 || strncmp(name, "../", 3) == 0 || + strstr(name, "/../") != NULL || + (name_len >= 3 && strcmp(name+name_len-3, "/..") == 0); +} + /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, @@ -864,6 +884,14 @@ _request_firmware(const struct firmware **firmware_p, const char *name, goto out; } + if (name_contains_dotdot(name)) { + dev_warn(device, + "Firmware load for '%s' refused, path contains '..' component\n", + name); + ret = -EINVAL; + goto out; + } + ret = _request_firmware_prepare(&fw, name, device, buf, size, offset, opt_flags); if (ret <= 0) /* error or already assigned */ @@ -941,6 +969,8 @@ out: * @name will be used as $FIRMWARE in the uevent environment and * should be distinctive enough not to be confused with any other * firmware image for this or any other device. + * It must not contain any ".." path components - "foo/bar..bin" is + * allowed, but "foo/../bar.bin" is not. * * Caller must hold the reference count of @device. * From f7ba350f4e7d5020ae1767d71705e61dae6700bc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 6 Sep 2024 15:54:33 -0700 Subject: [PATCH 436/534] tty: rp2: Fix reset with non forgiving PCIe host bridges commit f16dd10ba342c429b1e36ada545fb36d4d1f0e63 upstream. The write to RP2_GLOBAL_CMD followed by an immediate read of RP2_GLOBAL_CMD in rp2_reset_asic() is intented to flush out the write, however by then the device is already in reset and cannot respond to a memory cycle access. On platforms such as the Raspberry Pi 4 and others using the pcie-brcmstb.c driver, any memory access to a device that cannot respond is met with a fatal system error, rather than being substituted with all 1s as is usually the case on PC platforms. Swapping the delay and the read ensures that the device has finished resetting before we attempt to read from it. Fixes: 7d9f49afa451 ("serial: rp2: New driver for Comtrol RocketPort 2 cards") Cc: stable Suggested-by: Jim Quinlan Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20240906225435.707837-1-florian.fainelli@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/rp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/rp2.c b/drivers/tty/serial/rp2.c index de220ac8ca54..5a1de6044b38 100644 --- a/drivers/tty/serial/rp2.c +++ b/drivers/tty/serial/rp2.c @@ -578,8 +578,8 @@ static void rp2_reset_asic(struct rp2_card *card, unsigned int asic_id) u32 clk_cfg; writew(1, base + RP2_GLOBAL_CMD); - readw(base + RP2_GLOBAL_CMD); msleep(100); + readw(base + RP2_GLOBAL_CMD); writel(0, base + RP2_CLK_PRESCALER); /* TDM clock configuration */ From e29a1f8b74a9ae9c4c70fc4c001b5fed60f7367c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 5 Sep 2024 17:32:59 +0300 Subject: [PATCH 437/534] xhci: Set quirky xHC PCI hosts to D3 _after_ stopping and freeing them. commit f81dfa3b57c624c56f2bff171c431bc7f5b558f2 upstream. PCI xHC host should be stopped and xhci driver memory freed before putting host to PCI D3 state during PCI remove callback. Hosts with XHCI_SPURIOUS_WAKEUP quirk did this the wrong way around and set the host to D3 before calling usb_hcd_pci_remove(dev), which will access the host to stop it, and then free xhci. Fixes: f1f6d9a8b540 ("xhci: don't dereference a xhci member after removing xhci") Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240905143300.1959279-12-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 6cee705568c2..5c6fb9958211 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -721,8 +721,10 @@ put_runtime_pm: static void xhci_pci_remove(struct pci_dev *dev) { struct xhci_hcd *xhci; + bool set_power_d3; xhci = hcd_to_xhci(pci_get_drvdata(dev)); + set_power_d3 = xhci->quirks & XHCI_SPURIOUS_WAKEUP; xhci->xhc_state |= XHCI_STATE_REMOVING; @@ -735,11 +737,11 @@ static void xhci_pci_remove(struct pci_dev *dev) xhci->shared_hcd = NULL; } - /* Workaround for spurious wakeups at shutdown with HSW */ - if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) - pci_set_power_state(dev, PCI_D3hot); - usb_hcd_pci_remove(dev); + + /* Workaround for spurious wakeups at shutdown with HSW */ + if (set_power_d3) + pci_set_power_state(dev, PCI_D3hot); } /* From 633bd1d6be1a2e634d6411b546335432548cedc4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Sep 2024 15:13:29 +0200 Subject: [PATCH 438/534] serial: qcom-geni: fix fifo polling timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c80ee36ac8f9e9c27d8e097a2eaaf198e7534c83 upstream. The qcom_geni_serial_poll_bit() can be used to wait for events like command completion and is supposed to wait for the time it takes to clear a full fifo before timing out. As noted by Doug, the current implementation does not account for start, stop and parity bits when determining the timeout. The helper also does not currently account for the shift register and the two-word intermediate transfer register. A too short timeout can specifically lead to lost characters when waiting for a transfer to complete as the transfer is cancelled on timeout. Instead of determining the poll timeout on every call, store the fifo timeout when updating it in set_termios() and make sure to take the shift and intermediate registers into account. Note that serial core has already added a 20 ms margin to the fifo timeout. Also note that the current uart_fifo_timeout() interface does unnecessary calculations on every call and did not exist in earlier kernels so only store its result once. This facilitates backports too as earlier kernels can derive the timeout from uport->timeout, which has since been removed. Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP") Cc: stable@vger.kernel.org # 4.17 Reported-by: Douglas Anderson Tested-by: Nícolas F. R. A. Prado Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240906131336.23625-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 31 +++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 2e1b1c827dfe..f798ef3c4130 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -124,7 +124,7 @@ struct qcom_geni_serial_port { dma_addr_t tx_dma_addr; dma_addr_t rx_dma_addr; bool setup; - unsigned int baud; + unsigned long poll_timeout_us; unsigned long clk_rate; void *rx_buf; u32 loopback; @@ -270,22 +270,13 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport, { u32 reg; struct qcom_geni_serial_port *port; - unsigned int baud; - unsigned int fifo_bits; unsigned long timeout_us = 20000; struct qcom_geni_private_data *private_data = uport->private_data; if (private_data->drv) { port = to_dev_port(uport); - baud = port->baud; - if (!baud) - baud = 115200; - fifo_bits = port->tx_fifo_depth * port->tx_fifo_width; - /* - * Total polling iterations based on FIFO worth of bytes to be - * sent at current baud. Add a little fluff to the wait. - */ - timeout_us = ((fifo_bits * USEC_PER_SEC) / baud) + 500; + if (port->poll_timeout_us) + timeout_us = port->poll_timeout_us; } /* @@ -1223,11 +1214,11 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport, unsigned long clk_rate; u32 ver, sampling_rate; unsigned int avg_bw_core; + unsigned long timeout; qcom_geni_serial_stop_rx(uport); /* baud rate */ baud = uart_get_baud_rate(uport, termios, old, 300, 4000000); - port->baud = baud; sampling_rate = UART_OVERSAMPLING; /* Sampling rate is halved for IP versions >= 2.5 */ @@ -1305,9 +1296,21 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport, else tx_trans_cfg |= UART_CTS_MASK; - if (baud) + if (baud) { uart_update_timeout(uport, termios->c_cflag, baud); + /* + * Make sure that qcom_geni_serial_poll_bitfield() waits for + * the FIFO, two-word intermediate transfer register and shift + * register to clear. + * + * Note that uart_fifo_timeout() also adds a 20 ms margin. + */ + timeout = jiffies_to_usecs(uart_fifo_timeout(uport)); + timeout += 3 * timeout / port->tx_fifo_depth; + WRITE_ONCE(port->poll_timeout_us, timeout); + } + if (!uart_console(uport)) writel(port->loopback, uport->membase + SE_UART_LOOPBACK_CFG); From a3028d70a563631fe31fb84ae272e24163194c9e Mon Sep 17 00:00:00 2001 From: Pavan Kumar Paluri Date: Thu, 15 Aug 2024 07:25:00 -0500 Subject: [PATCH 439/534] crypto: ccp - Properly unregister /dev/sev on sev PLATFORM_STATUS failure commit ce3d2d6b150ba8528f3218ebf0cee2c2c572662d upstream. In case of sev PLATFORM_STATUS failure, sev_get_api_version() fails resulting in sev_data field of psp_master nulled out. This later becomes a problem when unloading the ccp module because the device has not been unregistered (via misc_deregister()) before clearing the sev_data field of psp_master. As a result, on reloading the ccp module, a duplicate device issue is encountered as can be seen from the dmesg log below. on reloading ccp module via modprobe ccp Call Trace: dump_stack_lvl+0xd7/0xf0 dump_stack+0x10/0x20 sysfs_warn_dup+0x5c/0x70 sysfs_create_dir_ns+0xbc/0xd kobject_add_internal+0xb1/0x2f0 kobject_add+0x7a/0xe0 ? srso_alias_return_thunk+0x5/0xfbef5 ? get_device_parent+0xd4/0x1e0 ? __pfx_klist_children_get+0x10/0x10 device_add+0x121/0x870 ? srso_alias_return_thunk+0x5/0xfbef5 device_create_groups_vargs+0xdc/0x100 device_create_with_groups+0x3f/0x60 misc_register+0x13b/0x1c0 sev_dev_init+0x1d4/0x290 [ccp] psp_dev_init+0x136/0x300 [ccp] sp_init+0x6f/0x80 [ccp] sp_pci_probe+0x2a6/0x310 [ccp] ? srso_alias_return_thunk+0x5/0xfbef5 local_pci_probe+0x4b/0xb0 work_for_cpu_fn+0x1a/0x30 process_one_work+0x203/0x600 worker_thread+0x19e/0x350 ? __pfx_worker_thread+0x10/0x10 kthread+0xeb/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x3c/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 kobject: kobject_add_internal failed for sev with -EEXIST, don't try to register things with the same name in the same directory. ccp 0000:22:00.1: sev initialization failed ccp 0000:22:00.1: psp initialization failed ccp 0000:a2:00.1: no command queues available ccp 0000:a2:00.1: psp enabled Address this issue by unregistering the /dev/sev before clearing out sev_data in case of PLATFORM_STATUS failure. Fixes: 200664d5237f ("crypto: ccp: Add Secure Encrypted Virtualization (SEV) command support") Cc: stable@vger.kernel.org Signed-off-by: Pavan Kumar Paluri Acked-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/sev-dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 17fb01853dbf..07e6f782b622 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1367,6 +1367,8 @@ void sev_pci_init(void) return; err: + sev_dev_destroy(psp_master); + psp_master->sev_data = NULL; } From 42ac42d79039c95db78372d4e9da98ca23eb967e Mon Sep 17 00:00:00 2001 From: Qiu-ji Chen Date: Fri, 13 Sep 2024 16:35:04 +0800 Subject: [PATCH 440/534] drbd: Fix atomicity violation in drbd_uuid_set_bm() commit 2f02b5af3a4482b216e6a466edecf6ba8450fa45 upstream. The violation of atomicity occurs when the drbd_uuid_set_bm function is executed simultaneously with modifying the value of device->ldev->md.uuid[UI_BITMAP]. Consider a scenario where, while device->ldev->md.uuid[UI_BITMAP] passes the validity check when its value is not zero, the value of device->ldev->md.uuid[UI_BITMAP] is written to zero. In this case, the check in drbd_uuid_set_bm might refer to the old value of device->ldev->md.uuid[UI_BITMAP] (before locking), which allows an invalid value to pass the validity check, resulting in inconsistency. To address this issue, it is recommended to include the data validity check within the locked section of the function. This modification ensures that the value of device->ldev->md.uuid[UI_BITMAP] does not change during the validation process, thereby maintaining its integrity. This possible bug is found by an experimental static analysis tool developed by our team. This tool analyzes the locking APIs to extract function pairs that can be concurrently executed, and then analyzes the instructions in the paired functions to identify possible concurrency bugs including data races and atomicity violations. Fixes: 9f2247bb9b75 ("drbd: Protect accesses to the uuid set with a spinlock") Cc: stable@vger.kernel.org Signed-off-by: Qiu-ji Chen Reviewed-by: Philipp Reisner Link: https://lore.kernel.org/r/20240913083504.10549-1-chenqiuji666@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/drbd/drbd_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6bc86106c7b2..102cc3034412 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3392,10 +3392,12 @@ void drbd_uuid_new_current(struct drbd_device *device) __must_hold(local) void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) { unsigned long flags; - if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) - return; - spin_lock_irqsave(&device->ldev->md.uuid_lock, flags); + if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) { + spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags); + return; + } + if (val == 0) { drbd_uuid_move_history(device); device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP]; From df6a82a6b06e20ead654c8b2428a1acef61040f8 Mon Sep 17 00:00:00 2001 From: Mikhail Lobanov Date: Mon, 9 Sep 2024 09:37:36 -0400 Subject: [PATCH 441/534] drbd: Add NULL check for net_conf to prevent dereference in state validation commit a5e61b50c9f44c5edb6e134ede6fee8806ffafa9 upstream. If the net_conf pointer is NULL and the code attempts to access its fields without a check, it will lead to a null pointer dereference. Add a NULL check before dereferencing the pointer. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 44ed167da748 ("drbd: rcu_read_lock() and rcu_dereference() for tconn->net_conf") Cc: stable@vger.kernel.org Signed-off-by: Mikhail Lobanov Link: https://lore.kernel.org/r/20240909133740.84297-1-m.lobanov@rosalinux.ru Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/drbd/drbd_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 287a8d1d3f70..87cf5883078f 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -876,7 +876,7 @@ is_valid_state(struct drbd_device *device, union drbd_state ns) ns.disk == D_OUTDATED) rv = SS_CONNECTED_OUTDATES; - else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + else if (nc && (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && (nc->verify_alg[0] == 0)) rv = SS_NO_VERIFY_ALG; From f0921ecd4ddc14646bb5511f49db4d7d3b0829f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 9 Jul 2024 22:37:24 +0200 Subject: [PATCH 442/534] ACPI: sysfs: validate return type of _STR method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4bb1e7d027413835b086aed35bc3f0713bc0f72b upstream. Only buffer objects are valid return values of _STR. If something else is returned description_show() will access invalid memory. Fixes: d1efe3c324ea ("ACPI: Add new sysfs interface to export device description") Cc: All applicable Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240709-acpi-sysfs-groups-v2-1-058ab0667fa8@weissschuh.net Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index a34d8578b3da..6ed5e9e56be2 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -544,8 +544,9 @@ int acpi_device_setup_files(struct acpi_device *dev) * If device has _STR, 'description' file is created */ if (acpi_has_method(dev->handle, "_STR")) { - status = acpi_evaluate_object(dev->handle, "_STR", - NULL, &buffer); + status = acpi_evaluate_object_typed(dev->handle, "_STR", + NULL, &buffer, + ACPI_TYPE_BUFFER); if (ACPI_FAILURE(status)) buffer.pointer = NULL; dev->pnp.str_obj = buffer.pointer; From ca659f3804699504404996c75d469938a823cee0 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 10 Sep 2024 11:40:06 +0200 Subject: [PATCH 443/534] ACPI: resource: Add another DMI match for the TongFang GMxXGxx commit a98cfe6ff15b62f94a44d565607a16771c847bc6 upstream. Internal documentation suggest that the TUXEDO Polaris 15 Gen5 AMD might have GMxXGxX as the board name instead of GMxXGxx. Adding both to be on the safe side. Signed-off-by: Werner Sembach Cc: All applicable Link: https://patch.msgid.link/20240910094008.1601230-1-wse@tuxedocomputers.com Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/resource.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 15f9d3d9c1cb..3a13b22c8d9a 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -508,6 +508,12 @@ static const struct dmi_system_id maingear_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "GMxXGxx"), }, }, + { + /* TongFang GMxXGxX/TUXEDO Polaris 15 Gen5 AMD */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxXGxX"), + }, + }, { /* TongFang GMxXGxx sold as Eluktronics Inc. RP-15 */ .matches = { From 19fd2f2c5fb36b61506d3208474bfd8fdf1cada3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 12 Sep 2024 17:45:49 +0200 Subject: [PATCH 444/534] efistub/tpm: Use ACPI reclaim memory for event log to avoid corruption commit 77d48d39e99170b528e4f2e9fc5d1d64cdedd386 upstream. The TPM event log table is a Linux specific construct, where the data produced by the GetEventLog() boot service is cached in memory, and passed on to the OS using an EFI configuration table. The use of EFI_LOADER_DATA here results in the region being left unreserved in the E820 memory map constructed by the EFI stub, and this is the memory description that is passed on to the incoming kernel by kexec, which is therefore unaware that the region should be reserved. Even though the utility of the TPM2 event log after a kexec is questionable, any corruption might send the parsing code off into the weeds and crash the kernel. So let's use EFI_ACPI_RECLAIM_MEMORY instead, which is always treated as reserved by the E820 conversion logic. Cc: Reported-by: Breno Leitao Tested-by: Usama Arif Reviewed-by: Ilias Apalodimas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index 7acbac16eae0..95da291c3083 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -115,7 +115,7 @@ void efi_retrieve_tpm2_eventlog(void) } /* Allocate space for the logs and copy them. */ - status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, sizeof(*log_tbl) + log_size, (void **)&log_tbl); if (status != EFI_SUCCESS) { From cb0125ec3d996158a91dfbd1c539f159a81e4ffc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 15 Jul 2024 19:07:00 +0300 Subject: [PATCH 445/534] perf/x86/intel/pt: Fix sampling synchronization commit d92792a4b26e50b96ab734cbe203d8a4c932a7a9 upstream. pt_event_snapshot_aux() uses pt->handle_nmi to determine if tracing needs to be stopped, however tracing can still be going because pt->handle_nmi is set to zero before tracing is stopped in pt_event_stop, whereas pt_event_snapshot_aux() requires that tracing must be stopped in order to copy a sample of trace from the buffer. Instead call pt_config_stop() always, which anyway checks config for RTIT_CTL_TRACEEN and does nothing if it is already clear. Note pt_event_snapshot_aux() can continue to use pt->handle_nmi to determine if the trace needs to be restarted afterwards. Fixes: 25e8920b301c ("perf/x86/intel/pt: Add sampling support") Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20240715160712.127117-2-adrian.hunter@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/pt.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index cc5c6a326496..4110246aba12 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1602,6 +1602,7 @@ static void pt_event_stop(struct perf_event *event, int mode) * see comment in intel_pt_interrupt(). */ WRITE_ONCE(pt->handle_nmi, 0); + barrier(); pt_config_stop(event); @@ -1653,11 +1654,10 @@ static long pt_event_snapshot_aux(struct perf_event *event, return 0; /* - * Here, handle_nmi tells us if the tracing is on + * There is no PT interrupt in this mode, so stop the trace and it will + * remain stopped while the buffer is copied. */ - if (READ_ONCE(pt->handle_nmi)) - pt_config_stop(event); - + pt_config_stop(event); pt_read_offset(buf); pt_update_head(pt); @@ -1669,11 +1669,10 @@ static long pt_event_snapshot_aux(struct perf_event *event, ret = perf_output_copy_aux(&pt->handle, handle, from, to); /* - * If the tracing was on when we turned up, restart it. - * Compiler barrier not needed as we couldn't have been - * preempted by anything that touches pt->handle_nmi. + * Here, handle_nmi tells us if the tracing was on. + * If the tracing was on, restart it. */ - if (pt->handle_nmi) + if (READ_ONCE(pt->handle_nmi)) pt_config_start(event); return ret; From 0a74a9b148d333c74b4a0754b3986087bd5a104e Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 3 Sep 2024 09:44:55 +0800 Subject: [PATCH 446/534] wifi: mt76: mt7921: Check devm_kasprintf() returned value commit 1ccc9e476ce76e8577ba4fdbd1f63cb3e3499d38 upstream. devm_kasprintf() can return a NULL pointer on failure but this returned value is not checked. Fix this lack and check the returned value. Found by code review. Cc: stable@vger.kernel.org Fixes: 6ae39b7c7ed4 ("wifi: mt76: mt7921: Support temp sensor") Signed-off-by: Ma Ke Reviwed-by: Matthias Brugger Link: https://patch.msgid.link/20240903014455.4144536-1-make24@iscas.ac.cn Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7921/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c index ff63f37f67d9..61de6b03fa0b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c @@ -52,6 +52,8 @@ static int mt7921_thermal_init(struct mt792x_phy *phy) name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7921_%s", wiphy_name(wiphy)); + if (!name) + return -ENOMEM; hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, phy, mt7921_hwmon_groups); From cf23427dd75bade31ab0aac4be873df17154a1f0 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 3 Sep 2024 09:49:55 +0800 Subject: [PATCH 447/534] wifi: mt76: mt7915: check devm_kasprintf() returned value commit 267efeda8c55f30e0e7c5b7fd03dea4efec6916c upstream. devm_kasprintf() can return a NULL pointer on failure but this returned value is not checked. Fix this lack and check the returned value. Found by code review. Cc: stable@vger.kernel.org Fixes: 6ae39b7c7ed4 ("wifi: mt76: mt7921: Support temp sensor") Signed-off-by: Ma Ke Reviewed-by: Matthias Brugger Link: https://patch.msgid.link/20240903014955.4145423-1-make24@iscas.ac.cn Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index 35fdf4f98d80..e6af7318a9e3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -194,6 +194,8 @@ static int mt7915_thermal_init(struct mt7915_phy *phy) name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7915_%s", wiphy_name(wiphy)); + if (!name) + return -ENOMEM; cdev = thermal_cooling_device_register(name, phy, &mt7915_thermal_ops); if (!IS_ERR(cdev)) { From 8e4b60ae8a047ad2fb175fcfdd54feee80983a45 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 13 Aug 2024 16:12:42 +0800 Subject: [PATCH 448/534] wifi: mt76: mt7996: fix NULL pointer dereference in mt7996_mcu_sta_bfer_he commit f503ae90c7355e8506e68498fe84c1357894cd5b upstream. Fix the NULL pointer dereference in mt7996_mcu_sta_bfer_he routine adding an sta interface to the mt7996 driver. Found by code review. Cc: stable@vger.kernel.org Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Signed-off-by: Ma Ke Link: https://patch.msgid.link/20240813081242.3991814-1-make24@iscas.ac.cn Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index b50743f7e030..302171e10359 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -1300,6 +1300,9 @@ mt7996_mcu_sta_bfer_he(struct ieee80211_sta *sta, struct ieee80211_vif *vif, u8 nss_mcs = mt7996_mcu_get_sta_nss(mcs_map); u8 snd_dim, sts; + if (!vc) + return; + bf->tx_mode = MT_PHY_TYPE_HE_SU; mt7996_mcu_sta_sounding_rate(bf); From de0cb07dc2c3ccdc44496539376973c2977afcac Mon Sep 17 00:00:00 2001 From: Nick Morrow Date: Thu, 11 Jul 2024 01:14:23 +0300 Subject: [PATCH 449/534] wifi: rtw88: 8821cu: Remove VID/PID 0bda:c82c commit 0af8cd2822f31ed8363223329e5cff2a7ed01961 upstream. Remove VID/PID 0bda:c82c as it was inadvertently added to the device list in driver rtw8821cu. This VID/PID is for the rtw8822cu device and it is already in the appropriate place for that device. Cc: stable@vger.kernel.org Signed-off-by: Nick Morrow Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/335d7fa1-0ba5-4b86-bba5-f98834ace1f8@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtw88/rtw8821cu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821cu.c b/drivers/net/wireless/realtek/rtw88/rtw8821cu.c index e2c7d9f87683..a019f4085e73 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821cu.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821cu.c @@ -31,8 +31,6 @@ static const struct usb_device_id rtw_8821cu_id_table[] = { .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc82b, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, - { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xc82c, 0xff, 0xff, 0xff), - .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x331d, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&(rtw8821c_hw_spec) }, /* D-Link */ { USB_DEVICE_AND_INTERFACE_INFO(0x7392, 0xc811, 0xff, 0xff, 0xff), From eed8db8203a8ba6eccd846ba7d078733c4f1a5ee Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Tue, 23 Jul 2024 22:31:36 +0300 Subject: [PATCH 450/534] wifi: rtw88: 8822c: Fix reported RX band width commit a71ed5898dfae68262f79277915d1dfe34586bc6 upstream. "iw dev wlp2s0 station dump" shows incorrect rx bitrate: tx bitrate: 866.7 MBit/s VHT-MCS 9 80MHz short GI VHT-NSS 2 rx bitrate: 86.7 MBit/s VHT-MCS 9 VHT-NSS 1 This is because the RX band width is calculated incorrectly. Fix the calculation according to the phydm_rxsc_2_bw() function from the official drivers. After: tx bitrate: 866.7 MBit/s VHT-MCS 9 80MHz short GI VHT-NSS 2 rx bitrate: 390.0 MBit/s VHT-MCS 9 80MHz VHT-NSS 1 It also works correctly with the AP configured for 20 MHz and 40 MHz. Tested with RTL8822CE. Cc: stable@vger.kernel.org Fixes: e3037485c68e ("rtw88: new Realtek 802.11ac driver") Signed-off-by: Bitterblue Smith Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/bca8949b-e2bd-4515-98fd-70d3049a0097@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtw88/rtw8822c.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c index cd965edc29ce..3fe5c70ce731 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c @@ -2611,12 +2611,14 @@ static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status, else rxsc = GET_PHY_STAT_P1_HT_RXSC(phy_status); - if (rxsc >= 9 && rxsc <= 12) - bw = RTW_CHANNEL_WIDTH_40; - else if (rxsc >= 13) - bw = RTW_CHANNEL_WIDTH_80; - else + if (rxsc == 0) + bw = rtwdev->hal.current_band_width; + else if (rxsc >= 1 && rxsc <= 8) bw = RTW_CHANNEL_WIDTH_20; + else if (rxsc >= 9 && rxsc <= 12) + bw = RTW_CHANNEL_WIDTH_40; + else + bw = RTW_CHANNEL_WIDTH_80; channel = GET_PHY_STAT_P1_CHANNEL(phy_status); rtw_set_rx_freq_band(pkt_stat, channel); From c1ba1f2ca1b89f56e9be95b5480cc75b35a02aea Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 5 Sep 2024 09:47:53 +0800 Subject: [PATCH 451/534] wifi: mt76: mt7615: check devm_kasprintf() returned value commit 5acdc432f832d810e0d638164c393b877291d9b4 upstream. devm_kasprintf() can return a NULL pointer on failure but this returned value is not checked. Fix this lack and check the returned value. Found by code review. Cc: stable@vger.kernel.org Fixes: 0bb4e9187ea4 ("mt76: mt7615: fix hwmon temp sensor mem use-after-free") Signed-off-by: Ma Ke Reviewed-by: Matthias Brugger Link: https://patch.msgid.link/20240905014753.353271-1-make24@iscas.ac.cn Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7615/init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c index 18a50ccff106..f22a1aa88505 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c @@ -56,6 +56,9 @@ int mt7615_thermal_init(struct mt7615_dev *dev) name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7615_%s", wiphy_name(wiphy)); + if (!name) + return -ENOMEM; + hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, dev, mt7615_hwmon_groups); if (IS_ERR(hwmon)) From 534230eebae5e8c99fe26d4e6ef5d37b54ffd18a Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 4 Sep 2024 21:39:40 +0800 Subject: [PATCH 452/534] debugobjects: Fix conditions in fill_pool() commit 684d28feb8546d1e9597aa363c3bfcf52fe250b7 upstream. fill_pool() uses 'obj_pool_min_free' to decide whether objects should be handed back to the kmem cache. But 'obj_pool_min_free' records the lowest historical value of the number of objects in the object pool and not the minimum number of objects which should be kept in the pool. Use 'debug_objects_pool_min_level' instead, which holds the minimum number which was scaled to the number of CPUs at boot time. [ tglx: Massage change log ] Fixes: d26bf5056fc0 ("debugobjects: Reduce number of pool_lock acquisitions in fill_pool()") Fixes: 36c4ead6f6df ("debugobjects: Add global free list and the counter") Signed-off-by: Zhen Lei Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240904133944.2124-3-thunder.leizhen@huawei.com Signed-off-by: Greg Kroah-Hartman --- lib/debugobjects.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index c90834a209b5..9d401355d560 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -141,13 +141,14 @@ static void fill_pool(void) * READ_ONCE()s pair with the WRITE_ONCE()s in pool_lock critical * sections. */ - while (READ_ONCE(obj_nr_tofree) && (READ_ONCE(obj_pool_free) < obj_pool_min_free)) { + while (READ_ONCE(obj_nr_tofree) && + READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) { raw_spin_lock_irqsave(&pool_lock, flags); /* * Recheck with the lock held as the worker thread might have * won the race and freed the global free list already. */ - while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) { + while (obj_nr_tofree && (obj_pool_free < debug_objects_pool_min_level)) { obj = hlist_entry(obj_to_free.first, typeof(*obj), node); hlist_del(&obj->node); WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); From 6a6a5751c06a1d166daab9bccb0d2f72e8a0860f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 11 Sep 2024 07:06:45 +0930 Subject: [PATCH 453/534] btrfs: tree-checker: fix the wrong output of data backref objectid commit b0b595e61d97de61c15b379b754b2caa90e83e5c upstream. [BUG] There are some reports about invalid data backref objectids, the report looks like this: BTRFS critical (device sda): corrupt leaf: block=333654787489792 slot=110 extent bytenr=333413935558656 len=65536 invalid data ref objectid value 2543 The data ref objectid is the inode number inside the subvolume. But in above case, the value is completely sane, not really showing the problem. [CAUSE] The root cause of the problem is the deprecated feature, inode cache. This feature results a special inode number, -12ULL, and it's no longer recognized by tree-checker, triggering the error. The direct problem here is the output of data ref objectid. The value shown is in fact the dref_root (subvolume id), not the dref_objectid (inode number). [FIX] Fix the output to use dref_objectid instead. Reported-by: Neil Parton Reported-by: Archange Link: https://lore.kernel.org/linux-btrfs/CAAYHqBbrrgmh6UmW3ANbysJX9qG9Pbg3ZwnKsV=5mOpv_qix_Q@mail.gmail.com/ Link: https://lore.kernel.org/linux-btrfs/9541deea-9056-406e-be16-a996b549614d@archlinux.org/ Fixes: f333a3c7e832 ("btrfs: tree-checker: validate dref root and objectid") CC: stable@vger.kernel.org # 6.11 Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 46c1f7498395..3a8ec33a1204 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1493,7 +1493,7 @@ static int check_extent_item(struct extent_buffer *leaf, dref_objectid > BTRFS_LAST_FREE_OBJECTID)) { extent_err(leaf, slot, "invalid data ref objectid value %llu", - dref_root); + dref_objectid); return -EUCLEAN; } if (unlikely(!IS_ALIGNED(dref_offset, From 4adf6514949f76bb6ae34403d1b5fa6a93499ecd Mon Sep 17 00:00:00 2001 From: Luca Stefani Date: Mon, 2 Sep 2024 13:10:53 +0200 Subject: [PATCH 454/534] btrfs: always update fstrim_range on failure in FITRIM ioctl commit 3368597206dc3c6c3c2247ee146beada14c67380 upstream. Even in case of failure we could've discarded some data and userspace should be made aware of it, so copy fstrim_range to userspace regardless. Also make sure to update the trimmed bytes amount even if btrfs_trim_free_extents fails. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Qu Wenruo Signed-off-by: Luca Stefani Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/ioctl.c | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 72851adc1fee..50d795a2542c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6175,13 +6175,13 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) continue; ret = btrfs_trim_free_extents(device, &group_trimmed); + + trimmed += group_trimmed; if (ret) { dev_failed++; dev_ret = ret; break; } - - trimmed += group_trimmed; } mutex_unlock(&fs_devices->device_list_mutex); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d5297523d497..5f0c9c3f3bbf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -533,13 +533,11 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info, range.minlen = max(range.minlen, minlen); ret = btrfs_trim_fs(fs_info, &range); - if (ret < 0) - return ret; if (copy_to_user(arg, &range, sizeof(range))) return -EFAULT; - return 0; + return ret; } int __pure btrfs_is_empty_uuid(u8 *uuid) From b18a5c8382c80c6a3b25f16907ce0aca78cbc1ff Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jul 2024 10:28:38 -0700 Subject: [PATCH 455/534] f2fs: fix several potential integer overflows in file offsets commit 1cade98cf6415897bf9342ee451cc5b40b58c638 upstream. When dealing with large extents and calculating file offsets by summing up according extent offsets and lengths of unsigned int type, one may encounter possible integer overflow if the values are big enough. Prevent this from happening by expanding one of the addends to (pgoff_t) type. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: d323d005ac4a ("f2fs: support file defragment") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/extent_cache.c | 4 ++-- fs/f2fs/file.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 6a9a470345bf..d6fb053b6dfb 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -367,7 +367,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - if (fofs < et->largest.fofs + et->largest.len && + if (fofs < (pgoff_t)et->largest.fofs + et->largest.len && fofs + len > et->largest.fofs) { et->largest.len = 0; et->largest_updated = true; @@ -457,7 +457,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, if (type == EX_READ && et->largest.fofs <= pgofs && - et->largest.fofs + et->largest.len > pgofs) { + (pgoff_t)et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; stat_inc_largest_node_hit(sbi); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f2969d860342..cb7afdcbe45e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2687,7 +2687,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * block addresses are continuous. */ if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { - if (ei.fofs + ei.len >= pg_end) + if ((pgoff_t)ei.fofs + ei.len >= pg_end) goto out; } From 0c598a0217183aa17fab3e556ee2351c2c9874f0 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jul 2024 10:05:44 -0700 Subject: [PATCH 456/534] f2fs: prevent possible int overflow in dir_block_index() commit 47f268f33dff4a5e31541a990dc09f116f80e61c upstream. The result of multiplication between values derived from functions dir_buckets() and bucket_blocks() *could* technically reach 2^30 * 2^2 = 2^32. While unlikely to happen, it is prudent to ensure that it will not lead to integer overflow. Thus, use mul_u32_u32() as it's more appropriate to mitigate the issue. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 3843154598a0 ("f2fs: introduce large directory support") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c624ffff6f19..166ec8942595 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -157,7 +157,8 @@ static unsigned long dir_block_index(unsigned int level, unsigned long bidx = 0; for (i = 0; i < level; i++) - bidx += dir_buckets(i, dir_level) * bucket_blocks(i); + bidx += mul_u32_u32(dir_buckets(i, dir_level), + bucket_blocks(i)); bidx += idx * bucket_blocks(level); return bidx; } From 56d8651679921d70f1fb81fa78680624457d9306 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jul 2024 10:51:58 -0700 Subject: [PATCH 457/534] f2fs: avoid potential int overflow in sanity_check_area_boundary() commit 50438dbc483ca6a133d2bce9d5d6747bcee38371 upstream. While calculating the end addresses of main area and segment 0, u32 may be not enough to hold the result without the danger of int overflow. Just in case, play it safe and cast one of the operands to a wider type (u64). Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: fd694733d523 ("f2fs: cover large section in sanity check of super") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 37fa7e32835a..540fa1dfc77d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3336,9 +3336,9 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, u32 segment_count = le32_to_cpu(raw_super->segment_count); u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); u64 main_end_blkaddr = main_blkaddr + - (segment_count_main << log_blocks_per_seg); + ((u64)segment_count_main << log_blocks_per_seg); u64 seg_end_blkaddr = segment0_blkaddr + - (segment_count << log_blocks_per_seg); + ((u64)segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", From 5e0de753bfe87768ebe6744d869caa92f35e5731 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 6 Aug 2024 16:07:16 +0200 Subject: [PATCH 458/534] f2fs: Require FMODE_WRITE for atomic write ioctls commit 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e upstream. The F2FS ioctls for starting and committing atomic writes check for inode_owner_or_capable(), but this does not give LSMs like SELinux or Landlock an opportunity to deny the write access - if the caller's FSUID matches the inode's UID, inode_owner_or_capable() immediately returns true. There are scenarios where LSMs want to deny a process the ability to write particular files, even files that the FSUID of the process owns; but this can currently partially be bypassed using atomic write ioctls in two ways: - F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can truncate an inode to size 0 - F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert changes another process concurrently made to a file Fix it by requiring FMODE_WRITE for these operations, just like for F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these ioctls when intending to write into the file, that seems unlikely to break anything. Fixes: 88b88a667971 ("f2fs: support atomic writes") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Reviewed-by: Chao Yu Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cb7afdcbe45e..48cdb6c3ab21 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2100,6 +2100,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) loff_t isize; int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2206,6 +2209,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2238,6 +2244,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; From 7cb51731f24b216b0b87942f519f2c67a17107ee Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Sep 2024 11:20:47 +0800 Subject: [PATCH 459/534] f2fs: fix to check atomic_file in f2fs ioctl interfaces commit bfe5c02654261bfb8bd9cb174a67f3279ea99e58 upstream. Some f2fs ioctl interfaces like f2fs_ioc_set_pin_file(), f2fs_move_file_range(), and f2fs_defragment_range() missed to check atomic_write status, which may cause potential race issue, fix it. Cc: stable@vger.kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 48cdb6c3ab21..74fac935bd09 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2673,7 +2673,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, inode_lock(inode); - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || + f2fs_is_atomic_file(inode)) { err = -EINVAL; goto unlock_out; } @@ -2904,6 +2905,11 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out_unlock; } + if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { + ret = -EINVAL; + goto out_unlock; + } + ret = -EINVAL; if (pos_in + len > src->i_size || pos_in + len < pos_in) goto out_unlock; @@ -3288,6 +3294,11 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + ret = -EINVAL; + goto out; + } + if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); From 5ad4d0b64820ae6f123261060dffe3f11cb023ff Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 26 Aug 2024 15:04:15 +0800 Subject: [PATCH 460/534] hwrng: mtk - Use devm_pm_runtime_enable commit 78cb66caa6ab5385ac2090f1aae5f3c19e08f522 upstream. Replace pm_runtime_enable with the devres-enabled version which can trigger pm_runtime_disable. Otherwise, the below appears during reload driver. mtk_rng 1020f000.rng: Unbalanced pm_runtime_enable! Fixes: 81d2b34508c6 ("hwrng: mtk - add runtime PM support") Cc: Suggested-by: Chen-Yu Tsai Signed-off-by: Guoqing Jiang Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/mtk-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c index aa993753ab12..1e3048f2bb38 100644 --- a/drivers/char/hw_random/mtk-rng.c +++ b/drivers/char/hw_random/mtk-rng.c @@ -142,7 +142,7 @@ static int mtk_rng_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, priv); pm_runtime_set_autosuspend_delay(&pdev->dev, RNG_AUTOSUSPEND_TIMEOUT); pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_enable(&pdev->dev); + devm_pm_runtime_enable(&pdev->dev); dev_info(&pdev->dev, "registered RNG driver\n"); From 3fd8e444e82469b758c2572868b62c7bcf962a70 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 3 Aug 2024 14:49:22 +0800 Subject: [PATCH 461/534] hwrng: bcm2835 - Add missing clk_disable_unprepare in bcm2835_rng_init commit d57e2f7cffd57fe2800332dec768ec1b67a4159f upstream. Add the missing clk_disable_unprepare() before return in bcm2835_rng_init(). Fixes: e5f9f41d5e62 ("hwrng: bcm2835 - add reset support") Cc: Signed-off-by: Gaosheng Cui Reviewed-by: Florian Fainelli Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/bcm2835-rng.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index 4c08efe7f375..57a80ec93bad 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -94,8 +94,10 @@ static int bcm2835_rng_init(struct hwrng *rng) return ret; ret = reset_control_reset(priv->reset); - if (ret) + if (ret) { + clk_disable_unprepare(priv->clk); return ret; + } if (priv->mask_interrupts) { /* mask the interrupt */ From 1b4089d567786c2e1e2defdafc693cca9b6ed1d4 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 3 Aug 2024 14:49:23 +0800 Subject: [PATCH 462/534] hwrng: cctrng - Add missing clk_disable_unprepare in cctrng_resume commit 4b7acc85de14ee8a2236f54445dc635d47eceac0 upstream. Add the missing clk_disable_unprepare() before return in cctrng_resume(). Fixes: a583ed310bb6 ("hwrng: cctrng - introduce Arm CryptoCell driver") Cc: Signed-off-by: Gaosheng Cui Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/cctrng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/hw_random/cctrng.c b/drivers/char/hw_random/cctrng.c index 1abbff04a015..a55f5f2d35df 100644 --- a/drivers/char/hw_random/cctrng.c +++ b/drivers/char/hw_random/cctrng.c @@ -624,6 +624,7 @@ static int __maybe_unused cctrng_resume(struct device *dev) /* wait for Cryptocell reset completion */ if (!cctrng_wait_for_reset_completion(drvdata)) { dev_err(dev, "Cryptocell reset not completed"); + clk_disable_unprepare(drvdata->clk); return -EBUSY; } From 93e1215f3fe08fd8b17906522127a0e646f6ad69 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Tue, 10 Sep 2024 11:50:16 +0300 Subject: [PATCH 463/534] arm64: esr: Define ESR_ELx_EC_* constants as UL commit b6db3eb6c373b97d9e433530d748590421bbeea7 upstream. Add explicit casting to prevent expantion of 32th bit of u32 into highest half of u64 in several places. For example, in inject_abt64: ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT = 0x24 << 26. This operation's result is int with 1 in 32th bit. While casting this value into u64 (esr is u64) 1 fills 32 highest bits. Found by Linux Verification Center (linuxtesting.org) with SVACE. Cc: Fixes: aa8eff9bfbd5 ("arm64: KVM: fault injection into a guest") Signed-off-by: Anastasia Belova Acked-by: Marc Zyngier Link: https://lore.kernel.org/stable/20240910085016.32120-1-abelova%40astralinux.ru Link: https://lore.kernel.org/r/20240910085016.32120-1-abelova@astralinux.ru Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/esr.h | 88 ++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ae35939f395b..1cdae1b4f03b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -10,63 +10,63 @@ #include #include -#define ESR_ELx_EC_UNKNOWN (0x00) -#define ESR_ELx_EC_WFx (0x01) +#define ESR_ELx_EC_UNKNOWN UL(0x00) +#define ESR_ELx_EC_WFx UL(0x01) /* Unallocated EC: 0x02 */ -#define ESR_ELx_EC_CP15_32 (0x03) -#define ESR_ELx_EC_CP15_64 (0x04) -#define ESR_ELx_EC_CP14_MR (0x05) -#define ESR_ELx_EC_CP14_LS (0x06) -#define ESR_ELx_EC_FP_ASIMD (0x07) -#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */ -#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ +#define ESR_ELx_EC_CP15_32 UL(0x03) +#define ESR_ELx_EC_CP15_64 UL(0x04) +#define ESR_ELx_EC_CP14_MR UL(0x05) +#define ESR_ELx_EC_CP14_LS UL(0x06) +#define ESR_ELx_EC_FP_ASIMD UL(0x07) +#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */ /* Unallocated EC: 0x0A - 0x0B */ -#define ESR_ELx_EC_CP14_64 (0x0C) -#define ESR_ELx_EC_BTI (0x0D) -#define ESR_ELx_EC_ILL (0x0E) +#define ESR_ELx_EC_CP14_64 UL(0x0C) +#define ESR_ELx_EC_BTI UL(0x0D) +#define ESR_ELx_EC_ILL UL(0x0E) /* Unallocated EC: 0x0F - 0x10 */ -#define ESR_ELx_EC_SVC32 (0x11) -#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */ -#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */ +#define ESR_ELx_EC_SVC32 UL(0x11) +#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */ /* Unallocated EC: 0x14 */ -#define ESR_ELx_EC_SVC64 (0x15) -#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */ -#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ -#define ESR_ELx_EC_SYS64 (0x18) -#define ESR_ELx_EC_SVE (0x19) -#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +#define ESR_ELx_EC_SVC64 UL(0x15) +#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 UL(0x18) +#define ESR_ELx_EC_SVE UL(0x19) +#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */ /* Unallocated EC: 0x1B */ -#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ -#define ESR_ELx_EC_SME (0x1D) +#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */ +#define ESR_ELx_EC_SME UL(0x1D) /* Unallocated EC: 0x1E */ -#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ -#define ESR_ELx_EC_IABT_LOW (0x20) -#define ESR_ELx_EC_IABT_CUR (0x21) -#define ESR_ELx_EC_PC_ALIGN (0x22) +#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW UL(0x20) +#define ESR_ELx_EC_IABT_CUR UL(0x21) +#define ESR_ELx_EC_PC_ALIGN UL(0x22) /* Unallocated EC: 0x23 */ -#define ESR_ELx_EC_DABT_LOW (0x24) -#define ESR_ELx_EC_DABT_CUR (0x25) -#define ESR_ELx_EC_SP_ALIGN (0x26) -#define ESR_ELx_EC_MOPS (0x27) -#define ESR_ELx_EC_FP_EXC32 (0x28) +#define ESR_ELx_EC_DABT_LOW UL(0x24) +#define ESR_ELx_EC_DABT_CUR UL(0x25) +#define ESR_ELx_EC_SP_ALIGN UL(0x26) +#define ESR_ELx_EC_MOPS UL(0x27) +#define ESR_ELx_EC_FP_EXC32 UL(0x28) /* Unallocated EC: 0x29 - 0x2B */ -#define ESR_ELx_EC_FP_EXC64 (0x2C) +#define ESR_ELx_EC_FP_EXC64 UL(0x2C) /* Unallocated EC: 0x2D - 0x2E */ -#define ESR_ELx_EC_SERROR (0x2F) -#define ESR_ELx_EC_BREAKPT_LOW (0x30) -#define ESR_ELx_EC_BREAKPT_CUR (0x31) -#define ESR_ELx_EC_SOFTSTP_LOW (0x32) -#define ESR_ELx_EC_SOFTSTP_CUR (0x33) -#define ESR_ELx_EC_WATCHPT_LOW (0x34) -#define ESR_ELx_EC_WATCHPT_CUR (0x35) +#define ESR_ELx_EC_SERROR UL(0x2F) +#define ESR_ELx_EC_BREAKPT_LOW UL(0x30) +#define ESR_ELx_EC_BREAKPT_CUR UL(0x31) +#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32) +#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33) +#define ESR_ELx_EC_WATCHPT_LOW UL(0x34) +#define ESR_ELx_EC_WATCHPT_CUR UL(0x35) /* Unallocated EC: 0x36 - 0x37 */ -#define ESR_ELx_EC_BKPT32 (0x38) +#define ESR_ELx_EC_BKPT32 UL(0x38) /* Unallocated EC: 0x39 */ -#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ +#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */ /* Unallocated EC: 0x3B */ -#define ESR_ELx_EC_BRK64 (0x3C) +#define ESR_ELx_EC_BRK64 UL(0x3C) /* Unallocated EC: 0x3D - 0x3F */ -#define ESR_ELx_EC_MAX (0x3F) +#define ESR_ELx_EC_MAX UL(0x3F) #define ESR_ELx_EC_SHIFT (26) #define ESR_ELx_EC_WIDTH (6) From 0e6774ec012bf8bf28eac2a00478d7e0639a3a8f Mon Sep 17 00:00:00 2001 From: D Scott Phillips Date: Tue, 27 Aug 2024 14:17:01 -0700 Subject: [PATCH 464/534] arm64: errata: Enable the AC03_CPU_38 workaround for ampere1a commit db0d8a84348b876df7c4276f0cbce5df3b769f5f upstream. The ampere1a cpu is affected by erratum AC04_CPU_10 which is the same bug as AC03_CPU_38. Add ampere1a to the AC03_CPU_38 workaround midr list. Cc: Signed-off-by: D Scott Phillips Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240827211701.2216719-1-scott@os.amperecomputing.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- Documentation/arch/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 2 +- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 10 +++++++++- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 357d6cb98161..3084f5cf5e40 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -54,6 +54,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Ampere | AmpereOne | AC03_CPU_38 | AMPERE_ERRATUM_AC03_CPU_38 | +----------------+-----------------+-----------------+-----------------------------+ +| Ampere | AmpereOne AC04 | AC04_CPU_10 | AMPERE_ERRATUM_AC03_CPU_38 | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9e0c1ac3d13e..5ea7b3319671 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -420,7 +420,7 @@ config AMPERE_ERRATUM_AC03_CPU_38 default y help This option adds an alternative code sequence to work around Ampere - erratum AC03_CPU_38 on AmpereOne. + errata AC03_CPU_38 and AC04_CPU_10 on AmpereOne. The affected design reports FEAT_HAFDBS as not implemented in ID_AA64MMFR1_EL1.HAFDBS, but (V)TCR_ELx.{HA,HD} are not RES0 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5fd7caea4419..5a7dfeb8e8eb 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -143,6 +143,7 @@ #define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039 #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define AMPERE_CPU_PART_AMPERE1A 0xAC4 #define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ @@ -212,6 +213,7 @@ #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A) #define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 57b1d6a68256..f8d94902fbb5 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -472,6 +472,14 @@ static const struct midr_range erratum_spec_ssbs_list[] = { }; #endif +#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 +static const struct midr_range erratum_ac03_cpu_38_list[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1), + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -789,7 +797,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "AmpereOne erratum AC03_CPU_38", .capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38, - ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1), + ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list), }, #endif { From 4fff20cff6e21ab9d47811ffdad7c97b4f2b8d56 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Sun, 4 Aug 2024 23:10:24 +0200 Subject: [PATCH 465/534] arm64: dts: rockchip: Raise Pinebook Pro's panel backlight PWM frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8c51521de18755d4112a77a598a348b38d0af370 upstream. Increase the frequency of the PWM signal that drives the LED backlight of the Pinebook Pro's panel, from about 1.35 KHz (which equals to the PWM period of 740,740 ns), to exactly 8 kHz (which equals to the PWM period of 125,000 ns). Using a higher PWM frequency for the panel backlight, which reduces the flicker, can only be beneficial to the end users' eyes. On top of that, increasing the backlight PWM signal frequency reportedly eliminates the buzzing emitted from the Pinebook Pro's built-in speakers when certain backlight levels are set, which cause some weird interference with some of the components of the Pinebook Pro's audio chain. The old value for the backlight PWM period, i.e. 740,740 ns, is pretty much an arbitrary value that was selected during the very early bring-up of the Pinebook Pro, only because that value seemed to minimize horizontal line distortion on the display, which resulted from the old X.org drivers causing screen tearing when dragging windows around. That's no longer an issue, so there are no reasons to stick with the old PWM period value. The lower and the upper backlight PWM frequency limits for the Pinebook Pro's panel, according to its datasheet, are 200 Hz and 10 kHz, respectively. [1] These changes still leave some headroom, which may have some positive effects on the lifetime expectancy of the panel's backlight LEDs. [1] https://files.pine64.org/doc/datasheet/PinebookPro/NV140FHM-N49_Rev.P0_20160804_201710235838.pdf Fixes: 5a65505a6988 ("arm64: dts: rockchip: Add initial support for Pinebook Pro") Cc: stable@vger.kernel.org Reported-by: Nikola Radojevic Signed-off-by: Dragan Simic Tested-by: Nikola Radojević Link: https://lore.kernel.org/r/2a23b6cfd8c0513e5b233b4006ee3d3ed09b824f.1722805655.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 294eb2de263d..b3f76cc2d6e1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -32,7 +32,7 @@ backlight: edp-backlight { compatible = "pwm-backlight"; power-supply = <&vcc_12v>; - pwms = <&pwm0 0 740740 0>; + pwms = <&pwm0 0 125000 0>; }; bat: battery { From eea02200cb8c6e979a28661c4d65749210b677e6 Mon Sep 17 00:00:00 2001 From: Qingqing Zhou Date: Thu, 25 Jul 2024 12:51:17 +0530 Subject: [PATCH 466/534] arm64: dts: qcom: sa8775p: Mark APPS and PCIe SMMUs as DMA coherent commit 421688265d7f5d3ff4211982e7231765378bb64f upstream. The SMMUs on sa8775p are cache-coherent. GPU SMMU is marked as such, mark the APPS and PCIe ones as well. Fixes: 603f96d4c9d0 ("arm64: dts: qcom: add initial support for qcom sa8775p-ride") Fixes: 2dba7a613a6e ("arm64: dts: qcom: sa8775p: add the pcie smmu node") Cc: stable@vger.kernel.org Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Signed-off-by: Qingqing Zhou Rule: add Link: https://lore.kernel.org/stable/20240723075948.9545-1-quic_qqzhou%40quicinc.com Link: https://lore.kernel.org/r/20240725072117.22425-1-quic_qqzhou@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sa8775p.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 44bea063aedb..f6766fa8df34 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -1951,6 +1951,7 @@ reg = <0x0 0x15000000 0x0 0x100000>; #iommu-cells = <2>; #global-interrupts = <2>; + dma-coherent; interrupts = , , @@ -2089,6 +2090,7 @@ reg = <0x0 0x15200000 0x0 0x80000>; #iommu-cells = <2>; #global-interrupts = <2>; + dma-coherent; interrupts = , , From ca2a69fdd6af7ad39b7275c00aae69cb74800bc2 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Mon, 15 Jul 2024 19:44:20 +0200 Subject: [PATCH 467/534] arm64: dts: rockchip: Correct the Pinebook Pro battery design capacity commit def33fb1191207f5afa6dcb681d71fef2a6c1293 upstream. All batches of the Pine64 Pinebook Pro, except the latest batch (as of 2024) whose hardware design was revised due to the component shortage, use a 1S lithium battery whose nominal/design capacity is 10,000 mAh, according to the battery datasheet. [1][2] Let's correct the design full-charge value in the Pinebook Pro board dts, to improve the accuracy of the hardware description, and to hopefully improve the accuracy of the fuel gauge a bit on all units that don't belong to the latest batch. The above-mentioned latest batch uses a different 1S lithium battery with a slightly lower capacity, more precisely 9,600 mAh. To make the fuel gauge work reliably on the latest batch, a sample battery would need to be sent to CellWise, to obtain its proprietary battery profile, whose data goes into "cellwise,battery-profile" in the Pinebook Pro board dts. Without that data, the fuel gauge reportedly works unreliably, so changing the design capacity won't have any negative effects on the already unreliable operation of the fuel gauge in the Pinebook Pros that belong to the latest batch. According to the battery datasheet, its voltage can go as low as 2.75 V while discharging, but it's better to leave the current 3.0 V value in the dts file, because of the associated Pinebook Pro's voltage regulation issues. [1] https://wiki.pine64.org/index.php/Pinebook_Pro#Battery [2] https://files.pine64.org/doc/datasheet/pinebook/40110175P%203.8V%2010000mAh%E8%A7%84%E6%A0%BC%E4%B9%A6-14.pdf Fixes: c7c4d698cd28 ("arm64: dts: rockchip: add fuel gauge to Pinebook Pro dts") Cc: stable@vger.kernel.org Cc: Marek Kraus Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/731f8ef9b1a867bcc730d19ed277c8c0534c0842.1721065172.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index b3f76cc2d6e1..f5e124b235c8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -37,7 +37,7 @@ bat: battery { compatible = "simple-battery"; - charge-full-design-microamp-hours = <9800000>; + charge-full-design-microamp-hours = <10000000>; voltage-max-design-microvolt = <4350000>; voltage-min-design-microvolt = <3000000>; }; From 0eed942bc65de1f93eca7bda51344290f9c573bb Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Fri, 23 Aug 2024 21:07:30 +0800 Subject: [PATCH 468/534] vfs: fix race between evice_inodes() and find_inode()&iput() commit 88b1afbf0f6b221f6c5bb66cc80cd3b38d696687 upstream. Hi, all Recently I noticed a bug[1] in btrfs, after digged it into and I believe it'a race in vfs. Let's assume there's a inode (ie ino 261) with i_count 1 is called by iput(), and there's a concurrent thread calling generic_shutdown_super(). cpu0: cpu1: iput() // i_count is 1 ->spin_lock(inode) ->dec i_count to 0 ->iput_final() generic_shutdown_super() ->__inode_add_lru() ->evict_inodes() // cause some reason[2] ->if (atomic_read(inode->i_count)) continue; // return before // inode 261 passed the above check // list_lru_add_obj() // and then schedule out ->spin_unlock() // note here: the inode 261 // was still at sb list and hash list, // and I_FREEING|I_WILL_FREE was not been set btrfs_iget() // after some function calls ->find_inode() // found the above inode 261 ->spin_lock(inode) // check I_FREEING|I_WILL_FREE // and passed ->__iget() ->spin_unlock(inode) // schedule back ->spin_lock(inode) // check (I_NEW|I_FREEING|I_WILL_FREE) flags, // passed and set I_FREEING iput() ->spin_unlock(inode) ->spin_lock(inode) ->evict() // dec i_count to 0 ->iput_final() ->spin_unlock() ->evict() Now, we have two threads simultaneously evicting the same inode, which may trigger the BUG(inode->i_state & I_CLEAR) statement both within clear_inode() and iput(). To fix the bug, recheck the inode->i_count after holding i_lock. Because in the most scenarios, the first check is valid, and the overhead of spin_lock() can be reduced. If there is any misunderstanding, please let me know, thanks. [1]: https://lore.kernel.org/linux-btrfs/000000000000eabe1d0619c48986@google.com/ [2]: The reason might be 1. SB_ACTIVE was removed or 2. mapping_shrinkable() return false when I reproduced the bug. Reported-by: syzbot+67ba3c42bcbb4665d3ad@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=67ba3c42bcbb4665d3ad CC: stable@vger.kernel.org Fixes: 63997e98a3be ("split invalidate_inodes()") Signed-off-by: Julian Sun Link: https://lore.kernel.org/r/20240823130730.658881-1-sunjunchao2870@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/inode.c b/fs/inode.c index 776c2a049d07..9cafde77e2b0 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -757,6 +757,10 @@ again: continue; spin_lock(&inode->i_lock); + if (atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); + continue; + } if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { spin_unlock(&inode->i_lock); continue; From 4d3d0869eccb587ba17c3a7ce61bc617e8aed652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Wed, 21 Aug 2024 11:56:05 +0200 Subject: [PATCH 469/534] fs: Fix file_set_fowner LSM hook inconsistencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 26f204380a3c182e5adf1a798db0724d6111b597 upstream. The fcntl's F_SETOWN command sets the process that handle SIGIO/SIGURG for the related file descriptor. Before this change, the file_set_fowner LSM hook was always called, ignoring the VFS logic which may not actually change the process that handles SIGIO (e.g. TUN, TTY, dnotify), nor update the related UID/EUID. Moreover, because security_file_set_fowner() was called without lock (e.g. f_owner.lock), concurrent F_SETOWN commands could result to a race condition and inconsistent LSM states (e.g. SELinux's fown_sid) compared to struct fown_struct's UID/EUID. This change makes sure the LSM states are always in sync with the VFS state by moving the security_file_set_fowner() call close to the UID/EUID updates and using the same f_owner.lock . Rename f_modown() to __f_setown() to simplify code. Cc: stable@vger.kernel.org Cc: Al Viro Cc: Casey Schaufler Cc: Christian Brauner Cc: James Morris Cc: Jann Horn Cc: Ondrej Mosnacek Cc: Paul Moore Cc: Serge E. Hallyn Cc: Stephen Smalley Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Mickaël Salaün Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- fs/fcntl.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 9f606714d081..1484f062ee65 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -86,8 +86,8 @@ static int setfl(int fd, struct file * filp, unsigned int arg) return error; } -static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, - int force) +void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, + int force) { write_lock_irq(&filp->f_owner.lock); if (force || !filp->f_owner.pid) { @@ -97,19 +97,13 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, if (pid) { const struct cred *cred = current_cred(); + security_file_set_fowner(filp); filp->f_owner.uid = cred->uid; filp->f_owner.euid = cred->euid; } } write_unlock_irq(&filp->f_owner.lock); } - -void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, - int force) -{ - security_file_set_fowner(filp); - f_modown(filp, pid, type, force); -} EXPORT_SYMBOL(__f_setown); int f_setown(struct file *filp, int who, int force) @@ -145,7 +139,7 @@ EXPORT_SYMBOL(f_setown); void f_delown(struct file *filp) { - f_modown(filp, NULL, PIDTYPE_TGID, 1); + __f_setown(filp, NULL, PIDTYPE_TGID, 1); } pid_t f_getown(struct file *filp) From 2a4a997adb36b06b49d5664b99bf9ba211a3d784 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Wed, 4 Sep 2024 20:34:57 +0800 Subject: [PATCH 470/534] nfs: fix memory leak in error path of nfs4_do_reclaim commit 8f6a7c9467eaf39da4c14e5474e46190ab3fb529 upstream. Commit c77e22834ae9 ("NFSv4: Fix a potential sleep while atomic in nfs4_do_reclaim()") separate out the freeing of the state owners from nfs4_purge_state_owners() and finish it outside the rcu lock. However, the error path is omitted. As a result, the state owners in "freeme" will not be released. Fix it by adding freeing in the error path. Fixes: c77e22834ae9 ("NFSv4: Fix a potential sleep while atomic in nfs4_do_reclaim()") Signed-off-by: Li Lingfeng Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c95c50328ced..6c4539ed2654 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1957,6 +1957,7 @@ restart: set_bit(ops->owner_flag_bit, &sp->so_flags); nfs4_put_state_owner(sp); status = nfs4_recovery_handle_error(clp, status); + nfs4_free_state_owners(&freeme); return (status != 0) ? status : -EAGAIN; } From e0be8f2d64d65c8b7a93f10b25c5631abc892661 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 14 Aug 2024 14:10:11 +0800 Subject: [PATCH 471/534] EDAC/igen6: Fix conversion of system address to physical memory address commit 0ad875f442e95d69a1145a38aabac2fd29984fe3 upstream. The conversion of system address to physical memory address (as viewed by the memory controller) by igen6_edac is incorrect when the system address is above the TOM (Total amount Of populated physical Memory) for Elkhart Lake and Ice Lake (Neural Network Processor). Fix this conversion. Fixes: 10590a9d4f23 ("EDAC/igen6: Add EDAC driver for Intel client SoCs using IBECC") Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Cc: stable@vger.kernel.org Link: https://lore.kernel.org/stable/20240814061011.43545-1-qiuxu.zhuo%40intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/igen6_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index a2984e9bed3a..a0edb61a5a01 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -245,7 +245,7 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc) if (igen6_tom <= _4GB) return eaddr + igen6_tolud - _4GB; - if (eaddr < _4GB) + if (eaddr >= igen6_tom) return eaddr + igen6_tolud - igen6_tom; return eaddr; From 662ec52260cc07b9ae53ecd3925183c29d34288b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2024 14:46:39 +0000 Subject: [PATCH 472/534] icmp: change the order of rate limits commit 8c2bd38b95f75f3d2a08c93e35303e26d480d24e upstream. ICMP messages are ratelimited : After the blamed commits, the two rate limiters are applied in this order: 1) host wide ratelimit (icmp_global_allow()) 2) Per destination ratelimit (inetpeer based) In order to avoid side-channels attacks, we need to apply the per destination check first. This patch makes the following change : 1) icmp_global_allow() checks if the host wide limit is reached. But credits are not yet consumed. This is deferred to 3) 2) The per destination limit is checked/updated. This might add a new node in inetpeer tree. 3) icmp_global_consume() consumes tokens if prior operations succeeded. This means that host wide ratelimit is still effective in keeping inetpeer tree small even under DDOS. As a bonus, I removed icmp_global.lock as the fast path can use a lock-free operation. Fixes: c0303efeab73 ("net: reduce cycles spend on ICMP replies that gets rate limited") Fixes: 4cdf507d5452 ("icmp: add a global rate limitation") Reported-by: Keyu Man Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Cc: Jesper Dangaard Brouer Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20240829144641.3880376-2-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 2 + net/ipv4/icmp.c | 103 ++++++++++++++++++++++++++--------------------- net/ipv6/icmp.c | 28 ++++++++----- 3 files changed, 76 insertions(+), 57 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 6f1ff4846451..7db5912e0c5f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -786,6 +786,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) } bool icmp_global_allow(void); +void icmp_global_consume(void); + extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3b221643206d..9dffdd876fef 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -222,57 +222,59 @@ int sysctl_icmp_msgs_per_sec __read_mostly = 1000; int sysctl_icmp_msgs_burst __read_mostly = 50; static struct { - spinlock_t lock; - u32 credit; + atomic_t credit; u32 stamp; -} icmp_global = { - .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock), -}; +} icmp_global; /** * icmp_global_allow - Are we allowed to send one more ICMP message ? * * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec. * Returns false if we reached the limit and can not send another packet. - * Note: called with BH disabled + * Works in tandem with icmp_global_consume(). */ bool icmp_global_allow(void) { - u32 credit, delta, incr = 0, now = (u32)jiffies; - bool rc = false; + u32 delta, now, oldstamp; + int incr, new, old; - /* Check if token bucket is empty and cannot be refilled - * without taking the spinlock. The READ_ONCE() are paired - * with the following WRITE_ONCE() in this same function. + /* Note: many cpus could find this condition true. + * Then later icmp_global_consume() could consume more credits, + * this is an acceptable race. */ - if (!READ_ONCE(icmp_global.credit)) { - delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ); - if (delta < HZ / 50) - return false; - } + if (atomic_read(&icmp_global.credit) > 0) + return true; - spin_lock(&icmp_global.lock); - delta = min_t(u32, now - icmp_global.stamp, HZ); - if (delta >= HZ / 50) { - incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; - if (incr) - WRITE_ONCE(icmp_global.stamp, now); + now = jiffies; + oldstamp = READ_ONCE(icmp_global.stamp); + delta = min_t(u32, now - oldstamp, HZ); + if (delta < HZ / 50) + return false; + + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; + if (!incr) + return false; + + if (cmpxchg(&icmp_global.stamp, oldstamp, now) == oldstamp) { + old = atomic_read(&icmp_global.credit); + do { + new = min(old + incr, READ_ONCE(sysctl_icmp_msgs_burst)); + } while (!atomic_try_cmpxchg(&icmp_global.credit, &old, new)); } - credit = min_t(u32, icmp_global.credit + incr, - READ_ONCE(sysctl_icmp_msgs_burst)); - if (credit) { - /* We want to use a credit of one in average, but need to randomize - * it for security reasons. - */ - credit = max_t(int, credit - get_random_u32_below(3), 0); - rc = true; - } - WRITE_ONCE(icmp_global.credit, credit); - spin_unlock(&icmp_global.lock); - return rc; + return true; } EXPORT_SYMBOL(icmp_global_allow); +void icmp_global_consume(void) +{ + int credits = get_random_u32_below(3); + + /* Note: this might make icmp_global.credit negative. */ + if (credits) + atomic_sub(credits, &icmp_global.credit); +} +EXPORT_SYMBOL(icmp_global_consume); + static bool icmpv4_mask_allow(struct net *net, int type, int code) { if (type > NR_ICMP_TYPES) @@ -289,14 +291,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return false; } -static bool icmpv4_global_allow(struct net *net, int type, int code) +static bool icmpv4_global_allow(struct net *net, int type, int code, + bool *apply_ratelimit) { if (icmpv4_mask_allow(net, type, code)) return true; - if (icmp_global_allow()) + if (icmp_global_allow()) { + *apply_ratelimit = true; return true; - + } __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); return false; } @@ -306,15 +310,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code) */ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, - struct flowi4 *fl4, int type, int code) + struct flowi4 *fl4, int type, int code, + bool apply_ratelimit) { struct dst_entry *dst = &rt->dst; struct inet_peer *peer; bool rc = true; int vif; - if (icmpv4_mask_allow(net, type, code)) - goto out; + if (!apply_ratelimit) + return true; /* No rate limit on loopback */ if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) @@ -329,6 +334,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, out: if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); + else + icmp_global_consume(); return rc; } @@ -400,6 +407,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->dst.dev); + bool apply_ratelimit = false; struct flowi4 fl4; struct sock *sk; struct inet_sock *inet; @@ -411,11 +419,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb)) return; - /* Needed by both icmp_global_allow and icmp_xmit_lock */ + /* Needed by both icmpv4_global_allow and icmp_xmit_lock */ local_bh_disable(); - /* global icmp_msgs_per_sec */ - if (!icmpv4_global_allow(net, type, code)) + /* is global icmp_msgs_per_sec exhausted ? */ + if (!icmpv4_global_allow(net, type, code, &apply_ratelimit)) goto out_bh_enable; sk = icmp_xmit_lock(net); @@ -448,7 +456,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) goto out_unlock; - if (icmpv4_xrlim_allow(net, rt, &fl4, type, code)) + if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit)) icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt); ip_rt_put(rt); out_unlock: @@ -592,6 +600,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, int room; struct icmp_bxm icmp_param; struct rtable *rt = skb_rtable(skb_in); + bool apply_ratelimit = false; struct ipcm_cookie ipc; struct flowi4 fl4; __be32 saddr; @@ -673,7 +682,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, } } - /* Needed by both icmp_global_allow and icmp_xmit_lock */ + /* Needed by both icmpv4_global_allow and icmp_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless @@ -681,7 +690,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow) */ if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) && - !icmpv4_global_allow(net, type, code)) + !icmpv4_global_allow(net, type, code, &apply_ratelimit)) goto out_bh_enable; sk = icmp_xmit_lock(net); @@ -740,7 +749,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, goto out_unlock; /* peer icmp_ratelimit */ - if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code)) + if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit)) goto ende; /* RFC says return as much as we can without exceeding 576 bytes. */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 93a594a901d1..a790294d3104 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type) return false; } -static bool icmpv6_global_allow(struct net *net, int type) +static bool icmpv6_global_allow(struct net *net, int type, + bool *apply_ratelimit) { if (icmpv6_mask_allow(net, type)) return true; - if (icmp_global_allow()) + if (icmp_global_allow()) { + *apply_ratelimit = true; return true; - + } __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); return false; } @@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type) * Check the ICMP output rate limit */ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi6 *fl6) + struct flowi6 *fl6, bool apply_ratelimit) { struct net *net = sock_net(sk); struct dst_entry *dst; bool res = false; - if (icmpv6_mask_allow(net, type)) + if (!apply_ratelimit) return true; /* @@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, if (!res) __ICMP6_INC_STATS(net, ip6_dst_idev(dst), ICMP6_MIB_RATELIMITHOST); + else + icmp_global_consume(); dst_release(dst); return res; } @@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, struct net *net; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; + bool apply_ratelimit = false; struct dst_entry *dst; struct icmp6hdr tmp_hdr; struct flowi6 fl6; @@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, return; } - /* Needed by both icmp_global_allow and icmpv6_xmit_lock */ + /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ - if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type)) + if (!(skb->dev->flags & IFF_LOOPBACK) && + !icmpv6_global_allow(net, type, &apply_ratelimit)) goto out_bh_enable; mip6_addr_swap(skb, parm); @@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, np = inet6_sk(sk); - if (!icmpv6_xrlim_allow(sk, type, &fl6)) + if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit)) goto out; tmp_hdr.icmp6_type = type; @@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = icmp6_hdr(skb); + bool apply_ratelimit = false; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; @@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) goto out; /* Check the ratelimit */ - if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) || - !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6)) + if ((!(skb->dev->flags & IFF_LOOPBACK) && + !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) || + !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit)) goto out_dst_release; idev = __in6_dev_get(skb->dev); From 62004f17039dc60cb500337b0792c9f6bbda8e27 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 20 Aug 2024 11:40:22 +0200 Subject: [PATCH 473/534] cpuidle: riscv-sbi: Use scoped device node handling to fix missing of_node_put commit a309320ddbac6b1583224fcb6bacd424bcf8637f upstream. Two return statements in sbi_cpuidle_dt_init_states() did not drop the OF node reference count. Solve the issue and simplify entire error handling with scoped/cleanup.h. Fixes: 6abf32f1d9c5 ("cpuidle: Add RISC-V SBI CPU idle driver") Cc: All applicable Signed-off-by: Krzysztof Kozlowski Reviewed-by: Anup Patel Link: https://patch.msgid.link/20240820094023.61155-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/cpuidle-riscv-sbi.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index e8094fc92491..c0fe92409175 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "cpuidle-riscv-sbi: " fmt +#include #include #include #include @@ -267,19 +268,16 @@ static int sbi_cpuidle_dt_init_states(struct device *dev, { struct sbi_cpuidle_data *data = per_cpu_ptr(&sbi_cpuidle_data, cpu); struct device_node *state_node; - struct device_node *cpu_node; u32 *states; int i, ret; - cpu_node = of_cpu_device_node_get(cpu); + struct device_node *cpu_node __free(device_node) = of_cpu_device_node_get(cpu); if (!cpu_node) return -ENODEV; states = devm_kcalloc(dev, state_count, sizeof(*states), GFP_KERNEL); - if (!states) { - ret = -ENOMEM; - goto fail; - } + if (!states) + return -ENOMEM; /* Parse SBI specific details from state DT nodes */ for (i = 1; i < state_count; i++) { @@ -295,10 +293,8 @@ static int sbi_cpuidle_dt_init_states(struct device *dev, pr_debug("sbi-state %#x index %d\n", states[i], i); } - if (i != state_count) { - ret = -ENODEV; - goto fail; - } + if (i != state_count) + return -ENODEV; /* Initialize optional data, used for the hierarchical topology. */ ret = sbi_dt_cpu_init_topology(drv, data, state_count, cpu); @@ -308,10 +304,7 @@ static int sbi_cpuidle_dt_init_states(struct device *dev, /* Store states in the per-cpu struct. */ data->states = states; -fail: - of_node_put(cpu_node); - - return ret; + return 0; } static void sbi_cpuidle_deinit_cpu(int cpu) From 1b8cf11b3ca593a8802a51802cd0c28c38501428 Mon Sep 17 00:00:00 2001 From: VanGiang Nguyen Date: Fri, 9 Aug 2024 06:21:42 +0000 Subject: [PATCH 474/534] padata: use integer wrap around to prevent deadlock on seq_nr overflow commit 9a22b2812393d93d84358a760c347c21939029a6 upstream. When submitting more than 2^32 padata objects to padata_do_serial, the current sorting implementation incorrectly sorts padata objects with overflowed seq_nr, causing them to be placed before existing objects in the reorder list. This leads to a deadlock in the serialization process as padata_find_next cannot match padata->seq_nr and pd->processed because the padata instance with overflowed seq_nr will be selected next. To fix this, we use an unsigned integer wrap around to correctly sort padata objects in scenarios with integer overflow. Fixes: bfde23ce200e ("padata: unbind parallel jobs from specific CPUs") Cc: Co-developed-by: Christian Gafert Signed-off-by: Christian Gafert Co-developed-by: Max Ferger Signed-off-by: Max Ferger Signed-off-by: Van Giang Nguyen Acked-by: Daniel Jordan Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- kernel/padata.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/padata.c b/kernel/padata.c index da788a58d19c..9bf77b58ee08 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -404,7 +404,8 @@ void padata_do_serial(struct padata_priv *padata) /* Sort in ascending order of sequence number. */ list_for_each_prev(pos, &reorder->list) { cur = list_entry(pos, struct padata_priv, list); - if (cur->seq_nr < padata->seq_nr) + /* Compare by difference to consider integer wrap around */ + if ((signed int)(cur->seq_nr - padata->seq_nr) < 0) break; } list_add(&padata->list, pos); From 45f690fae4736a15bf938356820c3369b575e018 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Thu, 5 Sep 2024 17:43:36 +0800 Subject: [PATCH 475/534] spi: fspi: involve lut_num for struct nxp_fspi_devtype_data commit 190b7e2efb1ed8435fc7431d9c7a2447d05d5066 upstream. The flexspi on different SoCs may have different number of LUTs. So involve lut_num in nxp_fspi_devtype_data to make distinguish. This patch prepare for the adding of imx8ulp. Fixes: ef89fd56bdfc ("arm64: dts: imx8ulp: add flexspi node") Cc: stable@kernel.org Signed-off-by: Haibo Chen Reviewed-by: Frank Li Link: https://patch.msgid.link/20240905094338.1986871-3-haibo.chen@nxp.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-nxp-fspi.c | 44 ++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index 93a9667f6bdc..5053dbd3081a 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -57,13 +57,6 @@ #include #include -/* - * The driver only uses one single LUT entry, that is updated on - * each call of exec_op(). Index 0 is preset at boot with a basic - * read operation, so let's use the last entry (31). - */ -#define SEQID_LUT 31 - /* Registers used by the driver */ #define FSPI_MCR0 0x00 #define FSPI_MCR0_AHB_TIMEOUT(x) ((x) << 24) @@ -263,9 +256,6 @@ #define FSPI_TFDR 0x180 #define FSPI_LUT_BASE 0x200 -#define FSPI_LUT_OFFSET (SEQID_LUT * 4 * 4) -#define FSPI_LUT_REG(idx) \ - (FSPI_LUT_BASE + FSPI_LUT_OFFSET + (idx) * 4) /* register map end */ @@ -341,6 +331,7 @@ struct nxp_fspi_devtype_data { unsigned int txfifo; unsigned int ahb_buf_size; unsigned int quirks; + unsigned int lut_num; bool little_endian; }; @@ -349,6 +340,7 @@ static struct nxp_fspi_devtype_data lx2160a_data = { .txfifo = SZ_1K, /* (128 * 64 bits) */ .ahb_buf_size = SZ_2K, /* (256 * 64 bits) */ .quirks = 0, + .lut_num = 32, .little_endian = true, /* little-endian */ }; @@ -357,6 +349,7 @@ static struct nxp_fspi_devtype_data imx8mm_data = { .txfifo = SZ_1K, /* (128 * 64 bits) */ .ahb_buf_size = SZ_2K, /* (256 * 64 bits) */ .quirks = 0, + .lut_num = 32, .little_endian = true, /* little-endian */ }; @@ -365,6 +358,7 @@ static struct nxp_fspi_devtype_data imx8qxp_data = { .txfifo = SZ_1K, /* (128 * 64 bits) */ .ahb_buf_size = SZ_2K, /* (256 * 64 bits) */ .quirks = 0, + .lut_num = 32, .little_endian = true, /* little-endian */ }; @@ -373,6 +367,7 @@ static struct nxp_fspi_devtype_data imx8dxl_data = { .txfifo = SZ_1K, /* (128 * 64 bits) */ .ahb_buf_size = SZ_2K, /* (256 * 64 bits) */ .quirks = FSPI_QUIRK_USE_IP_ONLY, + .lut_num = 32, .little_endian = true, /* little-endian */ }; @@ -544,6 +539,8 @@ static void nxp_fspi_prepare_lut(struct nxp_fspi *f, void __iomem *base = f->iobase; u32 lutval[4] = {}; int lutidx = 1, i; + u32 lut_offset = (f->devtype_data->lut_num - 1) * 4 * 4; + u32 target_lut_reg; /* cmd */ lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth), @@ -588,8 +585,10 @@ static void nxp_fspi_prepare_lut(struct nxp_fspi *f, fspi_writel(f, FSPI_LCKER_UNLOCK, f->iobase + FSPI_LCKCR); /* fill LUT */ - for (i = 0; i < ARRAY_SIZE(lutval); i++) - fspi_writel(f, lutval[i], base + FSPI_LUT_REG(i)); + for (i = 0; i < ARRAY_SIZE(lutval); i++) { + target_lut_reg = FSPI_LUT_BASE + lut_offset + i * 4; + fspi_writel(f, lutval[i], base + target_lut_reg); + } dev_dbg(f->dev, "CMD[%x] lutval[0:%x \t 1:%x \t 2:%x \t 3:%x], size: 0x%08x\n", op->cmd.opcode, lutval[0], lutval[1], lutval[2], lutval[3], op->data.nbytes); @@ -876,7 +875,7 @@ static int nxp_fspi_do_op(struct nxp_fspi *f, const struct spi_mem_op *op) void __iomem *base = f->iobase; int seqnum = 0; int err = 0; - u32 reg; + u32 reg, seqid_lut; reg = fspi_readl(f, base + FSPI_IPRXFCR); /* invalid RXFIFO first */ @@ -892,8 +891,9 @@ static int nxp_fspi_do_op(struct nxp_fspi *f, const struct spi_mem_op *op) * the LUT at each exec_op() call. And also specify the DATA * length, since it's has not been specified in the LUT. */ + seqid_lut = f->devtype_data->lut_num - 1; fspi_writel(f, op->data.nbytes | - (SEQID_LUT << FSPI_IPCR1_SEQID_SHIFT) | + (seqid_lut << FSPI_IPCR1_SEQID_SHIFT) | (seqnum << FSPI_IPCR1_SEQNUM_SHIFT), base + FSPI_IPCR1); @@ -1017,7 +1017,7 @@ static int nxp_fspi_default_setup(struct nxp_fspi *f) { void __iomem *base = f->iobase; int ret, i; - u32 reg; + u32 reg, seqid_lut; /* disable and unprepare clock to avoid glitch pass to controller */ nxp_fspi_clk_disable_unprep(f); @@ -1092,11 +1092,17 @@ static int nxp_fspi_default_setup(struct nxp_fspi *f) fspi_writel(f, reg, base + FSPI_FLSHB1CR1); fspi_writel(f, reg, base + FSPI_FLSHB2CR1); + /* + * The driver only uses one single LUT entry, that is updated on + * each call of exec_op(). Index 0 is preset at boot with a basic + * read operation, so let's use the last entry. + */ + seqid_lut = f->devtype_data->lut_num - 1; /* AHB Read - Set lut sequence ID for all CS. */ - fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA1CR2); - fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA2CR2); - fspi_writel(f, SEQID_LUT, base + FSPI_FLSHB1CR2); - fspi_writel(f, SEQID_LUT, base + FSPI_FLSHB2CR2); + fspi_writel(f, seqid_lut, base + FSPI_FLSHA1CR2); + fspi_writel(f, seqid_lut, base + FSPI_FLSHA2CR2); + fspi_writel(f, seqid_lut, base + FSPI_FLSHB1CR2); + fspi_writel(f, seqid_lut, base + FSPI_FLSHB2CR2); f->selected = -1; From f6bda3f118e385f395ece12384c7730fead4cc32 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 31 Aug 2024 12:11:28 +0200 Subject: [PATCH 476/534] ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp pinctrl commit 1b0e32753d8550908dff8982410357b5114be78c upstream. The property is "fsl,pins", not "fsl,pin". Wrong property means the pin configuration was not applied. Fixes dtbs_check warnings: imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pins' is a required property imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pin' does not match any of the regexes: 'pinctrl-[0-9]+' Cc: stable@vger.kernel.org Fixes: a58e4e608bc8 ("ARM: dts: imx6ul-geam: Add Engicam IMX6UL GEA M6UL initial support") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Michael Trimarchi Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts index 875ae699c5cb..ce9f4c226729 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts @@ -366,7 +366,7 @@ }; pinctrl_tsc: tscgrp { - fsl,pin = < + fsl,pins = < MX6UL_PAD_GPIO1_IO01__GPIO1_IO01 0xb0 MX6UL_PAD_GPIO1_IO02__GPIO1_IO02 0xb0 MX6UL_PAD_GPIO1_IO03__GPIO1_IO03 0xb0 From c48d5ad1c4b822e40a41117440db0da66870fca3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 Aug 2024 20:05:23 +0200 Subject: [PATCH 477/534] soc: versatile: realview: fix memory leak during device remove [ Upstream commit 1c4f26a41f9d052f334f6ae629e01f598ed93508 ] If device is unbound, the memory allocated for soc_dev_attr should be freed to prevent leaks. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20240825-soc-dev-fixes-v1-2-ff4b35abed83@linaro.org Signed-off-by: Linus Walleij Stable-dep-of: c774f2564c00 ("soc: versatile: realview: fix soc_dev leak during device remove") Signed-off-by: Sasha Levin --- drivers/soc/versatile/soc-realview.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/soc/versatile/soc-realview.c b/drivers/soc/versatile/soc-realview.c index c6876d232d8f..d304ee69287a 100644 --- a/drivers/soc/versatile/soc-realview.c +++ b/drivers/soc/versatile/soc-realview.c @@ -93,7 +93,7 @@ static int realview_soc_probe(struct platform_device *pdev) if (IS_ERR(syscon_regmap)) return PTR_ERR(syscon_regmap); - soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL); + soc_dev_attr = devm_kzalloc(&pdev->dev, sizeof(*soc_dev_attr), GFP_KERNEL); if (!soc_dev_attr) return -ENOMEM; @@ -106,10 +106,9 @@ static int realview_soc_probe(struct platform_device *pdev) soc_dev_attr->family = "Versatile"; soc_dev_attr->custom_attr_group = realview_groups[0]; soc_dev = soc_device_register(soc_dev_attr); - if (IS_ERR(soc_dev)) { - kfree(soc_dev_attr); + if (IS_ERR(soc_dev)) return -ENODEV; - } + ret = regmap_read(syscon_regmap, REALVIEW_SYS_ID_OFFSET, &realview_coreid); if (ret) From 8526ca3bc8af8106bdb94ff4ecb3ca2519a5f25f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 25 Aug 2024 20:05:24 +0200 Subject: [PATCH 478/534] soc: versatile: realview: fix soc_dev leak during device remove [ Upstream commit c774f2564c0086c23f5269fd4691f233756bf075 ] If device is unbound, the soc_dev should be unregistered to prevent memory leak. Fixes: a2974c9c1f83 ("soc: add driver for the ARM RealView") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20240825-soc-dev-fixes-v1-3-ff4b35abed83@linaro.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/soc/versatile/soc-realview.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/soc/versatile/soc-realview.c b/drivers/soc/versatile/soc-realview.c index d304ee69287a..cf91abe07d38 100644 --- a/drivers/soc/versatile/soc-realview.c +++ b/drivers/soc/versatile/soc-realview.c @@ -4,6 +4,7 @@ * * Author: Linus Walleij */ +#include #include #include #include @@ -81,6 +82,13 @@ static struct attribute *realview_attrs[] = { ATTRIBUTE_GROUPS(realview); +static void realview_soc_socdev_release(void *data) +{ + struct soc_device *soc_dev = data; + + soc_device_unregister(soc_dev); +} + static int realview_soc_probe(struct platform_device *pdev) { struct regmap *syscon_regmap; @@ -109,6 +117,11 @@ static int realview_soc_probe(struct platform_device *pdev) if (IS_ERR(soc_dev)) return -ENODEV; + ret = devm_add_action_or_reset(&pdev->dev, realview_soc_socdev_release, + soc_dev); + if (ret) + return ret; + ret = regmap_read(syscon_regmap, REALVIEW_SYS_ID_OFFSET, &realview_coreid); if (ret) From eff6dde4c3a0e2f93b1a9ababdbca78f3005b098 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 13 Dec 2023 16:42:37 +0000 Subject: [PATCH 479/534] usb: yurex: Replace snprintf() with the safer scnprintf() variant [ Upstream commit 86b20af11e84c26ae3fde4dcc4f490948e3f8035 ] There is a general misunderstanding amongst engineers that {v}snprintf() returns the length of the data *actually* encoded into the destination array. However, as per the C99 standard {v}snprintf() really returns the length of the data that *would have been* written if there were enough space for it. This misunderstanding has led to buffer-overruns in the past. It's generally considered safer to use the {v}scnprintf() variants in their place (or even sprintf() in simple cases). So let's do that. Whilst we're at it, let's define some magic numbers to increase readability and ease of maintenance. Link: https://lwn.net/Articles/69419/ Link: https://github.com/KSPP/linux/issues/105 Cc: Tomoki Sekiyama Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20231213164246.1021885-9-lee@kernel.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 93907620b308 ("USB: misc: yurex: fix race between read and write") Signed-off-by: Sasha Levin --- drivers/usb/misc/yurex.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index c640f98d20c5..5a13cddace0e 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -34,6 +34,8 @@ #define YUREX_BUF_SIZE 8 #define YUREX_WRITE_TIMEOUT (HZ*2) +#define MAX_S64_STRLEN 20 /* {-}922337203685477580{7,8} */ + /* table of devices that work with this driver */ static struct usb_device_id yurex_table[] = { { USB_DEVICE(YUREX_VENDOR_ID, YUREX_PRODUCT_ID) }, @@ -401,7 +403,7 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, { struct usb_yurex *dev; int len = 0; - char in_buffer[20]; + char in_buffer[MAX_S64_STRLEN]; unsigned long flags; dev = file->private_data; @@ -412,14 +414,14 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, return -ENODEV; } + if (WARN_ON_ONCE(dev->bbu > S64_MAX || dev->bbu < S64_MIN)) + return -EIO; + spin_lock_irqsave(&dev->lock, flags); - len = snprintf(in_buffer, 20, "%lld\n", dev->bbu); + scnprintf(in_buffer, MAX_S64_STRLEN, "%lld\n", dev->bbu); spin_unlock_irqrestore(&dev->lock, flags); mutex_unlock(&dev->io_mutex); - if (WARN_ON_ONCE(len >= sizeof(in_buffer))) - return -EIO; - return simple_read_from_buffer(buffer, count, ppos, in_buffer, len); } From 225643310df7926f6286250bae266fabd6514c16 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 12 Sep 2024 15:21:22 +0200 Subject: [PATCH 480/534] USB: misc: yurex: fix race between read and write [ Upstream commit 93907620b308609c72ba4b95b09a6aa2658bb553 ] The write code path touches the bbu member in a non atomic manner without taking the spinlock. Fix it. The bug is as old as the driver. Signed-off-by: Oliver Neukum CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240912132126.1034743-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/misc/yurex.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 5a13cddace0e..44136989f6c6 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -404,7 +404,6 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, struct usb_yurex *dev; int len = 0; char in_buffer[MAX_S64_STRLEN]; - unsigned long flags; dev = file->private_data; @@ -417,9 +416,9 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, if (WARN_ON_ONCE(dev->bbu > S64_MAX || dev->bbu < S64_MIN)) return -EIO; - spin_lock_irqsave(&dev->lock, flags); + spin_lock_irq(&dev->lock); scnprintf(in_buffer, MAX_S64_STRLEN, "%lld\n", dev->bbu); - spin_unlock_irqrestore(&dev->lock, flags); + spin_unlock_irq(&dev->lock); mutex_unlock(&dev->io_mutex); return simple_read_from_buffer(buffer, count, ppos, in_buffer, len); @@ -509,8 +508,11 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer, __func__, retval); goto error; } - if (set && timeout) + if (set && timeout) { + spin_lock_irq(&dev->lock); dev->bbu = c2; + spin_unlock_irq(&dev->lock); + } return timeout ? count : -EIO; error: From eef5d6219a81687aadd6a4e727bae8f84ec0012e Mon Sep 17 00:00:00 2001 From: Daehwan Jung Date: Mon, 10 Jun 2024 20:39:12 +0900 Subject: [PATCH 481/534] xhci: Add a quirk for writing ERST in high-low order [ Upstream commit bc162403e33e1d57e40994977acaf19f1434e460 ] This quirk is for the controller that has a limitation in supporting separate ERSTBA_HI and ERSTBA_LO programming. It's supported when the ERSTBA is programmed ERSTBA_HI before ERSTBA_LO. That's because the internal initialization of event ring fetches the "Event Ring Segment Table Entry" based on the indication of ERSTBA_LO written. Signed-off-by: Daehwan Jung Link: https://lore.kernel.org/r/1718019553-111939-3-git-send-email-dh10.jung@samsung.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: e5fa8db0be3e ("usb: xhci: fix loss of data on Cadence xHC") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-mem.c | 5 ++++- drivers/usb/host/xhci.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index b1e3fa54c639..54c47463c215 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2289,7 +2289,10 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base); erst_base &= ERST_BASE_RSVDP; erst_base |= ir->erst.erst_dma_addr & ~ERST_BASE_RSVDP; - xhci_write_64(xhci, erst_base, &ir->ir_set->erst_base); + if (xhci->quirks & XHCI_WRITE_64_HI_LO) + hi_lo_writeq(erst_base, &ir->ir_set->erst_base); + else + xhci_write_64(xhci, erst_base, &ir->ir_set->erst_base); /* Set the event ring dequeue address of this interrupter */ xhci_set_hc_event_deq(xhci, ir); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index b29fe4716f34..8e4d465b9dd6 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -17,6 +17,7 @@ #include #include #include +#include /* Code sharing between pci-quirks and xhci hcd */ #include "xhci-ext-caps.h" @@ -1913,6 +1914,7 @@ struct xhci_hcd { #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) #define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) #define XHCI_ZHAOXIN_HOST BIT_ULL(46) +#define XHCI_WRITE_64_HI_LO BIT_ULL(47) unsigned int num_active_eps; unsigned int limit_active_eps; From aafeabf2765f3371a3c21d9e1b7e4a9d961592bc Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 5 Sep 2024 07:03:28 +0000 Subject: [PATCH 482/534] usb: xhci: fix loss of data on Cadence xHC [ Upstream commit e5fa8db0be3e8757e8641600c518425a4589b85c ] Streams should flush their TRB cache, re-read TRBs, and start executing TRBs from the beginning of the new dequeue pointer after a 'Set TR Dequeue Pointer' command. Cadence controllers may fail to start from the beginning of the dequeue TRB as it doesn't clear the Opaque 'RsvdO' field of the stream context during 'Set TR Dequeue' command. This stream context area is where xHC stores information about the last partially executed TD when a stream is stopped. xHC uses this information to resume the transfer where it left mid TD, when the stream is restarted. Patch fixes this by clearing out all RsvdO fields before initializing new Stream transfer using a 'Set TR Dequeue Pointer' command. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: stable@vger.kernel.org Signed-off-by: Pawel Laszczak Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB95386A40146E3EC64086F409DD9D2@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/cdns3/host.c | 4 +++- drivers/usb/host/xhci-pci.c | 7 +++++++ drivers/usb/host/xhci-ring.c | 14 ++++++++++++++ drivers/usb/host/xhci.h | 1 + 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index ceca4d839dfd..7ba760ee62e3 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -62,7 +62,9 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .resume_quirk = xhci_cdns3_resume_quirk, }; -static const struct xhci_plat_priv xhci_plat_cdnsp_xhci; +static const struct xhci_plat_priv xhci_plat_cdnsp_xhci = { + .quirks = XHCI_CDNS_SCTX_QUIRK, +}; static int __cdns_host_init(struct cdns *cdns) { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5c6fb9958211..dd02b064ecd4 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -75,6 +75,9 @@ #define PCI_DEVICE_ID_ASMEDIA_2142_XHCI 0x2142 #define PCI_DEVICE_ID_ASMEDIA_3242_XHCI 0x3242 +#define PCI_DEVICE_ID_CADENCE 0x17CD +#define PCI_DEVICE_ID_CADENCE_SSP 0x0200 + static const char hcd_name[] = "xhci_hcd"; static struct hc_driver __read_mostly xhci_pci_hc_driver; @@ -539,6 +542,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; } + if (pdev->vendor == PCI_DEVICE_ID_CADENCE && + pdev->device == PCI_DEVICE_ID_CADENCE_SSP) + xhci->quirks |= XHCI_CDNS_SCTX_QUIRK; + /* xHC spec requires PCI devices to support D3hot and D3cold */ if (xhci->hci_version >= 0x120) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 8dd85221cd92..95d8cf24b015 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1414,6 +1414,20 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, struct xhci_stream_ctx *ctx = &ep->stream_info->stream_ctx_array[stream_id]; deq = le64_to_cpu(ctx->stream_ring) & SCTX_DEQ_MASK; + + /* + * Cadence xHCI controllers store some endpoint state + * information within Rsvd0 fields of Stream Endpoint + * context. This field is not cleared during Set TR + * Dequeue Pointer command which causes XDMA to skip + * over transfer ring and leads to data loss on stream + * pipe. + * To fix this issue driver must clear Rsvd0 field. + */ + if (xhci->quirks & XHCI_CDNS_SCTX_QUIRK) { + ctx->reserved[0] = 0; + ctx->reserved[1] = 0; + } } else { deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK; } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 8e4d465b9dd6..f5efee06ff06 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1915,6 +1915,7 @@ struct xhci_hcd { #define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) #define XHCI_ZHAOXIN_HOST BIT_ULL(46) #define XHCI_WRITE_64_HI_LO BIT_ULL(47) +#define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48) unsigned int num_active_eps; unsigned int limit_active_eps; From 8b48ea27185d8ce6c2ccfb2b714e54facfb1c5d9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Apr 2024 12:10:17 +0200 Subject: [PATCH 483/534] pps: remove usage of the deprecated ida_simple_xx() API [ Upstream commit 55dbc5b5174d0e7d1fa397d05aa4cb145e8b887e ] ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). This is less verbose. Link: https://lkml.kernel.org/r/9f681747d446b874952a892491387d79ffe565a9.1713089394.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Rodolfo Giometti Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Stable-dep-of: 62c5a01a5711 ("pps: add an error check in parport_attach") Signed-off-by: Sasha Levin --- drivers/pps/clients/pps_parport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c index 42f93d4c6ee3..af972cdc04b5 100644 --- a/drivers/pps/clients/pps_parport.c +++ b/drivers/pps/clients/pps_parport.c @@ -148,7 +148,7 @@ static void parport_attach(struct parport *port) return; } - index = ida_simple_get(&pps_client_index, 0, 0, GFP_KERNEL); + index = ida_alloc(&pps_client_index, GFP_KERNEL); memset(&pps_client_cb, 0, sizeof(pps_client_cb)); pps_client_cb.private = device; pps_client_cb.irq_func = parport_irq; @@ -188,7 +188,7 @@ err_release_dev: err_unregister_dev: parport_unregister_device(device->pardev); err_free: - ida_simple_remove(&pps_client_index, index); + ida_free(&pps_client_index, index); kfree(device); } @@ -208,7 +208,7 @@ static void parport_detach(struct parport *port) pps_unregister_source(device->pps); parport_release(pardev); parport_unregister_device(pardev); - ida_simple_remove(&pps_client_index, device->index); + ida_free(&pps_client_index, device->index); kfree(device); } From 73c1928a007617acf6bfd23297440c99aea0cc16 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 28 Aug 2024 21:18:14 +0800 Subject: [PATCH 484/534] pps: add an error check in parport_attach [ Upstream commit 62c5a01a5711c8e4be8ae7b6f0db663094615d48 ] In parport_attach, the return value of ida_alloc is unchecked, witch leads to the use of an invalid index value. To address this issue, index should be checked. When the index value is abnormal, the device should be freed. Found by code review, compile tested only. Cc: stable@vger.kernel.org Fixes: fb56d97df70e ("pps: client: use new parport device model") Signed-off-by: Ma Ke Acked-by: Rodolfo Giometti Link: https://lore.kernel.org/r/20240828131814.3034338-1-make24@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/pps/clients/pps_parport.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c index af972cdc04b5..53e9c304ae0a 100644 --- a/drivers/pps/clients/pps_parport.c +++ b/drivers/pps/clients/pps_parport.c @@ -149,6 +149,9 @@ static void parport_attach(struct parport *port) } index = ida_alloc(&pps_client_index, GFP_KERNEL); + if (index < 0) + goto err_free_device; + memset(&pps_client_cb, 0, sizeof(pps_client_cb)); pps_client_cb.private = device; pps_client_cb.irq_func = parport_irq; @@ -159,7 +162,7 @@ static void parport_attach(struct parport *port) index); if (!device->pardev) { pr_err("couldn't register with %s\n", port->name); - goto err_free; + goto err_free_ida; } if (parport_claim_or_block(device->pardev) < 0) { @@ -187,8 +190,9 @@ err_release_dev: parport_release(device->pardev); err_unregister_dev: parport_unregister_device(device->pardev); -err_free: +err_free_ida: ida_free(&pps_client_index, index); +err_free_device: kfree(device); } From 88e26a196aa403eaa93754a4ff6f876dff655560 Mon Sep 17 00:00:00 2001 From: Michael Trimarchi Date: Sun, 24 Dec 2023 14:12:00 +0100 Subject: [PATCH 485/534] tty: serial: kgdboc: Fix 8250_* kgdb over serial [ Upstream commit 788aeef392d27545ae99af2875068a9dd0531d5f ] Check if port type is not PORT_UNKNOWN during poll_init. The kgdboc calls the tty_find_polling_driver that check if the serial is able to use poll_init. The poll_init calls the uart uart_poll_init that try to configure the uart with the selected boot parameters. The uart must be ready before setting parameters. Seems that PORT_UNKNOWN is already used by other functions in serial_core to detect uart status, so use the same to avoid to use it in invalid state. The crash happen for instance in am62x architecture where the 8250 register the platform driver after the 8250 core is initialized. Follow the report crash coming from KGDB Thread 2 received signal SIGSEGV, Segmentation fault. [Switching to Thread 1] _outb (addr=, value=) at ./include/asm-generic/io.h:584 584 __raw_writeb(value, PCI_IOBASE + addr); (gdb) bt This section of the code is too early because in this case the omap serial is not probed Thread 2 received signal SIGSEGV, Segmentation fault. [Switching to Thread 1] _outb (addr=, value=) at ./include/asm-generic/io.h:584 584 __raw_writeb(value, PCI_IOBASE + addr); (gdb) bt Thread 2 received signal SIGSEGV, Segmentation fault. [Switching to Thread 1] _outb (addr=, value=) at ./include/asm-generic/io.h:584 584 __raw_writeb(value, PCI_IOBASE + addr); (gdb) bt 0 _outb (addr=, value=) at ./include/asm-generic/io.h:584 1 logic_outb (value=0 '\000', addr=18446739675637874689) at lib/logic_pio.c:299 2 0xffff80008082dfcc in io_serial_out (p=0x0, offset=16760830, value=0) at drivers/tty/serial/8250/8250_port.c:416 3 0xffff80008082fe34 in serial_port_out (value=, offset=, up=) at ./include/linux/serial_core.h:677 4 serial8250_do_set_termios (port=0xffff8000828ee940 , termios=0xffff80008292b93c, old=0x0) at drivers/tty/serial/8250/8250_port.c:2860 5 0xffff800080830064 in serial8250_set_termios (port=0xfffffbfffe800000, termios=0xffbffe, old=0x0) at drivers/tty/serial/8250/8250_port.c:2912 6 0xffff80008082571c in uart_set_options (port=0xffff8000828ee940 , co=0x0, baud=115200, parity=110, bits=8, flow=110) at drivers/tty/serial/serial_core.c:2285 7 0xffff800080828434 in uart_poll_init (driver=0xfffffbfffe800000, line=16760830, options=0xffff8000828f7506 "115200n8") at drivers/tty/serial/serial_core.c:2656 8 0xffff800080801690 in tty_find_polling_driver (name=0xffff8000828f7500 "ttyS2,115200n8", line=0xffff80008292ba90) at drivers/tty/tty_io.c:410 9 0xffff80008086c0b0 in configure_kgdboc () at drivers/tty/serial/kgdboc.c:194 10 0xffff80008086c1ec in kgdboc_probe (pdev=0xfffffbfffe800000) at drivers/tty/serial/kgdboc.c:249 11 0xffff8000808b399c in platform_probe (_dev=0xffff000000ebb810) at drivers/base/platform.c:1404 12 0xffff8000808b0b44 in call_driver_probe (drv=, dev=) at drivers/base/dd.c:579 13 really_probe (dev=0xffff000000ebb810, drv=0xffff80008277f138 ) at drivers/base/dd.c:658 14 0xffff8000808b0d2c in __driver_probe_device (drv=0xffff80008277f138 , dev=0xffff000000ebb810) at drivers/base/dd.c:800 15 0xffff8000808b0eb8 in driver_probe_device (drv=0xfffffbfffe800000, dev=0xffff000000ebb810) at drivers/base/dd.c:830 16 0xffff8000808b0ff4 in __device_attach_driver (drv=0xffff80008277f138 , _data=0xffff80008292bc48) at drivers/base/dd.c:958 17 0xffff8000808ae970 in bus_for_each_drv (bus=0xfffffbfffe800000, start=0x0, data=0xffff80008292bc48, fn=0xffff8000808b0f3c <__device_attach_driver>) at drivers/base/bus.c:457 18 0xffff8000808b1408 in __device_attach (dev=0xffff000000ebb810, allow_async=true) at drivers/base/dd.c:1030 19 0xffff8000808b16d8 in device_initial_probe (dev=0xfffffbfffe800000) at drivers/base/dd.c:1079 20 0xffff8000808af9f4 in bus_probe_device (dev=0xffff000000ebb810) at drivers/base/bus.c:532 21 0xffff8000808ac77c in device_add (dev=0xfffffbfffe800000) at drivers/base/core.c:3625 22 0xffff8000808b3428 in platform_device_add (pdev=0xffff000000ebb800) at drivers/base/platform.c:716 23 0xffff800081b5dc0c in init_kgdboc () at drivers/tty/serial/kgdboc.c:292 24 0xffff800080014db0 in do_one_initcall (fn=0xffff800081b5dba4 ) at init/main.c:1236 25 0xffff800081b0114c in do_initcall_level (command_line=, level=) at init/main.c:1298 26 do_initcalls () at init/main.c:1314 27 do_basic_setup () at init/main.c:1333 28 kernel_init_freeable () at init/main.c:1551 29 0xffff8000810271ec in kernel_init (unused=0xfffffbfffe800000) at init/main.c:1441 30 0xffff800080015e80 in ret_from_fork () at arch/arm64/kernel/entry.S:857 Reviewed-by: Douglas Anderson Signed-off-by: Michael Trimarchi Link: https://lore.kernel.org/r/20231224131200.266224-1-michael@amarulasolutions.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: d0009a32c9e4 ("serial: don't use uninitialized value in uart_poll_init()") Signed-off-by: Sasha Levin --- drivers/tty/serial/serial_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index ed8798fdf522..3fb55f46a891 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2697,7 +2697,8 @@ static int uart_poll_init(struct tty_driver *driver, int line, char *options) mutex_lock(&tport->mutex); port = uart_port_check(state); - if (!port || !(port->ops->poll_get_char && port->ops->poll_put_char)) { + if (!port || port->type == PORT_UNKNOWN || + !(port->ops->poll_get_char && port->ops->poll_put_char)) { ret = -1; goto out; } From 1d8c1add5e36e04f1ac067b1271f146bcee9fd14 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 5 Aug 2024 12:20:36 +0200 Subject: [PATCH 486/534] serial: don't use uninitialized value in uart_poll_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d0009a32c9e4e083358092f3c97e3c6e803a8930 ] Coverity reports (as CID 1536978) that uart_poll_init() passes uninitialized pm_state to uart_change_pm(). It is in case the first 'if' takes the true branch (does "goto out;"). Fix this and simplify the function by simple guard(mutex). The code needs no labels after this at all. And it is pretty clear that the code has not fiddled with pm_state at that point. Signed-off-by: Jiri Slaby (SUSE) Fixes: 5e227ef2aa38 (serial: uart_poll_init() should power on the UART) Cc: stable@vger.kernel.org Cc: Douglas Anderson Cc: Greg Kroah-Hartman Reviewed-by: Ilpo Järvinen Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20240805102046.307511-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/serial_core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 3fb55f46a891..5d5570bebfeb 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2694,14 +2694,13 @@ static int uart_poll_init(struct tty_driver *driver, int line, char *options) int ret = 0; tport = &state->port; - mutex_lock(&tport->mutex); + + guard(mutex)(&tport->mutex); port = uart_port_check(state); if (!port || port->type == PORT_UNKNOWN || - !(port->ops->poll_get_char && port->ops->poll_put_char)) { - ret = -1; - goto out; - } + !(port->ops->poll_get_char && port->ops->poll_put_char)) + return -1; pm_state = state->pm_state; uart_change_pm(state, UART_PM_STATE_ON); @@ -2721,10 +2720,10 @@ static int uart_poll_init(struct tty_driver *driver, int line, char *options) ret = uart_set_options(port, NULL, baud, parity, bits, flow); console_list_unlock(); } -out: + if (ret) uart_change_pm(state, pm_state); - mutex_unlock(&tport->mutex); + return ret; } From d5c5afdb9e1efe7e7061e3688356bdae50bfd174 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 5 Dec 2023 02:50:11 -0800 Subject: [PATCH 487/534] x86/idtentry: Incorporate definitions/declarations of the FRED entries [ Upstream commit 90f357208200a941e90e75757123326684d715d0 ] FRED and IDT can share most of the definitions and declarations so that in the majority of cases the actual handler implementation is the same. The differences are the exceptions where FRED stores exception related information on the stack and the sysvec implementations as FRED can handle irqentry/exit() in the dispatcher instead of having it in each handler. Also add stub defines for vectors which are not used due to Kconfig decisions to spare the ifdeffery in the actual FRED dispatch code. Suggested-by: Thomas Gleixner Signed-off-by: Xin Li Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Shan Kang Link: https://lore.kernel.org/r/20231205105030.8698-23-xin3.li@intel.com Stable-dep-of: 477d81a1c47a ("x86/entry: Remove unwanted instrumentation in common_interrupt()") Signed-off-by: Sasha Levin --- arch/x86/include/asm/idtentry.h | 71 +++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 13639e57e1f8..e2585f61082c 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -13,15 +13,18 @@ #include +typedef void (*idtentry_t)(struct pt_regs *regs); + /** * DECLARE_IDTENTRY - Declare functions for simple IDT entry points * No error code pushed by hardware * @vector: Vector number (ignored for C) * @func: Function name of the entry point * - * Declares three functions: + * Declares four functions: * - The ASM entry point: asm_##func * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the FRED event dispatcher (maybe unused) * - The C handler called from the ASM entry point * * Note: This is the C variant of DECLARE_IDTENTRY(). As the name says it @@ -31,6 +34,7 @@ #define DECLARE_IDTENTRY(vector, func) \ asmlinkage void asm_##func(void); \ asmlinkage void xen_asm_##func(void); \ + void fred_##func(struct pt_regs *regs); \ __visible void func(struct pt_regs *regs) /** @@ -137,6 +141,17 @@ static __always_inline void __##func(struct pt_regs *regs, \ #define DEFINE_IDTENTRY_RAW(func) \ __visible noinstr void func(struct pt_regs *regs) +/** + * DEFINE_FREDENTRY_RAW - Emit code for raw FRED entry points + * @func: Function name of the entry point + * + * @func is called from the FRED event dispatcher with interrupts disabled. + * + * See @DEFINE_IDTENTRY_RAW for further details. + */ +#define DEFINE_FREDENTRY_RAW(func) \ +noinstr void fred_##func(struct pt_regs *regs) + /** * DECLARE_IDTENTRY_RAW_ERRORCODE - Declare functions for raw IDT entry points * Error code pushed by hardware @@ -233,17 +248,27 @@ static noinline void __##func(struct pt_regs *regs, u32 vector) #define DEFINE_IDTENTRY_SYSVEC(func) \ static void __##func(struct pt_regs *regs); \ \ +static __always_inline void instr_##func(struct pt_regs *regs) \ +{ \ + kvm_set_cpu_l1tf_flush_l1d(); \ + run_sysvec_on_irqstack_cond(__##func, regs); \ +} \ + \ __visible noinstr void func(struct pt_regs *regs) \ { \ irqentry_state_t state = irqentry_enter(regs); \ \ instrumentation_begin(); \ - kvm_set_cpu_l1tf_flush_l1d(); \ - run_sysvec_on_irqstack_cond(__##func, regs); \ + instr_##func (regs); \ instrumentation_end(); \ irqentry_exit(regs, state); \ } \ \ +void fred_##func(struct pt_regs *regs) \ +{ \ + instr_##func (regs); \ +} \ + \ static noinline void __##func(struct pt_regs *regs) /** @@ -260,19 +285,29 @@ static noinline void __##func(struct pt_regs *regs) #define DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) \ static __always_inline void __##func(struct pt_regs *regs); \ \ +static __always_inline void instr_##func(struct pt_regs *regs) \ +{ \ + __irq_enter_raw(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + __##func (regs); \ + __irq_exit_raw(); \ +} \ + \ __visible noinstr void func(struct pt_regs *regs) \ { \ irqentry_state_t state = irqentry_enter(regs); \ \ instrumentation_begin(); \ - __irq_enter_raw(); \ - kvm_set_cpu_l1tf_flush_l1d(); \ - __##func (regs); \ - __irq_exit_raw(); \ + instr_##func (regs); \ instrumentation_end(); \ irqentry_exit(regs, state); \ } \ \ +void fred_##func(struct pt_regs *regs) \ +{ \ + instr_##func (regs); \ +} \ + \ static __always_inline void __##func(struct pt_regs *regs) /** @@ -410,15 +445,18 @@ __visible noinstr void func(struct pt_regs *regs, \ /* C-Code mapping */ #define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW #define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW +#define DEFINE_FREDENTRY_NMI DEFINE_FREDENTRY_RAW #ifdef CONFIG_X86_64 #define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST #define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST +#define DEFINE_FREDENTRY_MCE DEFINE_FREDENTRY_RAW #define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST #define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST +#define DEFINE_FREDENTRY_DEBUG DEFINE_FREDENTRY_RAW #endif #else /* !__ASSEMBLY__ */ @@ -655,23 +693,36 @@ DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi); DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot); DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function); +#else +# define fred_sysvec_reschedule_ipi NULL +# define fred_sysvec_reboot NULL +# define fred_sysvec_call_function_single NULL +# define fred_sysvec_call_function NULL #endif #ifdef CONFIG_X86_LOCAL_APIC # ifdef CONFIG_X86_MCE_THRESHOLD DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold); +# else +# define fred_sysvec_threshold NULL # endif # ifdef CONFIG_X86_MCE_AMD DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error); +# else +# define fred_sysvec_deferred_error NULL # endif # ifdef CONFIG_X86_THERMAL_VECTOR DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal); +# else +# define fred_sysvec_thermal NULL # endif # ifdef CONFIG_IRQ_WORK DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work); +# else +# define fred_sysvec_irq_work NULL # endif #endif @@ -679,12 +730,16 @@ DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work); DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi); DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi); DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi); +#else +# define fred_sysvec_kvm_posted_intr_ipi NULL +# define fred_sysvec_kvm_posted_intr_wakeup_ipi NULL +# define fred_sysvec_kvm_posted_intr_nested_ipi NULL #endif #if IS_ENABLED(CONFIG_HYPERV) DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); -DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); +DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); #endif #if IS_ENABLED(CONFIG_ACRN_GUEST) From 32e93cae4dc456ad731ff9cd8b29bc8e9716ad5f Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 11 Jun 2024 09:50:30 +0200 Subject: [PATCH 488/534] x86/entry: Remove unwanted instrumentation in common_interrupt() [ Upstream commit 477d81a1c47a1b79b9c08fc92b5dea3c5143800b ] common_interrupt() and related variants call kvm_set_cpu_l1tf_flush_l1d(), which is neither marked noinstr nor __always_inline. So compiler puts it out of line and adds instrumentation to it. Since the call is inside of instrumentation_begin/end(), objtool does not warn about it. The manifestation is that KCOV produces spurious coverage in kvm_set_cpu_l1tf_flush_l1d() in random places because the call happens when preempt count is not yet updated to say that the kernel is in an interrupt. Mark kvm_set_cpu_l1tf_flush_l1d() as __always_inline and move it out of the instrumentation_begin/end() section. It only calls __this_cpu_write() which is already safe to call in noinstr contexts. Fixes: 6368558c3710 ("x86/entry: Provide IDTENTRY_SYSVEC") Signed-off-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Reviewed-by: Alexander Potapenko Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/3f9a1de9e415fcb53d07dc9e19fa8481bb021b1b.1718092070.git.dvyukov@google.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/hardirq.h | 8 ++++++-- arch/x86/include/asm/idtentry.h | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 66837b8c67f1..f2e245741afc 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -63,7 +63,11 @@ extern u64 arch_irq_stat(void); #define local_softirq_pending_ref pcpu_hot.softirq_pending #if IS_ENABLED(CONFIG_KVM_INTEL) -static inline void kvm_set_cpu_l1tf_flush_l1d(void) +/* + * This function is called from noinstr interrupt contexts + * and must be inlined to not get instrumentation. + */ +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1); } @@ -78,7 +82,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void) return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d); } #else /* !IS_ENABLED(CONFIG_KVM_INTEL) */ -static inline void kvm_set_cpu_l1tf_flush_l1d(void) { } +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { } #endif /* IS_ENABLED(CONFIG_KVM_INTEL) */ #endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index e2585f61082c..10603e185111 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs, \ irqentry_state_t state = irqentry_enter(regs); \ u32 vector = (u32)(u8)error_code; \ \ + kvm_set_cpu_l1tf_flush_l1d(); \ instrumentation_begin(); \ - kvm_set_cpu_l1tf_flush_l1d(); \ run_irq_on_irqstack_cond(__##func, regs, vector); \ instrumentation_end(); \ irqentry_exit(regs, state); \ @@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs); \ \ static __always_inline void instr_##func(struct pt_regs *regs) \ { \ - kvm_set_cpu_l1tf_flush_l1d(); \ run_sysvec_on_irqstack_cond(__##func, regs); \ } \ \ @@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs) \ { \ irqentry_state_t state = irqentry_enter(regs); \ \ + kvm_set_cpu_l1tf_flush_l1d(); \ instrumentation_begin(); \ instr_##func (regs); \ instrumentation_end(); \ @@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs); \ static __always_inline void instr_##func(struct pt_regs *regs) \ { \ __irq_enter_raw(); \ - kvm_set_cpu_l1tf_flush_l1d(); \ __##func (regs); \ __irq_exit_raw(); \ } \ @@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs) \ { \ irqentry_state_t state = irqentry_enter(regs); \ \ + kvm_set_cpu_l1tf_flush_l1d(); \ instrumentation_begin(); \ instr_##func (regs); \ instrumentation_end(); \ From 459b724c3c31b80b13256e51bd917b7ea7282a7a Mon Sep 17 00:00:00 2001 From: Syed Nayyar Waris Date: Wed, 27 Mar 2024 16:23:38 +0100 Subject: [PATCH 489/534] lib/bitmap: add bitmap_{read,write}() [ Upstream commit 63c15822b8dd02a2423cfd92232245ace3f7a11b ] The two new functions allow reading/writing values of length up to BITS_PER_LONG bits at arbitrary position in the bitmap. The code was taken from "bitops: Introduce the for_each_set_clump macro" by Syed Nayyar Waris with a number of changes and simplifications: - instead of using roundup(), which adds an unnecessary dependency on , we calculate space as BITS_PER_LONG-offset; - indentation is reduced by not using else-clauses (suggested by checkpatch for bitmap_get_value()); - bitmap_get_value()/bitmap_set_value() are renamed to bitmap_read() and bitmap_write(); - some redundant computations are omitted. Cc: Arnd Bergmann Signed-off-by: Syed Nayyar Waris Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/lkml/fe12eedf3666f4af5138de0e70b67a07c7f40338.1592224129.git.syednwaris@gmail.com/ Suggested-by: Yury Norov Co-developed-by: Alexander Potapenko Signed-off-by: Alexander Potapenko Reviewed-by: Andy Shevchenko Acked-by: Yury Norov Signed-off-by: Yury Norov Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller Stable-dep-of: 77b0b98bb743 ("btrfs: subpage: fix the bitmap dump which can cause bitmap corruption") Signed-off-by: Sasha Levin --- include/linux/bitmap.h | 77 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3c8d2b87a9ed..729ec2453149 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -77,6 +77,10 @@ struct device; * bitmap_to_arr64(buf, src, nbits) Copy nbits from buf to u64[] dst * bitmap_get_value8(map, start) Get 8bit value from map at start * bitmap_set_value8(map, value, start) Set 8bit value to map at start + * bitmap_read(map, start, nbits) Read an nbits-sized value from + * map at start + * bitmap_write(map, value, start, nbits) Write an nbits-sized value to + * map at start * * Note, bitmap_zero() and bitmap_fill() operate over the region of * unsigned longs, that is, bits behind bitmap till the unsigned long @@ -613,6 +617,79 @@ static inline void bitmap_set_value8(unsigned long *map, unsigned long value, map[index] |= value << offset; } +/** + * bitmap_read - read a value of n-bits from the memory region + * @map: address to the bitmap memory region + * @start: bit offset of the n-bit value + * @nbits: size of value in bits, nonzero, up to BITS_PER_LONG + * + * Returns: value of @nbits bits located at the @start bit offset within the + * @map memory region. For @nbits = 0 and @nbits > BITS_PER_LONG the return + * value is undefined. + */ +static inline unsigned long bitmap_read(const unsigned long *map, + unsigned long start, + unsigned long nbits) +{ + size_t index = BIT_WORD(start); + unsigned long offset = start % BITS_PER_LONG; + unsigned long space = BITS_PER_LONG - offset; + unsigned long value_low, value_high; + + if (unlikely(!nbits || nbits > BITS_PER_LONG)) + return 0; + + if (space >= nbits) + return (map[index] >> offset) & BITMAP_LAST_WORD_MASK(nbits); + + value_low = map[index] & BITMAP_FIRST_WORD_MASK(start); + value_high = map[index + 1] & BITMAP_LAST_WORD_MASK(start + nbits); + return (value_low >> offset) | (value_high << space); +} + +/** + * bitmap_write - write n-bit value within a memory region + * @map: address to the bitmap memory region + * @value: value to write, clamped to nbits + * @start: bit offset of the n-bit value + * @nbits: size of value in bits, nonzero, up to BITS_PER_LONG. + * + * bitmap_write() behaves as-if implemented as @nbits calls of __assign_bit(), + * i.e. bits beyond @nbits are ignored: + * + * for (bit = 0; bit < nbits; bit++) + * __assign_bit(start + bit, bitmap, val & BIT(bit)); + * + * For @nbits == 0 and @nbits > BITS_PER_LONG no writes are performed. + */ +static inline void bitmap_write(unsigned long *map, unsigned long value, + unsigned long start, unsigned long nbits) +{ + size_t index; + unsigned long offset; + unsigned long space; + unsigned long mask; + bool fit; + + if (unlikely(!nbits || nbits > BITS_PER_LONG)) + return; + + mask = BITMAP_LAST_WORD_MASK(nbits); + value &= mask; + offset = start % BITS_PER_LONG; + space = BITS_PER_LONG - offset; + fit = space >= nbits; + index = BIT_WORD(start); + + map[index] &= (fit ? (~(mask << offset)) : ~BITMAP_FIRST_WORD_MASK(start)); + map[index] |= value << offset; + if (fit) + return; + + map[index + 1] &= BITMAP_FIRST_WORD_MASK(start + nbits); + map[index + 1] |= (value >> space); +} + #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ From 0131bf19a141d57ccd1f158df5be24baec5f8d99 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 30 Aug 2024 16:35:48 +0930 Subject: [PATCH 490/534] btrfs: subpage: fix the bitmap dump which can cause bitmap corruption [ Upstream commit 77b0b98bb743f5d04d8f995ba1936e1143689d4a ] In commit 75258f20fb70 ("btrfs: subpage: dump extra subpage bitmaps for debug") an internal macro GET_SUBPAGE_BITMAP() is introduced to grab the bitmap of each attribute. But that commit is using bitmap_cut() which will do the left shift of the larger bitmap, causing incorrect values. Thankfully this bitmap_cut() is only called for debug usage, and so far it's not yet causing problem. Fix it to use bitmap_read() to only grab the desired sub-bitmap. Fixes: 75258f20fb70 ("btrfs: subpage: dump extra subpage bitmaps for debug") CC: stable@vger.kernel.org # 6.6+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/subpage.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 1b999c6e4193..b98d42ca5564 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -713,8 +713,14 @@ void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page, } #define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst) \ - bitmap_cut(dst, subpage->bitmaps, 0, \ - subpage_info->name##_offset, subpage_info->bitmap_nr_bits) +{ \ + const int bitmap_nr_bits = subpage_info->bitmap_nr_bits; \ + \ + ASSERT(bitmap_nr_bits < BITS_PER_LONG); \ + *dst = bitmap_read(subpage->bitmaps, \ + subpage_info->name##_offset, \ + bitmap_nr_bits); \ +} void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) From a0cc053ba1e21c1b8acaa0c3a8eaefa2970cd2a6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Sep 2023 21:04:32 +0200 Subject: [PATCH 491/534] btrfs: reorder btrfs_inode to fill gaps [ Upstream commit 398fb9131f31bd25aa187613c9942f4232e952b7 ] Previous commit created a hole in struct btrfs_inode, we can move outstanding_extents there. This reduces size by 8 bytes from 1120 to 1112 on a release config. Signed-off-by: David Sterba Stable-dep-of: 7ee85f5515e8 ("btrfs: fix race setting file private on concurrent lseek using same fd") Signed-off-by: Sasha Levin --- fs/btrfs/btrfs_inode.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index bda1fdbba666..b75c7f593478 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -102,6 +102,14 @@ struct btrfs_inode { /* held while logging the inode in tree-log.c */ struct mutex log_mutex; + /* + * Counters to keep track of the number of extent item's we may use due + * to delalloc and such. outstanding_extents is the number of extent + * items we think we'll end up using, and reserved_extents is the number + * of extent items we've reserved metadata for. + */ + unsigned outstanding_extents; + /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; @@ -223,14 +231,6 @@ struct btrfs_inode { /* Read-only compatibility flags, upper half of inode_item::flags */ u32 ro_flags; - /* - * Counters to keep track of the number of extent item's we may use due - * to delalloc and such. outstanding_extents is the number of extent - * items we think we'll end up using, and reserved_extents is the number - * of extent items we've reserved metadata for. - */ - unsigned outstanding_extents; - struct btrfs_block_rsv block_rsv; /* From 971d03cd457a1c701bf4cc0d299f8a48d0f49037 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 4 Oct 2023 11:38:53 +0100 Subject: [PATCH 492/534] btrfs: update comment for struct btrfs_inode::lock [ Upstream commit 68539bd0e73b457f88a9d00cabb6533ec8582dc9 ] Update the comment for the lock named "lock" in struct btrfs_inode because it does not mention that the fields "delalloc_bytes", "defrag_bytes", "csum_bytes", "outstanding_extents" and "disk_i_size" are also protected by that lock. Also add a comment on top of each field protected by this lock to mention that the lock protects them. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Stable-dep-of: 7ee85f5515e8 ("btrfs: fix race setting file private on concurrent lseek using same fd") Signed-off-by: Sasha Levin --- fs/btrfs/btrfs_inode.h | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b75c7f593478..e9a979d0a958 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -82,8 +82,9 @@ struct btrfs_inode { /* * Lock for counters and all fields used to determine if the inode is in * the log or not (last_trans, last_sub_trans, last_log_commit, - * logged_trans), to access/update new_delalloc_bytes and to update the - * VFS' inode number of bytes used. + * logged_trans), to access/update delalloc_bytes, new_delalloc_bytes, + * defrag_bytes, disk_i_size, outstanding_extents, csum_bytes and to + * update the VFS' inode number of bytes used. */ spinlock_t lock; @@ -106,7 +107,7 @@ struct btrfs_inode { * Counters to keep track of the number of extent item's we may use due * to delalloc and such. outstanding_extents is the number of extent * items we think we'll end up using, and reserved_extents is the number - * of extent items we've reserved metadata for. + * of extent items we've reserved metadata for. Protected by 'lock'. */ unsigned outstanding_extents; @@ -130,28 +131,31 @@ struct btrfs_inode { u64 generation; /* - * transid of the trans_handle that last modified this inode + * ID of the transaction handle that last modified this inode. + * Protected by 'lock'. */ u64 last_trans; /* - * transid that last logged this inode + * ID of the transaction that last logged this inode. + * Protected by 'lock'. */ u64 logged_trans; /* - * log transid when this inode was last modified + * Log transaction ID when this inode was last modified. + * Protected by 'lock'. */ int last_sub_trans; - /* a local copy of root's last_log_commit */ + /* A local copy of root's last_log_commit. Protected by 'lock'. */ int last_log_commit; union { /* * Total number of bytes pending delalloc, used by stat to * calculate the real block usage of the file. This is used - * only for files. + * only for files. Protected by 'lock'. */ u64 delalloc_bytes; /* @@ -169,7 +173,7 @@ struct btrfs_inode { * Total number of bytes pending delalloc that fall within a file * range that is either a hole or beyond EOF (and no prealloc extent * exists in the range). This is always <= delalloc_bytes and this - * is used only for files. + * is used only for files. Protected by 'lock'. */ u64 new_delalloc_bytes; /* @@ -180,15 +184,15 @@ struct btrfs_inode { }; /* - * total number of bytes pending defrag, used by stat to check whether - * it needs COW. + * Total number of bytes pending defrag, used by stat to check whether + * it needs COW. Protected by 'lock'. */ u64 defrag_bytes; /* - * the size of the file stored in the metadata on disk. data=ordered + * The size of the file stored in the metadata on disk. data=ordered * means the in-memory i_size might be larger than the size on disk - * because not all the blocks are written yet. + * because not all the blocks are written yet. Protected by 'lock'. */ u64 disk_i_size; @@ -222,7 +226,7 @@ struct btrfs_inode { /* * Number of bytes outstanding that are going to need csums. This is - * used in ENOSPC accounting. + * used in ENOSPC accounting. Protected by 'lock'. */ u64 csum_bytes; From f56a6d9c267ec7fa558ede7755551c047b1034cd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 3 Sep 2024 10:55:36 +0100 Subject: [PATCH 493/534] btrfs: fix race setting file private on concurrent lseek using same fd [ Upstream commit 7ee85f5515e86a4e2a2f51969795920733912bad ] When doing concurrent lseek(2) system calls against the same file descriptor, using multiple threads belonging to the same process, we have a short time window where a race happens and can result in a memory leak. The race happens like this: 1) A program opens a file descriptor for a file and then spawns two threads (with the pthreads library for example), lets call them task A and task B; 2) Task A calls lseek with SEEK_DATA or SEEK_HOLE and ends up at file.c:find_desired_extent() while holding a read lock on the inode; 3) At the start of find_desired_extent(), it extracts the file's private_data pointer into a local variable named 'private', which has a value of NULL; 4) Task B also calls lseek with SEEK_DATA or SEEK_HOLE, locks the inode in shared mode and enters file.c:find_desired_extent(), where it also extracts file->private_data into its local variable 'private', which has a NULL value; 5) Because it saw a NULL file private, task A allocates a private structure and assigns to the file structure; 6) Task B also saw a NULL file private so it also allocates its own file private and then assigns it to the same file structure, since both tasks are using the same file descriptor. At this point we leak the private structure allocated by task A. Besides the memory leak, there's also the detail that both tasks end up using the same cached state record in the private structure (struct btrfs_file_private::llseek_cached_state), which can result in a use-after-free problem since one task can free it while the other is still using it (only one task took a reference count on it). Also, sharing the cached state is not a good idea since it could result in incorrect results in the future - right now it should not be a problem because it end ups being used only in extent-io-tree.c:count_range_bits() where we do range validation before using the cached state. Fix this by protecting the private assignment and check of a file while holding the inode's spinlock and keep track of the task that allocated the private, so that it's used only by that task in order to prevent user-after-free issues with the cached state record as well as potentially using it incorrectly in the future. Fixes: 3c32c7212f16 ("btrfs: use cached state when looking for delalloc ranges with lseek") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.h | 2 ++ fs/btrfs/file.c | 34 +++++++++++++++++++++++++++++++--- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index e9a979d0a958..ec6679a538c1 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -85,6 +85,7 @@ struct btrfs_inode { * logged_trans), to access/update delalloc_bytes, new_delalloc_bytes, * defrag_bytes, disk_i_size, outstanding_extents, csum_bytes and to * update the VFS' inode number of bytes used. + * Also protects setting struct file::private_data. */ spinlock_t lock; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 06333a74d6c4..f7bb4c34b984 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -445,6 +445,8 @@ struct btrfs_file_private { void *filldir_buf; u64 last_index; struct extent_state *llseek_cached_state; + /* Task that allocated this structure. */ + struct task_struct *owner_task; }; static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 15fd8c00f4c0..fc6c91773bc8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3481,7 +3481,7 @@ static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence, static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) { struct btrfs_inode *inode = BTRFS_I(file->f_mapping->host); - struct btrfs_file_private *private = file->private_data; + struct btrfs_file_private *private; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_state *cached_state = NULL; struct extent_state **delalloc_cached_state; @@ -3509,7 +3509,19 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) inode_get_bytes(&inode->vfs_inode) == i_size) return i_size; - if (!private) { + spin_lock(&inode->lock); + private = file->private_data; + spin_unlock(&inode->lock); + + if (private && private->owner_task != current) { + /* + * Not allocated by us, don't use it as its cached state is used + * by the task that allocated it and we don't want neither to + * mess with it nor get incorrect results because it reflects an + * invalid state for the current task. + */ + private = NULL; + } else if (!private) { private = kzalloc(sizeof(*private), GFP_KERNEL); /* * No worries if memory allocation failed. @@ -3517,7 +3529,23 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) * lseek SEEK_HOLE/DATA calls to a file when there's delalloc, * so everything will still be correct. */ - file->private_data = private; + if (private) { + bool free = false; + + private->owner_task = current; + + spin_lock(&inode->lock); + if (file->private_data) + free = true; + else + file->private_data = private; + spin_unlock(&inode->lock); + + if (free) { + kfree(private); + private = NULL; + } + } } if (private) From eb95bd96465c575d60fa649cd1687330f7d5fbc2 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 22 Jan 2024 17:15:10 +0800 Subject: [PATCH 494/534] dt-bindings: spi: nxp-fspi: support i.MX93 and i.MX95 [ Upstream commit 18ab9e9e8889ecba23a5e8b7f8924f09284e33d8 ] Add i.MX93/95 flexspi compatible strings, which are compatible with i.MX8MM Signed-off-by: Peng Fan Acked-by: Han Xu Acked-by: Conor Dooley Link: https://msgid.link/r/20240122091510.2077498-2-peng.fan@oss.nxp.com Signed-off-by: Mark Brown Stable-dep-of: 12736adc43b7 ("dt-bindings: spi: nxp-fspi: add imx8ulp support") Signed-off-by: Sasha Levin --- .../devicetree/bindings/spi/spi-nxp-fspi.yaml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml index 7fd591145480..4a5f41bde00f 100644 --- a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml +++ b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml @@ -15,12 +15,18 @@ allOf: properties: compatible: - enum: - - nxp,imx8dxl-fspi - - nxp,imx8mm-fspi - - nxp,imx8mp-fspi - - nxp,imx8qxp-fspi - - nxp,lx2160a-fspi + oneOf: + - enum: + - nxp,imx8dxl-fspi + - nxp,imx8mm-fspi + - nxp,imx8mp-fspi + - nxp,imx8qxp-fspi + - nxp,lx2160a-fspi + - items: + - enum: + - nxp,imx93-fspi + - nxp,imx95-fspi + - const: nxp,imx8mm-fspi reg: items: From 159b1b45300c24b06147b7665bd18017a22bc66b Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Thu, 5 Sep 2024 17:43:35 +0800 Subject: [PATCH 495/534] dt-bindings: spi: nxp-fspi: add imx8ulp support [ Upstream commit 12736adc43b7cd5cb83f274f8f37b0f89d107c97 ] The flexspi on imx8ulp only has 16 number of LUTs, it is different with flexspi on other imx SoC which has 32 number of LUTs. Fixes: ef89fd56bdfc ("arm64: dts: imx8ulp: add flexspi node") Cc: stable@kernel.org Acked-by: Krzysztof Kozlowski Signed-off-by: Haibo Chen Reviewed-by: Frank Li Link: https://patch.msgid.link/20240905094338.1986871-2-haibo.chen@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml index 4a5f41bde00f..902db92da832 100644 --- a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml +++ b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml @@ -21,6 +21,7 @@ properties: - nxp,imx8mm-fspi - nxp,imx8mp-fspi - nxp,imx8qxp-fspi + - nxp,imx8ulp-fspi - nxp,lx2160a-fspi - items: - enum: From 90135c317d1748153ef7420effbd78c1cd7fc58a Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 1 Oct 2024 17:30:56 +0000 Subject: [PATCH 496/534] thunderbolt: Fix debug log when DisplayPort adapter not available for pairing [ Upstream commit 6b8ac54f31f985d3abb0b4212187838dd8ea4227 ] Fix debug log when looking for a DisplayPort adapter pair of DP IN and DP OUT. In case of no DP adapter available, log the type of the DP adapter that is not available. Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index c5e10c1d4c38..6fc300edad68 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1311,13 +1311,12 @@ static void tb_tunnel_dp(struct tb *tb) continue; } - tb_port_dbg(port, "DP IN available\n"); + in = port; + tb_port_dbg(in, "DP IN available\n"); out = tb_find_dp_out(tb, port); - if (out) { - in = port; + if (out) break; - } } if (!in) { From 95f53ccfe6ed9d92ca2e8953a1d7a4e49aafe98e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 1 Oct 2024 17:30:57 +0000 Subject: [PATCH 497/534] thunderbolt: Use tb_tunnel_dbg() where possible to make logging more consistent [ Upstream commit fe8a0293c922ee8bc1ff0cf9048075afb264004a ] This makes it easier to find out the tunnel in question. Also drop a couple of lines that generate duplicate information. Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tunnel.c | 65 +++++++++++++++++------------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index a6810fb36860..c0a8142f73f4 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -614,8 +614,9 @@ static int tb_dp_xchg_caps(struct tb_tunnel *tunnel) in_rate = tb_dp_cap_get_rate(in_dp_cap); in_lanes = tb_dp_cap_get_lanes(in_dp_cap); - tb_port_dbg(in, "maximum supported bandwidth %u Mb/s x%u = %u Mb/s\n", - in_rate, in_lanes, tb_dp_bandwidth(in_rate, in_lanes)); + tb_tunnel_dbg(tunnel, + "DP IN maximum supported bandwidth %u Mb/s x%u = %u Mb/s\n", + in_rate, in_lanes, tb_dp_bandwidth(in_rate, in_lanes)); /* * If the tunnel bandwidth is limited (max_bw is set) then see @@ -624,8 +625,9 @@ static int tb_dp_xchg_caps(struct tb_tunnel *tunnel) out_rate = tb_dp_cap_get_rate(out_dp_cap); out_lanes = tb_dp_cap_get_lanes(out_dp_cap); bw = tb_dp_bandwidth(out_rate, out_lanes); - tb_port_dbg(out, "maximum supported bandwidth %u Mb/s x%u = %u Mb/s\n", - out_rate, out_lanes, bw); + tb_tunnel_dbg(tunnel, + "DP OUT maximum supported bandwidth %u Mb/s x%u = %u Mb/s\n", + out_rate, out_lanes, bw); if (in->sw->config.depth < out->sw->config.depth) max_bw = tunnel->max_down; @@ -639,13 +641,14 @@ static int tb_dp_xchg_caps(struct tb_tunnel *tunnel) out_rate, out_lanes, &new_rate, &new_lanes); if (ret) { - tb_port_info(out, "not enough bandwidth for DP tunnel\n"); + tb_tunnel_info(tunnel, "not enough bandwidth\n"); return ret; } new_bw = tb_dp_bandwidth(new_rate, new_lanes); - tb_port_dbg(out, "bandwidth reduced to %u Mb/s x%u = %u Mb/s\n", - new_rate, new_lanes, new_bw); + tb_tunnel_dbg(tunnel, + "bandwidth reduced to %u Mb/s x%u = %u Mb/s\n", + new_rate, new_lanes, new_bw); /* * Set new rate and number of lanes before writing it to @@ -662,7 +665,7 @@ static int tb_dp_xchg_caps(struct tb_tunnel *tunnel) */ if (tb_route(out->sw) && tb_switch_is_titan_ridge(out->sw)) { out_dp_cap |= DP_COMMON_CAP_LTTPR_NS; - tb_port_dbg(out, "disabling LTTPR\n"); + tb_tunnel_dbg(tunnel, "disabling LTTPR\n"); } return tb_port_write(in, &out_dp_cap, TB_CFG_PORT, @@ -712,8 +715,8 @@ static int tb_dp_bandwidth_alloc_mode_enable(struct tb_tunnel *tunnel) lanes = min(in_lanes, out_lanes); tmp = tb_dp_bandwidth(rate, lanes); - tb_port_dbg(in, "non-reduced bandwidth %u Mb/s x%u = %u Mb/s\n", rate, - lanes, tmp); + tb_tunnel_dbg(tunnel, "non-reduced bandwidth %u Mb/s x%u = %u Mb/s\n", + rate, lanes, tmp); ret = usb4_dp_port_set_nrd(in, rate, lanes); if (ret) @@ -728,15 +731,15 @@ static int tb_dp_bandwidth_alloc_mode_enable(struct tb_tunnel *tunnel) rate = min(in_rate, out_rate); tmp = tb_dp_bandwidth(rate, lanes); - tb_port_dbg(in, - "maximum bandwidth through allocation mode %u Mb/s x%u = %u Mb/s\n", - rate, lanes, tmp); + tb_tunnel_dbg(tunnel, + "maximum bandwidth through allocation mode %u Mb/s x%u = %u Mb/s\n", + rate, lanes, tmp); for (granularity = 250; tmp / granularity > 255 && granularity <= 1000; granularity *= 2) ; - tb_port_dbg(in, "granularity %d Mb/s\n", granularity); + tb_tunnel_dbg(tunnel, "granularity %d Mb/s\n", granularity); /* * Returns -EINVAL if granularity above is outside of the @@ -756,7 +759,7 @@ static int tb_dp_bandwidth_alloc_mode_enable(struct tb_tunnel *tunnel) else estimated_bw = tunnel->max_up; - tb_port_dbg(in, "estimated bandwidth %d Mb/s\n", estimated_bw); + tb_tunnel_dbg(tunnel, "estimated bandwidth %d Mb/s\n", estimated_bw); ret = usb4_dp_port_set_estimated_bandwidth(in, estimated_bw); if (ret) @@ -767,7 +770,7 @@ static int tb_dp_bandwidth_alloc_mode_enable(struct tb_tunnel *tunnel) if (ret) return ret; - tb_port_dbg(in, "bandwidth allocation mode enabled\n"); + tb_tunnel_dbg(tunnel, "bandwidth allocation mode enabled\n"); return 0; } @@ -788,7 +791,7 @@ static int tb_dp_init(struct tb_tunnel *tunnel) if (!usb4_dp_port_bandwidth_mode_supported(in)) return 0; - tb_port_dbg(in, "bandwidth allocation mode supported\n"); + tb_tunnel_dbg(tunnel, "bandwidth allocation mode supported\n"); ret = usb4_dp_port_set_cm_id(in, tb->index); if (ret) @@ -805,7 +808,7 @@ static void tb_dp_deinit(struct tb_tunnel *tunnel) return; if (usb4_dp_port_bandwidth_mode_enabled(in)) { usb4_dp_port_set_cm_bandwidth_mode_supported(in, false); - tb_port_dbg(in, "bandwidth allocation mode disabled\n"); + tb_tunnel_dbg(tunnel, "bandwidth allocation mode disabled\n"); } } @@ -921,9 +924,6 @@ static int tb_dp_bandwidth_mode_consumed_bandwidth(struct tb_tunnel *tunnel, if (allocated_bw == max_bw) allocated_bw = ret; - tb_port_dbg(in, "consumed bandwidth through allocation mode %d Mb/s\n", - allocated_bw); - if (in->sw->config.depth < out->sw->config.depth) { *consumed_up = 0; *consumed_down = allocated_bw; @@ -1006,9 +1006,6 @@ static int tb_dp_alloc_bandwidth(struct tb_tunnel *tunnel, int *alloc_up, /* Now we can use BW mode registers to figure out the bandwidth */ /* TODO: need to handle discovery too */ tunnel->bw_mode = true; - - tb_port_dbg(in, "allocated bandwidth through allocation mode %d Mb/s\n", - tmp); return 0; } @@ -1035,8 +1032,7 @@ static int tb_dp_read_dprx(struct tb_tunnel *tunnel, u32 *rate, u32 *lanes, *rate = tb_dp_cap_get_rate(val); *lanes = tb_dp_cap_get_lanes(val); - tb_port_dbg(in, "consumed bandwidth through DPRX %d Mb/s\n", - tb_dp_bandwidth(*rate, *lanes)); + tb_tunnel_dbg(tunnel, "DPRX read done\n"); return 0; } usleep_range(100, 150); @@ -1073,9 +1069,6 @@ static int tb_dp_read_cap(struct tb_tunnel *tunnel, unsigned int cap, u32 *rate, *rate = tb_dp_cap_get_rate(val); *lanes = tb_dp_cap_get_lanes(val); - - tb_port_dbg(in, "bandwidth from %#x capability %d Mb/s\n", cap, - tb_dp_bandwidth(*rate, *lanes)); return 0; } @@ -1253,8 +1246,9 @@ static void tb_dp_dump(struct tb_tunnel *tunnel) rate = tb_dp_cap_get_rate(dp_cap); lanes = tb_dp_cap_get_lanes(dp_cap); - tb_port_dbg(in, "maximum supported bandwidth %u Mb/s x%u = %u Mb/s\n", - rate, lanes, tb_dp_bandwidth(rate, lanes)); + tb_tunnel_dbg(tunnel, + "DP IN maximum supported bandwidth %u Mb/s x%u = %u Mb/s\n", + rate, lanes, tb_dp_bandwidth(rate, lanes)); out = tunnel->dst_port; @@ -1265,8 +1259,9 @@ static void tb_dp_dump(struct tb_tunnel *tunnel) rate = tb_dp_cap_get_rate(dp_cap); lanes = tb_dp_cap_get_lanes(dp_cap); - tb_port_dbg(out, "maximum supported bandwidth %u Mb/s x%u = %u Mb/s\n", - rate, lanes, tb_dp_bandwidth(rate, lanes)); + tb_tunnel_dbg(tunnel, + "DP OUT maximum supported bandwidth %u Mb/s x%u = %u Mb/s\n", + rate, lanes, tb_dp_bandwidth(rate, lanes)); if (tb_port_read(in, &dp_cap, TB_CFG_PORT, in->cap_adap + DP_REMOTE_CAP, 1)) @@ -1275,8 +1270,8 @@ static void tb_dp_dump(struct tb_tunnel *tunnel) rate = tb_dp_cap_get_rate(dp_cap); lanes = tb_dp_cap_get_lanes(dp_cap); - tb_port_dbg(in, "reduced bandwidth %u Mb/s x%u = %u Mb/s\n", - rate, lanes, tb_dp_bandwidth(rate, lanes)); + tb_tunnel_dbg(tunnel, "reduced bandwidth %u Mb/s x%u = %u Mb/s\n", + rate, lanes, tb_dp_bandwidth(rate, lanes)); } /** From 6870e5b499f1d4fbc4ac4aacb3e64aad13b35924 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 1 Oct 2024 17:30:58 +0000 Subject: [PATCH 498/534] thunderbolt: Expose tb_tunnel_xxx() log macros to the rest of the driver [ Upstream commit d27bd2c37d4666bce25ec4d9ac8c6b169992f0f0 ] In order to allow more consistent logging of tunnel related information make these logging macros available to the rest of the driver. Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tunnel.c | 26 +++++--------------------- drivers/thunderbolt/tunnel.h | 24 +++++++++++++++++++++++- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index c0a8142f73f4..389b8dfc2447 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -58,27 +58,6 @@ MODULE_PARM_DESC(bw_alloc_mode, static const char * const tb_tunnel_names[] = { "PCI", "DP", "DMA", "USB3" }; -#define __TB_TUNNEL_PRINT(level, tunnel, fmt, arg...) \ - do { \ - struct tb_tunnel *__tunnel = (tunnel); \ - level(__tunnel->tb, "%llx:%u <-> %llx:%u (%s): " fmt, \ - tb_route(__tunnel->src_port->sw), \ - __tunnel->src_port->port, \ - tb_route(__tunnel->dst_port->sw), \ - __tunnel->dst_port->port, \ - tb_tunnel_names[__tunnel->type], \ - ## arg); \ - } while (0) - -#define tb_tunnel_WARN(tunnel, fmt, arg...) \ - __TB_TUNNEL_PRINT(tb_WARN, tunnel, fmt, ##arg) -#define tb_tunnel_warn(tunnel, fmt, arg...) \ - __TB_TUNNEL_PRINT(tb_warn, tunnel, fmt, ##arg) -#define tb_tunnel_info(tunnel, fmt, arg...) \ - __TB_TUNNEL_PRINT(tb_info, tunnel, fmt, ##arg) -#define tb_tunnel_dbg(tunnel, fmt, arg...) \ - __TB_TUNNEL_PRINT(tb_dbg, tunnel, fmt, ##arg) - static inline unsigned int tb_usable_credits(const struct tb_port *port) { return port->total_credits - port->ctl_credits; @@ -2382,3 +2361,8 @@ void tb_tunnel_reclaim_available_bandwidth(struct tb_tunnel *tunnel, tunnel->reclaim_available_bandwidth(tunnel, available_up, available_down); } + +const char *tb_tunnel_type_name(const struct tb_tunnel *tunnel) +{ + return tb_tunnel_names[tunnel->type]; +} diff --git a/drivers/thunderbolt/tunnel.h b/drivers/thunderbolt/tunnel.h index bf690f7beeee..750ebb570d99 100644 --- a/drivers/thunderbolt/tunnel.h +++ b/drivers/thunderbolt/tunnel.h @@ -137,5 +137,27 @@ static inline bool tb_tunnel_is_usb3(const struct tb_tunnel *tunnel) return tunnel->type == TB_TUNNEL_USB3; } -#endif +const char *tb_tunnel_type_name(const struct tb_tunnel *tunnel); +#define __TB_TUNNEL_PRINT(level, tunnel, fmt, arg...) \ + do { \ + struct tb_tunnel *__tunnel = (tunnel); \ + level(__tunnel->tb, "%llx:%u <-> %llx:%u (%s): " fmt, \ + tb_route(__tunnel->src_port->sw), \ + __tunnel->src_port->port, \ + tb_route(__tunnel->dst_port->sw), \ + __tunnel->dst_port->port, \ + tb_tunnel_type_name(__tunnel), \ + ## arg); \ + } while (0) + +#define tb_tunnel_WARN(tunnel, fmt, arg...) \ + __TB_TUNNEL_PRINT(tb_WARN, tunnel, fmt, ##arg) +#define tb_tunnel_warn(tunnel, fmt, arg...) \ + __TB_TUNNEL_PRINT(tb_warn, tunnel, fmt, ##arg) +#define tb_tunnel_info(tunnel, fmt, arg...) \ + __TB_TUNNEL_PRINT(tb_info, tunnel, fmt, ##arg) +#define tb_tunnel_dbg(tunnel, fmt, arg...) \ + __TB_TUNNEL_PRINT(tb_dbg, tunnel, fmt, ##arg) + +#endif From ae2d54f5e5e93702d7af8bc8d43687309d250cfd Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 1 Oct 2024 17:30:59 +0000 Subject: [PATCH 499/534] thunderbolt: Create multiple DisplayPort tunnels if there are more DP IN/OUT pairs [ Upstream commit 8648c6465c025c488e2855c209c0dea1a1a15184 ] Currently we only create one DisplayPort tunnel even if there would be more DP IN/OUT pairs available. Specifically this happens when a router is unplugged and we check if a new DisplayPort tunnel can be created. To cover this create tunnels as long as we find suitable DP IN/OUT pairs. Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tb.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 6fc300edad68..e37fb1081420 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1282,18 +1282,13 @@ static struct tb_port *tb_find_dp_out(struct tb *tb, struct tb_port *in) return NULL; } -static void tb_tunnel_dp(struct tb *tb) +static bool tb_tunnel_one_dp(struct tb *tb) { int available_up, available_down, ret, link_nr; struct tb_cm *tcm = tb_priv(tb); struct tb_port *port, *in, *out; struct tb_tunnel *tunnel; - if (!tb_acpi_may_tunnel_dp()) { - tb_dbg(tb, "DP tunneling disabled, not creating tunnel\n"); - return; - } - /* * Find pair of inactive DP IN and DP OUT adapters and then * establish a DP tunnel between them. @@ -1321,11 +1316,11 @@ static void tb_tunnel_dp(struct tb *tb) if (!in) { tb_dbg(tb, "no suitable DP IN adapter available, not tunneling\n"); - return; + return false; } if (!out) { tb_dbg(tb, "no suitable DP OUT adapter available, not tunneling\n"); - return; + return false; } /* @@ -1398,7 +1393,7 @@ static void tb_tunnel_dp(struct tb *tb) * TMU mode to HiFi for CL0s to work. */ tb_increase_tmu_accuracy(tunnel); - return; + return true; err_free: tb_tunnel_free(tunnel); @@ -1413,6 +1408,19 @@ err_rpm_put: pm_runtime_put_autosuspend(&out->sw->dev); pm_runtime_mark_last_busy(&in->sw->dev); pm_runtime_put_autosuspend(&in->sw->dev); + + return false; +} + +static void tb_tunnel_dp(struct tb *tb) +{ + if (!tb_acpi_may_tunnel_dp()) { + tb_dbg(tb, "DP tunneling disabled, not creating tunnel\n"); + return; + } + + while (tb_tunnel_one_dp(tb)) + ; } static void tb_enter_redrive(struct tb_port *port) From c014f37411d531c63ded4afb4e350ed82ac78be7 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 1 Oct 2024 17:31:00 +0000 Subject: [PATCH 500/534] thunderbolt: Use constants for path weight and priority [ Upstream commit f73edddfa2a64a185c65a33f100778169c92fc25 ] Makes it easier to follow and update. No functional changes. Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tunnel.c | 39 +++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 389b8dfc2447..9947b9d0d51a 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -21,12 +21,18 @@ #define TB_PCI_PATH_DOWN 0 #define TB_PCI_PATH_UP 1 +#define TB_PCI_PRIORITY 3 +#define TB_PCI_WEIGHT 1 + /* USB3 adapters use always HopID of 8 for both directions */ #define TB_USB3_HOPID 8 #define TB_USB3_PATH_DOWN 0 #define TB_USB3_PATH_UP 1 +#define TB_USB3_PRIORITY 3 +#define TB_USB3_WEIGHT 3 + /* DP adapters use HopID 8 for AUX and 9 for Video */ #define TB_DP_AUX_TX_HOPID 8 #define TB_DP_AUX_RX_HOPID 8 @@ -36,6 +42,12 @@ #define TB_DP_AUX_PATH_OUT 1 #define TB_DP_AUX_PATH_IN 2 +#define TB_DP_VIDEO_PRIORITY 1 +#define TB_DP_VIDEO_WEIGHT 1 + +#define TB_DP_AUX_PRIORITY 2 +#define TB_DP_AUX_WEIGHT 1 + /* Minimum number of credits needed for PCIe path */ #define TB_MIN_PCIE_CREDITS 6U /* @@ -46,6 +58,9 @@ /* Minimum number of credits for DMA path */ #define TB_MIN_DMA_CREDITS 1 +#define TB_DMA_PRIORITY 5 +#define TB_DMA_WEIGHT 1 + static unsigned int dma_credits = TB_DMA_CREDITS; module_param(dma_credits, uint, 0444); MODULE_PARM_DESC(dma_credits, "specify custom credits for DMA tunnels (default: " @@ -213,8 +228,8 @@ static int tb_pci_init_path(struct tb_path *path) path->egress_shared_buffer = TB_PATH_NONE; path->ingress_fc_enable = TB_PATH_ALL; path->ingress_shared_buffer = TB_PATH_NONE; - path->priority = 3; - path->weight = 1; + path->priority = TB_PCI_PRIORITY; + path->weight = TB_PCI_WEIGHT; path->drop_packages = 0; tb_path_for_each_hop(path, hop) { @@ -1152,8 +1167,8 @@ static void tb_dp_init_aux_path(struct tb_path *path) path->egress_shared_buffer = TB_PATH_NONE; path->ingress_fc_enable = TB_PATH_ALL; path->ingress_shared_buffer = TB_PATH_NONE; - path->priority = 2; - path->weight = 1; + path->priority = TB_DP_AUX_PRIORITY; + path->weight = TB_DP_AUX_WEIGHT; tb_path_for_each_hop(path, hop) tb_dp_init_aux_credits(hop); @@ -1196,8 +1211,8 @@ static int tb_dp_init_video_path(struct tb_path *path) path->egress_shared_buffer = TB_PATH_NONE; path->ingress_fc_enable = TB_PATH_NONE; path->ingress_shared_buffer = TB_PATH_NONE; - path->priority = 1; - path->weight = 1; + path->priority = TB_DP_VIDEO_PRIORITY; + path->weight = TB_DP_VIDEO_WEIGHT; tb_path_for_each_hop(path, hop) { int ret; @@ -1471,8 +1486,8 @@ static int tb_dma_init_rx_path(struct tb_path *path, unsigned int credits) path->ingress_fc_enable = TB_PATH_ALL; path->egress_shared_buffer = TB_PATH_NONE; path->ingress_shared_buffer = TB_PATH_NONE; - path->priority = 5; - path->weight = 1; + path->priority = TB_DMA_PRIORITY; + path->weight = TB_DMA_WEIGHT; path->clear_fc = true; /* @@ -1505,8 +1520,8 @@ static int tb_dma_init_tx_path(struct tb_path *path, unsigned int credits) path->ingress_fc_enable = TB_PATH_ALL; path->egress_shared_buffer = TB_PATH_NONE; path->ingress_shared_buffer = TB_PATH_NONE; - path->priority = 5; - path->weight = 1; + path->priority = TB_DMA_PRIORITY; + path->weight = TB_DMA_WEIGHT; path->clear_fc = true; tb_path_for_each_hop(path, hop) { @@ -1845,8 +1860,8 @@ static void tb_usb3_init_path(struct tb_path *path) path->egress_shared_buffer = TB_PATH_NONE; path->ingress_fc_enable = TB_PATH_ALL; path->ingress_shared_buffer = TB_PATH_NONE; - path->priority = 3; - path->weight = 3; + path->priority = TB_USB3_PRIORITY; + path->weight = TB_USB3_WEIGHT; path->drop_packages = 0; tb_path_for_each_hop(path, hop) From 22081f72076449ca7ef3e802cc2c498206817f50 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 1 Oct 2024 17:31:01 +0000 Subject: [PATCH 501/534] thunderbolt: Use weight constants in tb_usb3_consumed_bandwidth() [ Upstream commit 4d24db0c801461adeefd7e0bdc98c79c60ccefb0 ] Instead of magic numbers use the constants we introduced in the previous commit to make the code more readable. No functional changes. Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tunnel.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 9947b9d0d51a..b81344c6c06a 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -1747,14 +1747,17 @@ static int tb_usb3_activate(struct tb_tunnel *tunnel, bool activate) static int tb_usb3_consumed_bandwidth(struct tb_tunnel *tunnel, int *consumed_up, int *consumed_down) { - int pcie_enabled = tb_acpi_may_tunnel_pcie(); + int pcie_weight = tb_acpi_may_tunnel_pcie() ? TB_PCI_WEIGHT : 0; /* * PCIe tunneling, if enabled, affects the USB3 bandwidth so * take that it into account here. */ - *consumed_up = tunnel->allocated_up * (3 + pcie_enabled) / 3; - *consumed_down = tunnel->allocated_down * (3 + pcie_enabled) / 3; + *consumed_up = tunnel->allocated_up * + (TB_USB3_WEIGHT + pcie_weight) / TB_USB3_WEIGHT; + *consumed_down = tunnel->allocated_down * + (TB_USB3_WEIGHT + pcie_weight) / TB_USB3_WEIGHT; + return 0; } From 7b85d751081be8d3c3b8b5b2c12ce7eac3a06e04 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 1 Oct 2024 17:31:02 +0000 Subject: [PATCH 502/534] thunderbolt: Make is_gen4_link() available to the rest of the driver [ Upstream commit aa673d606078da36ebc379f041c794228ac08cb5 ] Rework the function to return the link generation, update the name to tb_port_get_link_generation(), and make available to the rest of the driver. This is needed in the subsequent patches. Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 36 +++++++++++++++++++++++++++++------- drivers/thunderbolt/tb.h | 1 + 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index f9f40c0e9add..c7f16fd0a043 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -921,6 +921,32 @@ int tb_port_get_link_speed(struct tb_port *port) } } +/** + * tb_port_get_link_generation() - Returns link generation + * @port: Lane adapter + * + * Returns link generation as number or negative errno in case of + * failure. Does not distinguish between Thunderbolt 1 and Thunderbolt 2 + * links so for those always returns 2. + */ +int tb_port_get_link_generation(struct tb_port *port) +{ + int ret; + + ret = tb_port_get_link_speed(port); + if (ret < 0) + return ret; + + switch (ret) { + case 40: + return 4; + case 20: + return 3; + default: + return 2; + } +} + /** * tb_port_get_link_width() - Get current link width * @port: Port to check (USB4 or CIO) @@ -966,11 +992,6 @@ static bool tb_port_is_width_supported(struct tb_port *port, return widths & width_mask; } -static bool is_gen4_link(struct tb_port *port) -{ - return tb_port_get_link_speed(port) > 20; -} - /** * tb_port_set_link_width() - Set target link width of the lane adapter * @port: Lane adapter @@ -998,7 +1019,7 @@ int tb_port_set_link_width(struct tb_port *port, enum tb_link_width width) switch (width) { case TB_LINK_WIDTH_SINGLE: /* Gen 4 link cannot be single */ - if (is_gen4_link(port)) + if (tb_port_get_link_generation(port) >= 4) return -EOPNOTSUPP; val |= LANE_ADP_CS_1_TARGET_WIDTH_SINGLE << LANE_ADP_CS_1_TARGET_WIDTH_SHIFT; @@ -1147,7 +1168,8 @@ int tb_port_wait_for_link_width(struct tb_port *port, unsigned int width_mask, int ret; /* Gen 4 link does not support single lane */ - if ((width_mask & TB_LINK_WIDTH_SINGLE) && is_gen4_link(port)) + if ((width_mask & TB_LINK_WIDTH_SINGLE) && + tb_port_get_link_generation(port) >= 4) return -EOPNOTSUPP; do { diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 8a75aabb9ce8..2f5f85666302 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -1062,6 +1062,7 @@ static inline bool tb_port_use_credit_allocation(const struct tb_port *port) (p) = tb_next_port_on_path((src), (dst), (p))) int tb_port_get_link_speed(struct tb_port *port); +int tb_port_get_link_generation(struct tb_port *port); int tb_port_get_link_width(struct tb_port *port); int tb_port_set_link_width(struct tb_port *port, enum tb_link_width width); int tb_port_lane_bonding_enable(struct tb_port *port); From 5ac89bb0062e960e0be3d0d7cbf56dca3fd48bab Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 1 Oct 2024 17:31:03 +0000 Subject: [PATCH 503/534] thunderbolt: Change bandwidth reservations to comply USB4 v2 [ Upstream commit 582e70b0d3a412d15389a3c9c07a44791b311715 ] USB4 v2 Connection Manager guide (section 6.1.2.3) suggests to reserve bandwidth in a sligthly different manner. It suggests to keep minimum of 1500 Mb/s for each path that carry a bulk traffic. Here we change the bandwidth reservations to comply to the above for USB 3.x and PCIe protocols over Gen 4 link, taking weights into account (that's 1500 Mb/s for PCIe and 3000 Mb/s for USB 3.x). For Gen 3 and below we use the existing reservation. Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tb.c | 11 ++++++ drivers/thunderbolt/tunnel.c | 66 ++++++++++++++++++++++++++++++++++-- drivers/thunderbolt/tunnel.h | 2 ++ 3 files changed, 76 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index e37fb1081420..183225bdbbf5 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -602,6 +602,7 @@ static int tb_available_bandwidth(struct tb *tb, struct tb_port *src_port, /* Find the minimum available bandwidth over all links */ tb_for_each_port_on_path(src_port, dst_port, port) { int link_speed, link_width, up_bw, down_bw; + int pci_reserved_up, pci_reserved_down; if (!tb_port_is_null(port)) continue; @@ -695,6 +696,16 @@ static int tb_available_bandwidth(struct tb *tb, struct tb_port *src_port, up_bw -= usb3_consumed_up; down_bw -= usb3_consumed_down; + /* + * If there is anything reserved for PCIe bulk traffic + * take it into account here too. + */ + if (tb_tunnel_reserved_pci(port, &pci_reserved_up, + &pci_reserved_down)) { + up_bw -= pci_reserved_up; + down_bw -= pci_reserved_down; + } + if (up_bw < *available_up) *available_up = up_bw; if (down_bw < *available_down) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index b81344c6c06a..e296ab5d657b 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -31,7 +31,7 @@ #define TB_USB3_PATH_UP 1 #define TB_USB3_PRIORITY 3 -#define TB_USB3_WEIGHT 3 +#define TB_USB3_WEIGHT 2 /* DP adapters use HopID 8 for AUX and 9 for Video */ #define TB_DP_AUX_TX_HOPID 8 @@ -61,6 +61,15 @@ #define TB_DMA_PRIORITY 5 #define TB_DMA_WEIGHT 1 +/* + * Reserve additional bandwidth for USB 3.x and PCIe bulk traffic + * according to USB4 v2 Connection Manager guide. This ends up reserving + * 1500 Mb/s for PCIe and 3000 Mb/s for USB 3.x taking weights into + * account. + */ +#define USB4_V2_PCI_MIN_BANDWIDTH (1500 * TB_PCI_WEIGHT) +#define USB4_V2_USB3_MIN_BANDWIDTH (1500 * TB_USB3_WEIGHT) + static unsigned int dma_credits = TB_DMA_CREDITS; module_param(dma_credits, uint, 0444); MODULE_PARM_DESC(dma_credits, "specify custom credits for DMA tunnels (default: " @@ -150,11 +159,11 @@ static struct tb_tunnel *tb_tunnel_alloc(struct tb *tb, size_t npaths, static int tb_pci_set_ext_encapsulation(struct tb_tunnel *tunnel, bool enable) { + struct tb_port *port = tb_upstream_port(tunnel->dst_port->sw); int ret; /* Only supported of both routers are at least USB4 v2 */ - if (usb4_switch_version(tunnel->src_port->sw) < 2 || - usb4_switch_version(tunnel->dst_port->sw) < 2) + if (tb_port_get_link_generation(port) < 4) return 0; ret = usb4_pci_port_set_ext_encapsulation(tunnel->src_port, enable); @@ -370,6 +379,51 @@ err_free: return NULL; } +/** + * tb_tunnel_reserved_pci() - Amount of bandwidth to reserve for PCIe + * @port: Lane 0 adapter + * @reserved_up: Upstream bandwidth in Mb/s to reserve + * @reserved_down: Downstream bandwidth in Mb/s to reserve + * + * Can be called to any connected lane 0 adapter to find out how much + * bandwidth needs to be left in reserve for possible PCIe bulk traffic. + * Returns true if there is something to be reserved and writes the + * amount to @reserved_down/@reserved_up. Otherwise returns false and + * does not touch the parameters. + */ +bool tb_tunnel_reserved_pci(struct tb_port *port, int *reserved_up, + int *reserved_down) +{ + if (WARN_ON_ONCE(!port->remote)) + return false; + + if (!tb_acpi_may_tunnel_pcie()) + return false; + + if (tb_port_get_link_generation(port) < 4) + return false; + + /* Must have PCIe adapters */ + if (tb_is_upstream_port(port)) { + if (!tb_switch_find_port(port->sw, TB_TYPE_PCIE_UP)) + return false; + if (!tb_switch_find_port(port->remote->sw, TB_TYPE_PCIE_DOWN)) + return false; + } else { + if (!tb_switch_find_port(port->sw, TB_TYPE_PCIE_DOWN)) + return false; + if (!tb_switch_find_port(port->remote->sw, TB_TYPE_PCIE_UP)) + return false; + } + + *reserved_up = USB4_V2_PCI_MIN_BANDWIDTH; + *reserved_down = USB4_V2_PCI_MIN_BANDWIDTH; + + tb_port_dbg(port, "reserving %u/%u Mb/s for PCIe\n", *reserved_up, + *reserved_down); + return true; +} + static bool tb_dp_is_usb4(const struct tb_switch *sw) { /* Titan Ridge DP adapters need the same treatment as USB4 */ @@ -1747,6 +1801,7 @@ static int tb_usb3_activate(struct tb_tunnel *tunnel, bool activate) static int tb_usb3_consumed_bandwidth(struct tb_tunnel *tunnel, int *consumed_up, int *consumed_down) { + struct tb_port *port = tb_upstream_port(tunnel->dst_port->sw); int pcie_weight = tb_acpi_may_tunnel_pcie() ? TB_PCI_WEIGHT : 0; /* @@ -1758,6 +1813,11 @@ static int tb_usb3_consumed_bandwidth(struct tb_tunnel *tunnel, *consumed_down = tunnel->allocated_down * (TB_USB3_WEIGHT + pcie_weight) / TB_USB3_WEIGHT; + if (tb_port_get_link_generation(port) >= 4) { + *consumed_up = max(*consumed_up, USB4_V2_USB3_MIN_BANDWIDTH); + *consumed_down = max(*consumed_down, USB4_V2_USB3_MIN_BANDWIDTH); + } + return 0; } diff --git a/drivers/thunderbolt/tunnel.h b/drivers/thunderbolt/tunnel.h index 750ebb570d99..b4cff5482112 100644 --- a/drivers/thunderbolt/tunnel.h +++ b/drivers/thunderbolt/tunnel.h @@ -80,6 +80,8 @@ struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down, bool alloc_hopid); struct tb_tunnel *tb_tunnel_alloc_pci(struct tb *tb, struct tb_port *up, struct tb_port *down); +bool tb_tunnel_reserved_pci(struct tb_port *port, int *reserved_up, + int *reserved_down); struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in, bool alloc_hopid); struct tb_tunnel *tb_tunnel_alloc_dp(struct tb *tb, struct tb_port *in, From 18dcdadc994125ccd42288184a52b1867315e7fd Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 1 Oct 2024 17:31:04 +0000 Subject: [PATCH 504/534] thunderbolt: Introduce tb_port_path_direction_downstream() [ Upstream commit 2bfeca73e94567c1a117ca45d2e8a25d63e5bd2c ] Introduce tb_port_path_direction_downstream() to check if path from source adapter to destination adapter is directed towards downstream. Convert existing users to call this helper instead of open-coding. No functional changes. Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tb.c | 6 +++--- drivers/thunderbolt/tb.h | 15 +++++++++++++++ drivers/thunderbolt/tunnel.c | 14 +++++++------- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 183225bdbbf5..4ea0536ec5cf 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -553,7 +553,7 @@ static struct tb_tunnel *tb_find_first_usb3_tunnel(struct tb *tb, struct tb_switch *sw; /* Pick the router that is deepest in the topology */ - if (dst_port->sw->config.depth > src_port->sw->config.depth) + if (tb_port_path_direction_downstream(src_port, dst_port)) sw = dst_port->sw; else sw = src_port->sw; @@ -1223,7 +1223,7 @@ tb_recalc_estimated_bandwidth_for_group(struct tb_bandwidth_group *group) tb_port_dbg(in, "re-calculated estimated bandwidth %u/%u Mb/s\n", estimated_up, estimated_down); - if (in->sw->config.depth < out->sw->config.depth) + if (tb_port_path_direction_downstream(in, out)) estimated_bw = estimated_down; else estimated_bw = estimated_up; @@ -2002,7 +2002,7 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) out = tunnel->dst_port; - if (in->sw->config.depth < out->sw->config.depth) { + if (tb_port_path_direction_downstream(in, out)) { requested_up = -1; requested_down = requested_bw; } else { diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 2f5f85666302..6d66dd2a3ab0 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -1044,6 +1044,21 @@ void tb_port_release_out_hopid(struct tb_port *port, int hopid); struct tb_port *tb_next_port_on_path(struct tb_port *start, struct tb_port *end, struct tb_port *prev); +/** + * tb_port_path_direction_downstream() - Checks if path directed downstream + * @src: Source adapter + * @dst: Destination adapter + * + * Returns %true only if the specified path from source adapter (@src) + * to destination adapter (@dst) is directed downstream. + */ +static inline bool +tb_port_path_direction_downstream(const struct tb_port *src, + const struct tb_port *dst) +{ + return src->sw->config.depth < dst->sw->config.depth; +} + static inline bool tb_port_use_credit_allocation(const struct tb_port *port) { return tb_port_is_null(port) && port->sw->credit_allocation; diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index e296ab5d657b..8aec678d80d3 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -677,7 +677,7 @@ static int tb_dp_xchg_caps(struct tb_tunnel *tunnel) "DP OUT maximum supported bandwidth %u Mb/s x%u = %u Mb/s\n", out_rate, out_lanes, bw); - if (in->sw->config.depth < out->sw->config.depth) + if (tb_port_path_direction_downstream(in, out)) max_bw = tunnel->max_down; else max_bw = tunnel->max_up; @@ -802,7 +802,7 @@ static int tb_dp_bandwidth_alloc_mode_enable(struct tb_tunnel *tunnel) * max_up/down fields. For discovery we just read what the * estimation was set to. */ - if (in->sw->config.depth < out->sw->config.depth) + if (tb_port_path_direction_downstream(in, out)) estimated_bw = tunnel->max_down; else estimated_bw = tunnel->max_up; @@ -972,7 +972,7 @@ static int tb_dp_bandwidth_mode_consumed_bandwidth(struct tb_tunnel *tunnel, if (allocated_bw == max_bw) allocated_bw = ret; - if (in->sw->config.depth < out->sw->config.depth) { + if (tb_port_path_direction_downstream(in, out)) { *consumed_up = 0; *consumed_down = allocated_bw; } else { @@ -1007,7 +1007,7 @@ static int tb_dp_allocated_bandwidth(struct tb_tunnel *tunnel, int *allocated_up if (allocated_bw == max_bw) allocated_bw = ret; - if (in->sw->config.depth < out->sw->config.depth) { + if (tb_port_path_direction_downstream(in, out)) { *allocated_up = 0; *allocated_down = allocated_bw; } else { @@ -1035,7 +1035,7 @@ static int tb_dp_alloc_bandwidth(struct tb_tunnel *tunnel, int *alloc_up, if (ret < 0) return ret; - if (in->sw->config.depth < out->sw->config.depth) { + if (tb_port_path_direction_downstream(in, out)) { tmp = min(*alloc_down, max_bw); ret = usb4_dp_port_allocate_bandwidth(in, tmp); if (ret) @@ -1133,7 +1133,7 @@ static int tb_dp_maximum_bandwidth(struct tb_tunnel *tunnel, int *max_up, if (ret < 0) return ret; - if (in->sw->config.depth < tunnel->dst_port->sw->config.depth) { + if (tb_port_path_direction_downstream(in, tunnel->dst_port)) { *max_up = 0; *max_down = ret; } else { @@ -1191,7 +1191,7 @@ static int tb_dp_consumed_bandwidth(struct tb_tunnel *tunnel, int *consumed_up, return 0; } - if (in->sw->config.depth < tunnel->dst_port->sw->config.depth) { + if (tb_port_path_direction_downstream(in, tunnel->dst_port)) { *consumed_up = 0; *consumed_down = tb_dp_bandwidth(rate, lanes); } else { From e07bc5858e3e6484e2a0e093e4029488953822e1 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 1 Oct 2024 17:31:05 +0000 Subject: [PATCH 505/534] thunderbolt: Introduce tb_for_each_upstream_port_on_path() [ Upstream commit 956c3abe72fb6a651b8cf77c28462f7e5b6a48b1 ] This is useful when walking over upstream lane adapters over given path. Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tb.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 6d66dd2a3ab0..4cd5f48e3dee 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -1076,6 +1076,21 @@ static inline bool tb_port_use_credit_allocation(const struct tb_port *port) for ((p) = tb_next_port_on_path((src), (dst), NULL); (p); \ (p) = tb_next_port_on_path((src), (dst), (p))) +/** + * tb_for_each_upstream_port_on_path() - Iterate over each upstreamm port on path + * @src: Source port + * @dst: Destination port + * @p: Port used as iterator + * + * Walks over each upstream lane adapter on path from @src to @dst. + */ +#define tb_for_each_upstream_port_on_path(src, dst, p) \ + for ((p) = tb_next_port_on_path((src), (dst), NULL); (p); \ + (p) = tb_next_port_on_path((src), (dst), (p))) \ + if (!tb_port_is_null((p)) || !tb_is_upstream_port((p))) {\ + continue; \ + } else + int tb_port_get_link_speed(struct tb_port *port); int tb_port_get_link_generation(struct tb_port *port); int tb_port_get_link_width(struct tb_port *port); From 8f053095e13eb45b8e8fd1a244353e9845d303f4 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 1 Oct 2024 17:31:06 +0000 Subject: [PATCH 506/534] thunderbolt: Introduce tb_switch_depth() [ Upstream commit c4ff14436952c3d0dd05769d76cf48e73a253b48 ] This is useful helper to find out the depth of a connected router. Convert the existing users to call this helper instead of open-coding. No functional changes. Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tb.c | 4 ++-- drivers/thunderbolt/tb.h | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 4ea0536ec5cf..39ec8da576ef 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -255,13 +255,13 @@ static int tb_enable_clx(struct tb_switch *sw) * this in the future to cover the whole topology if it turns * out to be beneficial. */ - while (sw && sw->config.depth > 1) + while (sw && tb_switch_depth(sw) > 1) sw = tb_switch_parent(sw); if (!sw) return 0; - if (sw->config.depth != 1) + if (tb_switch_depth(sw) != 1) return 0; /* diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 4cd5f48e3dee..d2ef9575231c 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -868,6 +868,15 @@ static inline struct tb_port *tb_switch_downstream_port(struct tb_switch *sw) return tb_port_at(tb_route(sw), tb_switch_parent(sw)); } +/** + * tb_switch_depth() - Returns depth of the connected router + * @sw: Router + */ +static inline int tb_switch_depth(const struct tb_switch *sw) +{ + return sw->config.depth; +} + static inline bool tb_switch_is_light_ridge(const struct tb_switch *sw) { return sw->config.vendor_id == PCI_VENDOR_ID_INTEL && From 9b6933e9bddc04655a894c74cd5c62202a5e3d89 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 1 Oct 2024 17:31:07 +0000 Subject: [PATCH 507/534] thunderbolt: Add support for asymmetric link [ Upstream commit 81af2952e60603d12415e1a6fd200f8073a2ad8b ] USB4 v2 spec defines a Gen 4 link that can operate as an aggregated symmetric (80/80G) or asymmetric (120/40G). When the link is asymmetric, the USB4 port on one side of the link operates with three TX lanes and one RX lane, while the USB4 port on the opposite side of the link operates with three RX lanes and one TX lane. Add support for the asymmetric link and provide functions that can be used to transition the link to asymmetric and back. Signed-off-by: Gil Fine Co-developed-by: Mika Westerberg Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 294 +++++++++++++++++++++++++++++----- drivers/thunderbolt/tb.c | 11 +- drivers/thunderbolt/tb.h | 16 +- drivers/thunderbolt/tb_regs.h | 9 +- drivers/thunderbolt/usb4.c | 106 ++++++++++++ 5 files changed, 381 insertions(+), 55 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index c7f16fd0a043..6393ce44c253 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -947,6 +947,22 @@ int tb_port_get_link_generation(struct tb_port *port) } } +static const char *width_name(enum tb_link_width width) +{ + switch (width) { + case TB_LINK_WIDTH_SINGLE: + return "symmetric, single lane"; + case TB_LINK_WIDTH_DUAL: + return "symmetric, dual lanes"; + case TB_LINK_WIDTH_ASYM_TX: + return "asymmetric, 3 transmitters, 1 receiver"; + case TB_LINK_WIDTH_ASYM_RX: + return "asymmetric, 3 receivers, 1 transmitter"; + default: + return "unknown"; + } +} + /** * tb_port_get_link_width() - Get current link width * @port: Port to check (USB4 or CIO) @@ -972,8 +988,15 @@ int tb_port_get_link_width(struct tb_port *port) LANE_ADP_CS_1_CURRENT_WIDTH_SHIFT; } -static bool tb_port_is_width_supported(struct tb_port *port, - unsigned int width_mask) +/** + * tb_port_width_supported() - Is the given link width supported + * @port: Port to check + * @width: Widths to check (bitmask) + * + * Can be called to any lane adapter. Checks if given @width is + * supported by the hardware and returns %true if it is. + */ +bool tb_port_width_supported(struct tb_port *port, unsigned int width) { u32 phy, widths; int ret; @@ -981,15 +1004,23 @@ static bool tb_port_is_width_supported(struct tb_port *port, if (!port->cap_phy) return false; + if (width & (TB_LINK_WIDTH_ASYM_TX | TB_LINK_WIDTH_ASYM_RX)) { + if (tb_port_get_link_generation(port) < 4 || + !usb4_port_asym_supported(port)) + return false; + } + ret = tb_port_read(port, &phy, TB_CFG_PORT, port->cap_phy + LANE_ADP_CS_0, 1); if (ret) return false; - widths = (phy & LANE_ADP_CS_0_SUPPORTED_WIDTH_MASK) >> - LANE_ADP_CS_0_SUPPORTED_WIDTH_SHIFT; - - return widths & width_mask; + /* + * The field encoding is the same as &enum tb_link_width (which is + * passed to @width). + */ + widths = FIELD_GET(LANE_ADP_CS_0_SUPPORTED_WIDTH_MASK, phy); + return widths & width; } /** @@ -1024,10 +1055,18 @@ int tb_port_set_link_width(struct tb_port *port, enum tb_link_width width) val |= LANE_ADP_CS_1_TARGET_WIDTH_SINGLE << LANE_ADP_CS_1_TARGET_WIDTH_SHIFT; break; + case TB_LINK_WIDTH_DUAL: + if (tb_port_get_link_generation(port) >= 4) + return usb4_port_asym_set_link_width(port, width); val |= LANE_ADP_CS_1_TARGET_WIDTH_DUAL << LANE_ADP_CS_1_TARGET_WIDTH_SHIFT; break; + + case TB_LINK_WIDTH_ASYM_TX: + case TB_LINK_WIDTH_ASYM_RX: + return usb4_port_asym_set_link_width(port, width); + default: return -EINVAL; } @@ -1152,7 +1191,7 @@ void tb_port_lane_bonding_disable(struct tb_port *port) /** * tb_port_wait_for_link_width() - Wait until link reaches specific width * @port: Port to wait for - * @width_mask: Expected link width mask + * @width: Expected link width (bitmask) * @timeout_msec: Timeout in ms how long to wait * * Should be used after both ends of the link have been bonded (or @@ -1161,14 +1200,14 @@ void tb_port_lane_bonding_disable(struct tb_port *port) * within the given timeout, %0 if it did. Can be passed a mask of * expected widths and succeeds if any of the widths is reached. */ -int tb_port_wait_for_link_width(struct tb_port *port, unsigned int width_mask, +int tb_port_wait_for_link_width(struct tb_port *port, unsigned int width, int timeout_msec) { ktime_t timeout = ktime_add_ms(ktime_get(), timeout_msec); int ret; /* Gen 4 link does not support single lane */ - if ((width_mask & TB_LINK_WIDTH_SINGLE) && + if ((width & TB_LINK_WIDTH_SINGLE) && tb_port_get_link_generation(port) >= 4) return -EOPNOTSUPP; @@ -1182,7 +1221,7 @@ int tb_port_wait_for_link_width(struct tb_port *port, unsigned int width_mask, */ if (ret != -EACCES) return ret; - } else if (ret & width_mask) { + } else if (ret & width) { return 0; } @@ -2821,6 +2860,38 @@ static int tb_switch_update_link_attributes(struct tb_switch *sw) return 0; } +/* Must be called after tb_switch_update_link_attributes() */ +static void tb_switch_link_init(struct tb_switch *sw) +{ + struct tb_port *up, *down; + bool bonded; + + if (!tb_route(sw) || tb_switch_is_icm(sw)) + return; + + tb_sw_dbg(sw, "current link speed %u.0 Gb/s\n", sw->link_speed); + tb_sw_dbg(sw, "current link width %s\n", width_name(sw->link_width)); + + bonded = sw->link_width >= TB_LINK_WIDTH_DUAL; + + /* + * Gen 4 links come up as bonded so update the port structures + * accordingly. + */ + up = tb_upstream_port(sw); + down = tb_switch_downstream_port(sw); + + up->bonded = bonded; + if (up->dual_link_port) + up->dual_link_port->bonded = bonded; + tb_port_update_credits(up); + + down->bonded = bonded; + if (down->dual_link_port) + down->dual_link_port->bonded = bonded; + tb_port_update_credits(down); +} + /** * tb_switch_lane_bonding_enable() - Enable lane bonding * @sw: Switch to enable lane bonding @@ -2829,24 +2900,20 @@ static int tb_switch_update_link_attributes(struct tb_switch *sw) * switch. If conditions are correct and both switches support the feature, * lanes are bonded. It is safe to call this to any switch. */ -int tb_switch_lane_bonding_enable(struct tb_switch *sw) +static int tb_switch_lane_bonding_enable(struct tb_switch *sw) { struct tb_port *up, *down; - u64 route = tb_route(sw); - unsigned int width_mask; + unsigned int width; int ret; - if (!route) - return 0; - if (!tb_switch_lane_bonding_possible(sw)) return 0; up = tb_upstream_port(sw); down = tb_switch_downstream_port(sw); - if (!tb_port_is_width_supported(up, TB_LINK_WIDTH_DUAL) || - !tb_port_is_width_supported(down, TB_LINK_WIDTH_DUAL)) + if (!tb_port_width_supported(up, TB_LINK_WIDTH_DUAL) || + !tb_port_width_supported(down, TB_LINK_WIDTH_DUAL)) return 0; /* @@ -2870,21 +2937,10 @@ int tb_switch_lane_bonding_enable(struct tb_switch *sw) } /* Any of the widths are all bonded */ - width_mask = TB_LINK_WIDTH_DUAL | TB_LINK_WIDTH_ASYM_TX | - TB_LINK_WIDTH_ASYM_RX; + width = TB_LINK_WIDTH_DUAL | TB_LINK_WIDTH_ASYM_TX | + TB_LINK_WIDTH_ASYM_RX; - ret = tb_port_wait_for_link_width(down, width_mask, 100); - if (ret) { - tb_port_warn(down, "timeout enabling lane bonding\n"); - return ret; - } - - tb_port_update_credits(down); - tb_port_update_credits(up); - tb_switch_update_link_attributes(sw); - - tb_sw_dbg(sw, "lane bonding enabled\n"); - return ret; + return tb_port_wait_for_link_width(down, width, 100); } /** @@ -2894,20 +2950,27 @@ int tb_switch_lane_bonding_enable(struct tb_switch *sw) * Disables lane bonding between @sw and parent. This can be called even * if lanes were not bonded originally. */ -void tb_switch_lane_bonding_disable(struct tb_switch *sw) +static int tb_switch_lane_bonding_disable(struct tb_switch *sw) { struct tb_port *up, *down; int ret; - if (!tb_route(sw)) - return; - up = tb_upstream_port(sw); if (!up->bonded) - return; + return 0; + + /* + * If the link is Gen 4 there is no way to switch the link to + * two single lane links so avoid that here. Also don't bother + * if the link is not up anymore (sw is unplugged). + */ + ret = tb_port_get_link_generation(up); + if (ret < 0) + return ret; + if (ret >= 4) + return -EOPNOTSUPP; down = tb_switch_downstream_port(sw); - tb_port_lane_bonding_disable(up); tb_port_lane_bonding_disable(down); @@ -2915,15 +2978,160 @@ void tb_switch_lane_bonding_disable(struct tb_switch *sw) * It is fine if we get other errors as the router might have * been unplugged. */ - ret = tb_port_wait_for_link_width(down, TB_LINK_WIDTH_SINGLE, 100); - if (ret == -ETIMEDOUT) - tb_sw_warn(sw, "timeout disabling lane bonding\n"); + return tb_port_wait_for_link_width(down, TB_LINK_WIDTH_SINGLE, 100); +} + +static int tb_switch_asym_enable(struct tb_switch *sw, enum tb_link_width width) +{ + struct tb_port *up, *down, *port; + enum tb_link_width down_width; + int ret; + + up = tb_upstream_port(sw); + down = tb_switch_downstream_port(sw); + + if (width == TB_LINK_WIDTH_ASYM_TX) { + down_width = TB_LINK_WIDTH_ASYM_RX; + port = down; + } else { + down_width = TB_LINK_WIDTH_ASYM_TX; + port = up; + } + + ret = tb_port_set_link_width(up, width); + if (ret) + return ret; + + ret = tb_port_set_link_width(down, down_width); + if (ret) + return ret; + + /* + * Initiate the change in the router that one of its TX lanes is + * changing to RX but do so only if there is an actual change. + */ + if (sw->link_width != width) { + ret = usb4_port_asym_start(port); + if (ret) + return ret; + + ret = tb_port_wait_for_link_width(up, width, 100); + if (ret) + return ret; + } + + sw->link_width = width; + return 0; +} + +static int tb_switch_asym_disable(struct tb_switch *sw) +{ + struct tb_port *up, *down; + int ret; + + up = tb_upstream_port(sw); + down = tb_switch_downstream_port(sw); + + ret = tb_port_set_link_width(up, TB_LINK_WIDTH_DUAL); + if (ret) + return ret; + + ret = tb_port_set_link_width(down, TB_LINK_WIDTH_DUAL); + if (ret) + return ret; + + /* + * Initiate the change in the router that has three TX lanes and + * is changing one of its TX lanes to RX but only if there is a + * change in the link width. + */ + if (sw->link_width > TB_LINK_WIDTH_DUAL) { + if (sw->link_width == TB_LINK_WIDTH_ASYM_TX) + ret = usb4_port_asym_start(up); + else + ret = usb4_port_asym_start(down); + if (ret) + return ret; + + ret = tb_port_wait_for_link_width(up, TB_LINK_WIDTH_DUAL, 100); + if (ret) + return ret; + } + + sw->link_width = TB_LINK_WIDTH_DUAL; + return 0; +} + +/** + * tb_switch_set_link_width() - Configure router link width + * @sw: Router to configure + * @width: The new link width + * + * Set device router link width to @width from router upstream port + * perspective. Supports also asymmetric links if the routers boths side + * of the link supports it. + * + * Does nothing for host router. + * + * Returns %0 in case of success, negative errno otherwise. + */ +int tb_switch_set_link_width(struct tb_switch *sw, enum tb_link_width width) +{ + struct tb_port *up, *down; + int ret = 0; + + if (!tb_route(sw)) + return 0; + + up = tb_upstream_port(sw); + down = tb_switch_downstream_port(sw); + + switch (width) { + case TB_LINK_WIDTH_SINGLE: + ret = tb_switch_lane_bonding_disable(sw); + break; + + case TB_LINK_WIDTH_DUAL: + if (sw->link_width == TB_LINK_WIDTH_ASYM_TX || + sw->link_width == TB_LINK_WIDTH_ASYM_RX) { + ret = tb_switch_asym_disable(sw); + if (ret) + break; + } + ret = tb_switch_lane_bonding_enable(sw); + break; + + case TB_LINK_WIDTH_ASYM_TX: + case TB_LINK_WIDTH_ASYM_RX: + ret = tb_switch_asym_enable(sw, width); + break; + } + + switch (ret) { + case 0: + break; + + case -ETIMEDOUT: + tb_sw_warn(sw, "timeout changing link width\n"); + return ret; + + case -ENOTCONN: + case -EOPNOTSUPP: + case -ENODEV: + return ret; + + default: + tb_sw_dbg(sw, "failed to change link width: %d\n", ret); + return ret; + } tb_port_update_credits(down); tb_port_update_credits(up); + tb_switch_update_link_attributes(sw); - tb_sw_dbg(sw, "lane bonding disabled\n"); + tb_sw_dbg(sw, "link width set to %s\n", width_name(width)); + return ret; } /** @@ -3090,6 +3298,8 @@ int tb_switch_add(struct tb_switch *sw) if (ret) return ret; + tb_switch_link_init(sw); + ret = tb_switch_clx_init(sw); if (ret) return ret; diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 39ec8da576ef..550f1c9a1170 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -985,7 +985,7 @@ static void tb_scan_port(struct tb_port *port) } /* Enable lane bonding if supported */ - tb_switch_lane_bonding_enable(sw); + tb_switch_set_link_width(sw, TB_LINK_WIDTH_DUAL); /* Set the link configured */ tb_switch_configure_link(sw); /* @@ -1103,7 +1103,8 @@ static void tb_free_unplugged_children(struct tb_switch *sw) tb_retimer_remove_all(port); tb_remove_dp_resources(port->remote->sw); tb_switch_unconfigure_link(port->remote->sw); - tb_switch_lane_bonding_disable(port->remote->sw); + tb_switch_set_link_width(port->remote->sw, + TB_LINK_WIDTH_SINGLE); tb_switch_remove(port->remote->sw); port->remote = NULL; if (port->dual_link_port) @@ -1766,7 +1767,8 @@ static void tb_handle_hotplug(struct work_struct *work) tb_remove_dp_resources(port->remote->sw); tb_switch_tmu_disable(port->remote->sw); tb_switch_unconfigure_link(port->remote->sw); - tb_switch_lane_bonding_disable(port->remote->sw); + tb_switch_set_link_width(port->remote->sw, + TB_LINK_WIDTH_SINGLE); tb_switch_remove(port->remote->sw); port->remote = NULL; if (port->dual_link_port) @@ -2258,7 +2260,8 @@ static void tb_restore_children(struct tb_switch *sw) continue; if (port->remote) { - tb_switch_lane_bonding_enable(port->remote->sw); + tb_switch_set_link_width(port->remote->sw, + port->remote->sw->link_width); tb_switch_configure_link(port->remote->sw); tb_restore_children(port->remote->sw); diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index d2ef9575231c..920dac8a63e1 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -164,11 +164,6 @@ struct tb_switch_tmu { * switches) you need to have domain lock held. * * In USB4 terminology this structure represents a router. - * - * Note @link_width is not the same as whether link is bonded or not. - * For Gen 4 links the link is also bonded when it is asymmetric. The - * correct way to find out whether the link is bonded or not is to look - * @bonded field of the upstream port. */ struct tb_switch { struct device dev; @@ -969,8 +964,7 @@ static inline bool tb_switch_is_icm(const struct tb_switch *sw) return !sw->config.enabled; } -int tb_switch_lane_bonding_enable(struct tb_switch *sw); -void tb_switch_lane_bonding_disable(struct tb_switch *sw); +int tb_switch_set_link_width(struct tb_switch *sw, enum tb_link_width width); int tb_switch_configure_link(struct tb_switch *sw); void tb_switch_unconfigure_link(struct tb_switch *sw); @@ -1103,10 +1097,11 @@ static inline bool tb_port_use_credit_allocation(const struct tb_port *port) int tb_port_get_link_speed(struct tb_port *port); int tb_port_get_link_generation(struct tb_port *port); int tb_port_get_link_width(struct tb_port *port); +bool tb_port_width_supported(struct tb_port *port, unsigned int width); int tb_port_set_link_width(struct tb_port *port, enum tb_link_width width); int tb_port_lane_bonding_enable(struct tb_port *port); void tb_port_lane_bonding_disable(struct tb_port *port); -int tb_port_wait_for_link_width(struct tb_port *port, unsigned int width_mask, +int tb_port_wait_for_link_width(struct tb_port *port, unsigned int width, int timeout_msec); int tb_port_update_credits(struct tb_port *port); @@ -1304,6 +1299,11 @@ int usb4_port_router_online(struct tb_port *port); int usb4_port_enumerate_retimers(struct tb_port *port); bool usb4_port_clx_supported(struct tb_port *port); int usb4_port_margining_caps(struct tb_port *port, u32 *caps); + +bool usb4_port_asym_supported(struct tb_port *port); +int usb4_port_asym_set_link_width(struct tb_port *port, enum tb_link_width width); +int usb4_port_asym_start(struct tb_port *port); + int usb4_port_hw_margin(struct tb_port *port, unsigned int lanes, unsigned int ber_level, bool timing, bool right_high, u32 *results); diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h index 736e28beac11..4419e274d2b4 100644 --- a/drivers/thunderbolt/tb_regs.h +++ b/drivers/thunderbolt/tb_regs.h @@ -348,10 +348,14 @@ struct tb_regs_port_header { #define LANE_ADP_CS_1 0x01 #define LANE_ADP_CS_1_TARGET_SPEED_MASK GENMASK(3, 0) #define LANE_ADP_CS_1_TARGET_SPEED_GEN3 0xc -#define LANE_ADP_CS_1_TARGET_WIDTH_MASK GENMASK(9, 4) +#define LANE_ADP_CS_1_TARGET_WIDTH_MASK GENMASK(5, 4) #define LANE_ADP_CS_1_TARGET_WIDTH_SHIFT 4 #define LANE_ADP_CS_1_TARGET_WIDTH_SINGLE 0x1 #define LANE_ADP_CS_1_TARGET_WIDTH_DUAL 0x3 +#define LANE_ADP_CS_1_TARGET_WIDTH_ASYM_MASK GENMASK(7, 6) +#define LANE_ADP_CS_1_TARGET_WIDTH_ASYM_TX 0x1 +#define LANE_ADP_CS_1_TARGET_WIDTH_ASYM_RX 0x2 +#define LANE_ADP_CS_1_TARGET_WIDTH_ASYM_DUAL 0x0 #define LANE_ADP_CS_1_CL0S_ENABLE BIT(10) #define LANE_ADP_CS_1_CL1_ENABLE BIT(11) #define LANE_ADP_CS_1_CL2_ENABLE BIT(12) @@ -384,6 +388,8 @@ struct tb_regs_port_header { #define PORT_CS_18_WOCS BIT(16) #define PORT_CS_18_WODS BIT(17) #define PORT_CS_18_WOU4S BIT(18) +#define PORT_CS_18_CSA BIT(22) +#define PORT_CS_18_TIP BIT(24) #define PORT_CS_19 0x13 #define PORT_CS_19_DPR BIT(0) #define PORT_CS_19_PC BIT(3) @@ -391,6 +397,7 @@ struct tb_regs_port_header { #define PORT_CS_19_WOC BIT(16) #define PORT_CS_19_WOD BIT(17) #define PORT_CS_19_WOU4 BIT(18) +#define PORT_CS_19_START_ASYM BIT(24) /* Display Port adapter registers */ #define ADP_DP_CS_0 0x00 diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 3aa32d7f9f6a..e048e81c3027 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -1494,6 +1494,112 @@ bool usb4_port_clx_supported(struct tb_port *port) return !!(val & PORT_CS_18_CPS); } +/** + * usb4_port_asym_supported() - If the port supports asymmetric link + * @port: USB4 port + * + * Checks if the port and the cable supports asymmetric link and returns + * %true in that case. + */ +bool usb4_port_asym_supported(struct tb_port *port) +{ + u32 val; + + if (!port->cap_usb4) + return false; + + if (tb_port_read(port, &val, TB_CFG_PORT, port->cap_usb4 + PORT_CS_18, 1)) + return false; + + return !!(val & PORT_CS_18_CSA); +} + +/** + * usb4_port_asym_set_link_width() - Set link width to asymmetric or symmetric + * @port: USB4 port + * @width: Asymmetric width to configure + * + * Sets USB4 port link width to @width. Can be called for widths where + * usb4_port_asym_width_supported() returned @true. + */ +int usb4_port_asym_set_link_width(struct tb_port *port, enum tb_link_width width) +{ + u32 val; + int ret; + + if (!port->cap_phy) + return -EINVAL; + + ret = tb_port_read(port, &val, TB_CFG_PORT, + port->cap_phy + LANE_ADP_CS_1, 1); + if (ret) + return ret; + + val &= ~LANE_ADP_CS_1_TARGET_WIDTH_ASYM_MASK; + switch (width) { + case TB_LINK_WIDTH_DUAL: + val |= FIELD_PREP(LANE_ADP_CS_1_TARGET_WIDTH_ASYM_MASK, + LANE_ADP_CS_1_TARGET_WIDTH_ASYM_DUAL); + break; + case TB_LINK_WIDTH_ASYM_TX: + val |= FIELD_PREP(LANE_ADP_CS_1_TARGET_WIDTH_ASYM_MASK, + LANE_ADP_CS_1_TARGET_WIDTH_ASYM_TX); + break; + case TB_LINK_WIDTH_ASYM_RX: + val |= FIELD_PREP(LANE_ADP_CS_1_TARGET_WIDTH_ASYM_MASK, + LANE_ADP_CS_1_TARGET_WIDTH_ASYM_RX); + break; + default: + return -EINVAL; + } + + return tb_port_write(port, &val, TB_CFG_PORT, + port->cap_phy + LANE_ADP_CS_1, 1); +} + +/** + * usb4_port_asym_start() - Start symmetry change and wait for completion + * @port: USB4 port + * + * Start symmetry change of the link to asymmetric or symmetric + * (according to what was previously set in tb_port_set_link_width(). + * Wait for completion of the change. + * + * Returns %0 in case of success, %-ETIMEDOUT if case of timeout or + * a negative errno in case of a failure. + */ +int usb4_port_asym_start(struct tb_port *port) +{ + int ret; + u32 val; + + ret = tb_port_read(port, &val, TB_CFG_PORT, + port->cap_usb4 + PORT_CS_19, 1); + if (ret) + return ret; + + val &= ~PORT_CS_19_START_ASYM; + val |= FIELD_PREP(PORT_CS_19_START_ASYM, 1); + + ret = tb_port_write(port, &val, TB_CFG_PORT, + port->cap_usb4 + PORT_CS_19, 1); + if (ret) + return ret; + + /* + * Wait for PORT_CS_19_START_ASYM to be 0. This means the USB4 + * port started the symmetry transition. + */ + ret = usb4_port_wait_for_bit(port, port->cap_usb4 + PORT_CS_19, + PORT_CS_19_START_ASYM, 0, 1000); + if (ret) + return ret; + + /* Then wait for the transtion to be completed */ + return usb4_port_wait_for_bit(port, port->cap_usb4 + PORT_CS_18, + PORT_CS_18_TIP, 0, 5000); +} + /** * usb4_port_margining_caps() - Read USB4 port marginig capabilities * @port: USB4 port From aed38a3eaf652230e8622ec9317998540276f61d Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 1 Oct 2024 17:31:08 +0000 Subject: [PATCH 508/534] thunderbolt: Configure asymmetric link if needed and bandwidth allows [ Upstream commit 3e36528c1127b20492ffaea53930bcc3df46a718 ] USB4 v2 spec defines a Gen 4 link that can operate as an asymmetric 120/40G. When the link is asymmetric, the USB4 port on one side of the link operates with three TX lanes and one RX lane, while the USB4 port on the opposite side of the link operates with three RX lanes and one TX lane. Using asymmetric link we can get much more bandwidth from one direction and that allows us to support the new Ultra High Bit Rate DisplayPort modes (that consume up to 77.37 Gb/s). Add the basic logic for changing Gen 4 links to asymmetric and back following the below rules: 1) The default threshold is 45 Gb/s (tunable by asym_threshold) 2) When DisplayPort tunnel is established, or when there is bandwidth request through bandwidth allocation mode, the links can be transitioned to asymmetric or symmetric (depending on the required bandwidth). 3) Only DisplayPort bandwidth on a link, is taken into account when deciding whether a link is transitioned to asymmetric or symmetric 4) If bandwidth on a link is >= asym_threshold transition the link to asymmetric 5) If bandwidth on a link < asym_threshold transition the link to symmetric (unless the bandwidth request is above currently allocated on a tunnel). 6) If a USB4 v2 device router with symmetric link is connected, transition all the links above it to symmetric if the bandwidth allows. Signed-off-by: Gil Fine Co-developed-by: Mika Westerberg Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tb.c | 709 +++++++++++++++++++++++++++++++-------- 1 file changed, 572 insertions(+), 137 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 550f1c9a1170..c52cbd5194f1 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -16,8 +16,31 @@ #include "tb_regs.h" #include "tunnel.h" -#define TB_TIMEOUT 100 /* ms */ -#define MAX_GROUPS 7 /* max Group_ID is 7 */ +#define TB_TIMEOUT 100 /* ms */ + +/* + * Minimum bandwidth (in Mb/s) that is needed in the single transmitter/receiver + * direction. This is 40G - 10% guard band bandwidth. + */ +#define TB_ASYM_MIN (40000 * 90 / 100) + +/* + * Threshold bandwidth (in Mb/s) that is used to switch the links to + * asymmetric and back. This is selected as 45G which means when the + * request is higher than this, we switch the link to asymmetric, and + * when it is less than this we switch it back. The 45G is selected so + * that we still have 27G (of the total 72G) for bulk PCIe traffic when + * switching back to symmetric. + */ +#define TB_ASYM_THRESHOLD 45000 + +#define MAX_GROUPS 7 /* max Group_ID is 7 */ + +static unsigned int asym_threshold = TB_ASYM_THRESHOLD; +module_param_named(asym_threshold, asym_threshold, uint, 0444); +MODULE_PARM_DESC(asym_threshold, + "threshold (Mb/s) when to Gen 4 switch link symmetry. 0 disables. (default: " + __MODULE_STRING(TB_ASYM_THRESHOLD) ")"); /** * struct tb_cm - Simple Thunderbolt connection manager @@ -285,14 +308,32 @@ static int tb_enable_clx(struct tb_switch *sw) return ret == -EOPNOTSUPP ? 0 : ret; } -/* Disables CL states up to the host router */ -static void tb_disable_clx(struct tb_switch *sw) +/** + * tb_disable_clx() - Disable CL states up to host router + * @sw: Router to start + * + * Disables CL states from @sw up to the host router. Returns true if + * any CL state were disabled. This can be used to figure out whether + * the link was setup by us or the boot firmware so we don't + * accidentally enable them if they were not enabled during discovery. + */ +static bool tb_disable_clx(struct tb_switch *sw) { + bool disabled = false; + do { - if (tb_switch_clx_disable(sw) < 0) + int ret; + + ret = tb_switch_clx_disable(sw); + if (ret > 0) + disabled = true; + else if (ret < 0) tb_sw_warn(sw, "failed to disable CL states\n"); + sw = tb_switch_parent(sw); } while (sw); + + return disabled; } static int tb_increase_switch_tmu_accuracy(struct device *dev, void *data) @@ -572,144 +613,294 @@ static struct tb_tunnel *tb_find_first_usb3_tunnel(struct tb *tb, return tb_find_tunnel(tb, TB_TUNNEL_USB3, usb3_down, NULL); } -static int tb_available_bandwidth(struct tb *tb, struct tb_port *src_port, - struct tb_port *dst_port, int *available_up, int *available_down) +/** + * tb_consumed_usb3_pcie_bandwidth() - Consumed USB3/PCIe bandwidth over a single link + * @tb: Domain structure + * @src_port: Source protocol adapter + * @dst_port: Destination protocol adapter + * @port: USB4 port the consumed bandwidth is calculated + * @consumed_up: Consumed upsream bandwidth (Mb/s) + * @consumed_down: Consumed downstream bandwidth (Mb/s) + * + * Calculates consumed USB3 and PCIe bandwidth at @port between path + * from @src_port to @dst_port. Does not take tunnel starting from + * @src_port and ending from @src_port into account. + */ +static int tb_consumed_usb3_pcie_bandwidth(struct tb *tb, + struct tb_port *src_port, + struct tb_port *dst_port, + struct tb_port *port, + int *consumed_up, + int *consumed_down) { - int usb3_consumed_up, usb3_consumed_down, ret; - struct tb_cm *tcm = tb_priv(tb); + int pci_consumed_up, pci_consumed_down; struct tb_tunnel *tunnel; - struct tb_port *port; - tb_dbg(tb, "calculating available bandwidth between %llx:%u <-> %llx:%u\n", - tb_route(src_port->sw), src_port->port, tb_route(dst_port->sw), - dst_port->port); + *consumed_up = *consumed_down = 0; tunnel = tb_find_first_usb3_tunnel(tb, src_port, dst_port); if (tunnel && tunnel->src_port != src_port && tunnel->dst_port != dst_port) { - ret = tb_tunnel_consumed_bandwidth(tunnel, &usb3_consumed_up, - &usb3_consumed_down); + int ret; + + ret = tb_tunnel_consumed_bandwidth(tunnel, consumed_up, + consumed_down); if (ret) return ret; - } else { - usb3_consumed_up = 0; - usb3_consumed_down = 0; } + /* + * If there is anything reserved for PCIe bulk traffic take it + * into account here too. + */ + if (tb_tunnel_reserved_pci(port, &pci_consumed_up, &pci_consumed_down)) { + *consumed_up += pci_consumed_up; + *consumed_down += pci_consumed_down; + } + + return 0; +} + +/** + * tb_consumed_dp_bandwidth() - Consumed DP bandwidth over a single link + * @tb: Domain structure + * @src_port: Source protocol adapter + * @dst_port: Destination protocol adapter + * @port: USB4 port the consumed bandwidth is calculated + * @consumed_up: Consumed upsream bandwidth (Mb/s) + * @consumed_down: Consumed downstream bandwidth (Mb/s) + * + * Calculates consumed DP bandwidth at @port between path from @src_port + * to @dst_port. Does not take tunnel starting from @src_port and ending + * from @src_port into account. + */ +static int tb_consumed_dp_bandwidth(struct tb *tb, + struct tb_port *src_port, + struct tb_port *dst_port, + struct tb_port *port, + int *consumed_up, + int *consumed_down) +{ + struct tb_cm *tcm = tb_priv(tb); + struct tb_tunnel *tunnel; + int ret; + + *consumed_up = *consumed_down = 0; + + /* + * Find all DP tunnels that cross the port and reduce + * their consumed bandwidth from the available. + */ + list_for_each_entry(tunnel, &tcm->tunnel_list, list) { + int dp_consumed_up, dp_consumed_down; + + if (tb_tunnel_is_invalid(tunnel)) + continue; + + if (!tb_tunnel_is_dp(tunnel)) + continue; + + if (!tb_tunnel_port_on_path(tunnel, port)) + continue; + + /* + * Ignore the DP tunnel between src_port and dst_port + * because it is the same tunnel and we may be + * re-calculating estimated bandwidth. + */ + if (tunnel->src_port == src_port && + tunnel->dst_port == dst_port) + continue; + + ret = tb_tunnel_consumed_bandwidth(tunnel, &dp_consumed_up, + &dp_consumed_down); + if (ret) + return ret; + + *consumed_up += dp_consumed_up; + *consumed_down += dp_consumed_down; + } + + return 0; +} + +static bool tb_asym_supported(struct tb_port *src_port, struct tb_port *dst_port, + struct tb_port *port) +{ + bool downstream = tb_port_path_direction_downstream(src_port, dst_port); + enum tb_link_width width; + + if (tb_is_upstream_port(port)) + width = downstream ? TB_LINK_WIDTH_ASYM_RX : TB_LINK_WIDTH_ASYM_TX; + else + width = downstream ? TB_LINK_WIDTH_ASYM_TX : TB_LINK_WIDTH_ASYM_RX; + + return tb_port_width_supported(port, width); +} + +/** + * tb_maximum_banwidth() - Maximum bandwidth over a single link + * @tb: Domain structure + * @src_port: Source protocol adapter + * @dst_port: Destination protocol adapter + * @port: USB4 port the total bandwidth is calculated + * @max_up: Maximum upstream bandwidth (Mb/s) + * @max_down: Maximum downstream bandwidth (Mb/s) + * @include_asym: Include bandwidth if the link is switched from + * symmetric to asymmetric + * + * Returns maximum possible bandwidth in @max_up and @max_down over a + * single link at @port. If @include_asym is set then includes the + * additional banwdith if the links are transitioned into asymmetric to + * direction from @src_port to @dst_port. + */ +static int tb_maximum_bandwidth(struct tb *tb, struct tb_port *src_port, + struct tb_port *dst_port, struct tb_port *port, + int *max_up, int *max_down, bool include_asym) +{ + bool downstream = tb_port_path_direction_downstream(src_port, dst_port); + int link_speed, link_width, up_bw, down_bw; + + /* + * Can include asymmetric, only if it is actually supported by + * the lane adapter. + */ + if (!tb_asym_supported(src_port, dst_port, port)) + include_asym = false; + + if (tb_is_upstream_port(port)) { + link_speed = port->sw->link_speed; + /* + * sw->link_width is from upstream perspective so we use + * the opposite for downstream of the host router. + */ + if (port->sw->link_width == TB_LINK_WIDTH_ASYM_TX) { + up_bw = link_speed * 3 * 1000; + down_bw = link_speed * 1 * 1000; + } else if (port->sw->link_width == TB_LINK_WIDTH_ASYM_RX) { + up_bw = link_speed * 1 * 1000; + down_bw = link_speed * 3 * 1000; + } else if (include_asym) { + /* + * The link is symmetric at the moment but we + * can switch it to asymmetric as needed. Report + * this bandwidth as available (even though it + * is not yet enabled). + */ + if (downstream) { + up_bw = link_speed * 1 * 1000; + down_bw = link_speed * 3 * 1000; + } else { + up_bw = link_speed * 3 * 1000; + down_bw = link_speed * 1 * 1000; + } + } else { + up_bw = link_speed * port->sw->link_width * 1000; + down_bw = up_bw; + } + } else { + link_speed = tb_port_get_link_speed(port); + if (link_speed < 0) + return link_speed; + + link_width = tb_port_get_link_width(port); + if (link_width < 0) + return link_width; + + if (link_width == TB_LINK_WIDTH_ASYM_TX) { + up_bw = link_speed * 1 * 1000; + down_bw = link_speed * 3 * 1000; + } else if (link_width == TB_LINK_WIDTH_ASYM_RX) { + up_bw = link_speed * 3 * 1000; + down_bw = link_speed * 1 * 1000; + } else if (include_asym) { + /* + * The link is symmetric at the moment but we + * can switch it to asymmetric as needed. Report + * this bandwidth as available (even though it + * is not yet enabled). + */ + if (downstream) { + up_bw = link_speed * 1 * 1000; + down_bw = link_speed * 3 * 1000; + } else { + up_bw = link_speed * 3 * 1000; + down_bw = link_speed * 1 * 1000; + } + } else { + up_bw = link_speed * link_width * 1000; + down_bw = up_bw; + } + } + + /* Leave 10% guard band */ + *max_up = up_bw - up_bw / 10; + *max_down = down_bw - down_bw / 10; + + tb_port_dbg(port, "link maximum bandwidth %d/%d Mb/s\n", *max_up, *max_down); + return 0; +} + +/** + * tb_available_bandwidth() - Available bandwidth for tunneling + * @tb: Domain structure + * @src_port: Source protocol adapter + * @dst_port: Destination protocol adapter + * @available_up: Available bandwidth upstream (Mb/s) + * @available_down: Available bandwidth downstream (Mb/s) + * @include_asym: Include bandwidth if the link is switched from + * symmetric to asymmetric + * + * Calculates maximum available bandwidth for protocol tunneling between + * @src_port and @dst_port at the moment. This is minimum of maximum + * link bandwidth across all links reduced by currently consumed + * bandwidth on that link. + * + * If @include_asym is true then includes also bandwidth that can be + * added when the links are transitioned into asymmetric (but does not + * transition the links). + */ +static int tb_available_bandwidth(struct tb *tb, struct tb_port *src_port, + struct tb_port *dst_port, int *available_up, + int *available_down, bool include_asym) +{ + struct tb_port *port; + int ret; + /* Maximum possible bandwidth asymmetric Gen 4 link is 120 Gb/s */ *available_up = *available_down = 120000; /* Find the minimum available bandwidth over all links */ tb_for_each_port_on_path(src_port, dst_port, port) { - int link_speed, link_width, up_bw, down_bw; - int pci_reserved_up, pci_reserved_down; + int max_up, max_down, consumed_up, consumed_down; if (!tb_port_is_null(port)) continue; - if (tb_is_upstream_port(port)) { - link_speed = port->sw->link_speed; - /* - * sw->link_width is from upstream perspective - * so we use the opposite for downstream of the - * host router. - */ - if (port->sw->link_width == TB_LINK_WIDTH_ASYM_TX) { - up_bw = link_speed * 3 * 1000; - down_bw = link_speed * 1 * 1000; - } else if (port->sw->link_width == TB_LINK_WIDTH_ASYM_RX) { - up_bw = link_speed * 1 * 1000; - down_bw = link_speed * 3 * 1000; - } else { - up_bw = link_speed * port->sw->link_width * 1000; - down_bw = up_bw; - } - } else { - link_speed = tb_port_get_link_speed(port); - if (link_speed < 0) - return link_speed; + ret = tb_maximum_bandwidth(tb, src_port, dst_port, port, + &max_up, &max_down, include_asym); + if (ret) + return ret; - link_width = tb_port_get_link_width(port); - if (link_width < 0) - return link_width; + ret = tb_consumed_usb3_pcie_bandwidth(tb, src_port, dst_port, + port, &consumed_up, + &consumed_down); + if (ret) + return ret; + max_up -= consumed_up; + max_down -= consumed_down; - if (link_width == TB_LINK_WIDTH_ASYM_TX) { - up_bw = link_speed * 1 * 1000; - down_bw = link_speed * 3 * 1000; - } else if (link_width == TB_LINK_WIDTH_ASYM_RX) { - up_bw = link_speed * 3 * 1000; - down_bw = link_speed * 1 * 1000; - } else { - up_bw = link_speed * link_width * 1000; - down_bw = up_bw; - } - } + ret = tb_consumed_dp_bandwidth(tb, src_port, dst_port, port, + &consumed_up, &consumed_down); + if (ret) + return ret; + max_up -= consumed_up; + max_down -= consumed_down; - /* Leave 10% guard band */ - up_bw -= up_bw / 10; - down_bw -= down_bw / 10; - - tb_port_dbg(port, "link total bandwidth %d/%d Mb/s\n", up_bw, - down_bw); - - /* - * Find all DP tunnels that cross the port and reduce - * their consumed bandwidth from the available. - */ - list_for_each_entry(tunnel, &tcm->tunnel_list, list) { - int dp_consumed_up, dp_consumed_down; - - if (tb_tunnel_is_invalid(tunnel)) - continue; - - if (!tb_tunnel_is_dp(tunnel)) - continue; - - if (!tb_tunnel_port_on_path(tunnel, port)) - continue; - - /* - * Ignore the DP tunnel between src_port and - * dst_port because it is the same tunnel and we - * may be re-calculating estimated bandwidth. - */ - if (tunnel->src_port == src_port && - tunnel->dst_port == dst_port) - continue; - - ret = tb_tunnel_consumed_bandwidth(tunnel, - &dp_consumed_up, - &dp_consumed_down); - if (ret) - return ret; - - up_bw -= dp_consumed_up; - down_bw -= dp_consumed_down; - } - - /* - * If USB3 is tunneled from the host router down to the - * branch leading to port we need to take USB3 consumed - * bandwidth into account regardless whether it actually - * crosses the port. - */ - up_bw -= usb3_consumed_up; - down_bw -= usb3_consumed_down; - - /* - * If there is anything reserved for PCIe bulk traffic - * take it into account here too. - */ - if (tb_tunnel_reserved_pci(port, &pci_reserved_up, - &pci_reserved_down)) { - up_bw -= pci_reserved_up; - down_bw -= pci_reserved_down; - } - - if (up_bw < *available_up) - *available_up = up_bw; - if (down_bw < *available_down) - *available_down = down_bw; + if (max_up < *available_up) + *available_up = max_up; + if (max_down < *available_down) + *available_down = max_down; } if (*available_up < 0) @@ -747,7 +938,7 @@ static void tb_reclaim_usb3_bandwidth(struct tb *tb, struct tb_port *src_port, * That determines the whole USB3 bandwidth for this branch. */ ret = tb_available_bandwidth(tb, tunnel->src_port, tunnel->dst_port, - &available_up, &available_down); + &available_up, &available_down, false); if (ret) { tb_warn(tb, "failed to calculate available bandwidth\n"); return; @@ -805,8 +996,8 @@ static int tb_tunnel_usb3(struct tb *tb, struct tb_switch *sw) return ret; } - ret = tb_available_bandwidth(tb, down, up, &available_up, - &available_down); + ret = tb_available_bandwidth(tb, down, up, &available_up, &available_down, + false); if (ret) goto err_reclaim; @@ -867,6 +1058,225 @@ static int tb_create_usb3_tunnels(struct tb_switch *sw) return 0; } +/** + * tb_configure_asym() - Transition links to asymmetric if needed + * @tb: Domain structure + * @src_port: Source adapter to start the transition + * @dst_port: Destination adapter + * @requested_up: Additional bandwidth (Mb/s) required upstream + * @requested_down: Additional bandwidth (Mb/s) required downstream + * + * Transition links between @src_port and @dst_port into asymmetric, with + * three lanes in the direction from @src_port towards @dst_port and one lane + * in the opposite direction, if the bandwidth requirements + * (requested + currently consumed) on that link exceed @asym_threshold. + * + * Must be called with available >= requested over all links. + */ +static int tb_configure_asym(struct tb *tb, struct tb_port *src_port, + struct tb_port *dst_port, int requested_up, + int requested_down) +{ + struct tb_switch *sw; + bool clx, downstream; + struct tb_port *up; + int ret = 0; + + if (!asym_threshold) + return 0; + + /* Disable CL states before doing any transitions */ + downstream = tb_port_path_direction_downstream(src_port, dst_port); + /* Pick up router deepest in the hierarchy */ + if (downstream) + sw = dst_port->sw; + else + sw = src_port->sw; + + clx = tb_disable_clx(sw); + + tb_for_each_upstream_port_on_path(src_port, dst_port, up) { + int consumed_up, consumed_down; + enum tb_link_width width; + + ret = tb_consumed_dp_bandwidth(tb, src_port, dst_port, up, + &consumed_up, &consumed_down); + if (ret) + break; + + if (downstream) { + /* + * Downstream so make sure upstream is within the 36G + * (40G - guard band 10%), and the requested is above + * what the threshold is. + */ + if (consumed_up + requested_up >= TB_ASYM_MIN) { + ret = -ENOBUFS; + break; + } + /* Does consumed + requested exceed the threshold */ + if (consumed_down + requested_down < asym_threshold) + continue; + + width = TB_LINK_WIDTH_ASYM_RX; + } else { + /* Upstream, the opposite of above */ + if (consumed_down + requested_down >= TB_ASYM_MIN) { + ret = -ENOBUFS; + break; + } + if (consumed_up + requested_up < asym_threshold) + continue; + + width = TB_LINK_WIDTH_ASYM_TX; + } + + if (up->sw->link_width == width) + continue; + + if (!tb_port_width_supported(up, width)) + continue; + + tb_sw_dbg(up->sw, "configuring asymmetric link\n"); + + /* + * Here requested + consumed > threshold so we need to + * transtion the link into asymmetric now. + */ + ret = tb_switch_set_link_width(up->sw, width); + if (ret) { + tb_sw_warn(up->sw, "failed to set link width\n"); + break; + } + } + + /* Re-enable CL states if they were previosly enabled */ + if (clx) + tb_enable_clx(sw); + + return ret; +} + +/** + * tb_configure_sym() - Transition links to symmetric if possible + * @tb: Domain structure + * @src_port: Source adapter to start the transition + * @dst_port: Destination adapter + * @requested_up: New lower bandwidth request upstream (Mb/s) + * @requested_down: New lower bandwidth request downstream (Mb/s) + * + * Goes over each link from @src_port to @dst_port and tries to + * transition the link to symmetric if the currently consumed bandwidth + * allows. + */ +static int tb_configure_sym(struct tb *tb, struct tb_port *src_port, + struct tb_port *dst_port, int requested_up, + int requested_down) +{ + struct tb_switch *sw; + bool clx, downstream; + struct tb_port *up; + int ret = 0; + + if (!asym_threshold) + return 0; + + /* Disable CL states before doing any transitions */ + downstream = tb_port_path_direction_downstream(src_port, dst_port); + /* Pick up router deepest in the hierarchy */ + if (downstream) + sw = dst_port->sw; + else + sw = src_port->sw; + + clx = tb_disable_clx(sw); + + tb_for_each_upstream_port_on_path(src_port, dst_port, up) { + int consumed_up, consumed_down; + + /* Already symmetric */ + if (up->sw->link_width <= TB_LINK_WIDTH_DUAL) + continue; + /* Unplugged, no need to switch */ + if (up->sw->is_unplugged) + continue; + + ret = tb_consumed_dp_bandwidth(tb, src_port, dst_port, up, + &consumed_up, &consumed_down); + if (ret) + break; + + if (downstream) { + /* + * Downstream so we want the consumed_down < threshold. + * Upstream traffic should be less than 36G (40G + * guard band 10%) as the link was configured asymmetric + * already. + */ + if (consumed_down + requested_down >= asym_threshold) + continue; + } else { + if (consumed_up + requested_up >= asym_threshold) + continue; + } + + if (up->sw->link_width == TB_LINK_WIDTH_DUAL) + continue; + + tb_sw_dbg(up->sw, "configuring symmetric link\n"); + + ret = tb_switch_set_link_width(up->sw, TB_LINK_WIDTH_DUAL); + if (ret) { + tb_sw_warn(up->sw, "failed to set link width\n"); + break; + } + } + + /* Re-enable CL states if they were previosly enabled */ + if (clx) + tb_enable_clx(sw); + + return ret; +} + +static void tb_configure_link(struct tb_port *down, struct tb_port *up, + struct tb_switch *sw) +{ + struct tb *tb = sw->tb; + + /* Link the routers using both links if available */ + down->remote = up; + up->remote = down; + if (down->dual_link_port && up->dual_link_port) { + down->dual_link_port->remote = up->dual_link_port; + up->dual_link_port->remote = down->dual_link_port; + } + + /* + * Enable lane bonding if the link is currently two single lane + * links. + */ + if (sw->link_width < TB_LINK_WIDTH_DUAL) + tb_switch_set_link_width(sw, TB_LINK_WIDTH_DUAL); + + /* + * Device router that comes up as symmetric link is + * connected deeper in the hierarchy, we transition the links + * above into symmetric if bandwidth allows. + */ + if (tb_switch_depth(sw) > 1 && + tb_port_get_link_generation(up) >= 4 && + up->sw->link_width == TB_LINK_WIDTH_DUAL) { + struct tb_port *host_port; + + host_port = tb_port_at(tb_route(sw), tb->root_switch); + tb_configure_sym(tb, host_port, up, 0, 0); + } + + /* Set the link configured */ + tb_switch_configure_link(sw); +} + static void tb_scan_port(struct tb_port *port); /* @@ -975,19 +1385,9 @@ static void tb_scan_port(struct tb_port *port) goto out_rpm_put; } - /* Link the switches using both links if available */ upstream_port = tb_upstream_port(sw); - port->remote = upstream_port; - upstream_port->remote = port; - if (port->dual_link_port && upstream_port->dual_link_port) { - port->dual_link_port->remote = upstream_port->dual_link_port; - upstream_port->dual_link_port->remote = port->dual_link_port; - } + tb_configure_link(port, upstream_port, sw); - /* Enable lane bonding if supported */ - tb_switch_set_link_width(sw, TB_LINK_WIDTH_DUAL); - /* Set the link configured */ - tb_switch_configure_link(sw); /* * CL0s and CL1 are enabled and supported together. * Silently ignore CLx enabling in case CLx is not supported. @@ -1051,6 +1451,11 @@ static void tb_deactivate_and_free_tunnel(struct tb_tunnel *tunnel) * deallocated properly. */ tb_switch_dealloc_dp_resource(src_port->sw, src_port); + /* + * If bandwidth on a link is < asym_threshold + * transition the link to symmetric. + */ + tb_configure_sym(tb, src_port, dst_port, 0, 0); /* Now we can allow the domain to runtime suspend again */ pm_runtime_mark_last_busy(&dst_port->sw->dev); pm_runtime_put_autosuspend(&dst_port->sw->dev); @@ -1208,7 +1613,7 @@ tb_recalc_estimated_bandwidth_for_group(struct tb_bandwidth_group *group) out = tunnel->dst_port; ret = tb_available_bandwidth(tb, in, out, &estimated_up, - &estimated_down); + &estimated_down, true); if (ret) { tb_port_warn(in, "failed to re-calculate estimated bandwidth\n"); @@ -1299,6 +1704,7 @@ static bool tb_tunnel_one_dp(struct tb *tb) int available_up, available_down, ret, link_nr; struct tb_cm *tcm = tb_priv(tb); struct tb_port *port, *in, *out; + int consumed_up, consumed_down; struct tb_tunnel *tunnel; /* @@ -1375,7 +1781,8 @@ static bool tb_tunnel_one_dp(struct tb *tb) goto err_detach_group; } - ret = tb_available_bandwidth(tb, in, out, &available_up, &available_down); + ret = tb_available_bandwidth(tb, in, out, &available_up, &available_down, + true); if (ret) goto err_reclaim_usb; @@ -1397,6 +1804,13 @@ static bool tb_tunnel_one_dp(struct tb *tb) list_add_tail(&tunnel->list, &tcm->tunnel_list); tb_reclaim_usb3_bandwidth(tb, in, out); + /* + * Transition the links to asymmetric if the consumption exceeds + * the threshold. + */ + if (!tb_tunnel_consumed_bandwidth(tunnel, &consumed_up, &consumed_down)) + tb_configure_asym(tb, in, out, consumed_up, consumed_down); + /* Update the domain with the new bandwidth estimation */ tb_recalc_estimated_bandwidth(tb); @@ -1903,6 +2317,11 @@ static int tb_alloc_dp_bandwidth(struct tb_tunnel *tunnel, int *requested_up, if ((*requested_up >= 0 && requested_up_corrected <= allocated_up) || (*requested_down >= 0 && requested_down_corrected <= allocated_down)) { + /* + * If bandwidth on a link is < asym_threshold transition + * the link to symmetric. + */ + tb_configure_sym(tb, in, out, *requested_up, *requested_down); /* * If requested bandwidth is less or equal than what is * currently allocated to that tunnel we simply change @@ -1928,7 +2347,8 @@ static int tb_alloc_dp_bandwidth(struct tb_tunnel *tunnel, int *requested_up, * are also in the same group but we use the same function here * that we use with the normal bandwidth allocation). */ - ret = tb_available_bandwidth(tb, in, out, &available_up, &available_down); + ret = tb_available_bandwidth(tb, in, out, &available_up, &available_down, + true); if (ret) goto reclaim; @@ -1937,8 +2357,23 @@ static int tb_alloc_dp_bandwidth(struct tb_tunnel *tunnel, int *requested_up, if ((*requested_up >= 0 && available_up >= requested_up_corrected) || (*requested_down >= 0 && available_down >= requested_down_corrected)) { + /* + * If bandwidth on a link is >= asym_threshold + * transition the link to asymmetric. + */ + ret = tb_configure_asym(tb, in, out, *requested_up, + *requested_down); + if (ret) { + tb_configure_sym(tb, in, out, 0, 0); + return ret; + } + ret = tb_tunnel_alloc_bandwidth(tunnel, requested_up, requested_down); + if (ret) { + tb_tunnel_warn(tunnel, "failed to allocate bandwidth\n"); + tb_configure_sym(tb, in, out, 0, 0); + } } else { ret = -ENOBUFS; } From 4d0261cea469855fa3a33df8dd1cb29b7e6ee05a Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 1 Oct 2024 17:31:09 +0000 Subject: [PATCH 509/534] thunderbolt: Improve DisplayPort tunnel setup process to be more robust [ Upstream commit b4734507ac55cc7ea1380e20e83f60fcd7031955 ] After DisplayPort tunnel setup, we add verification that the DPRX capabilities read process completed. Otherwise, we bail out, teardown the tunnel, and try setup another DisplayPort tunnel using next available DP IN adapter. We do so till all DP IN adapters tried. This way, we avoid allocating DP IN adapter and (bandwidth for it) for unusable tunnel. Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg Signed-off-by: Qin Wan Signed-off-by: Alexandru Gagniuc Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tb.c | 86 ++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index c52cbd5194f1..ea155547e871 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1699,48 +1699,14 @@ static struct tb_port *tb_find_dp_out(struct tb *tb, struct tb_port *in) return NULL; } -static bool tb_tunnel_one_dp(struct tb *tb) +static bool tb_tunnel_one_dp(struct tb *tb, struct tb_port *in, + struct tb_port *out) { int available_up, available_down, ret, link_nr; struct tb_cm *tcm = tb_priv(tb); - struct tb_port *port, *in, *out; int consumed_up, consumed_down; struct tb_tunnel *tunnel; - /* - * Find pair of inactive DP IN and DP OUT adapters and then - * establish a DP tunnel between them. - */ - tb_dbg(tb, "looking for DP IN <-> DP OUT pairs:\n"); - - in = NULL; - out = NULL; - list_for_each_entry(port, &tcm->dp_resources, list) { - if (!tb_port_is_dpin(port)) - continue; - - if (tb_port_is_enabled(port)) { - tb_port_dbg(port, "DP IN in use\n"); - continue; - } - - in = port; - tb_port_dbg(in, "DP IN available\n"); - - out = tb_find_dp_out(tb, port); - if (out) - break; - } - - if (!in) { - tb_dbg(tb, "no suitable DP IN adapter available, not tunneling\n"); - return false; - } - if (!out) { - tb_dbg(tb, "no suitable DP OUT adapter available, not tunneling\n"); - return false; - } - /* * This is only applicable to links that are not bonded (so * when Thunderbolt 1 hardware is involved somewhere in the @@ -1801,15 +1767,19 @@ static bool tb_tunnel_one_dp(struct tb *tb) goto err_free; } - list_add_tail(&tunnel->list, &tcm->tunnel_list); - tb_reclaim_usb3_bandwidth(tb, in, out); + /* If fail reading tunnel's consumed bandwidth, tear it down */ + ret = tb_tunnel_consumed_bandwidth(tunnel, &consumed_up, &consumed_down); + if (ret) + goto err_deactivate; + list_add_tail(&tunnel->list, &tcm->tunnel_list); + + tb_reclaim_usb3_bandwidth(tb, in, out); /* * Transition the links to asymmetric if the consumption exceeds * the threshold. */ - if (!tb_tunnel_consumed_bandwidth(tunnel, &consumed_up, &consumed_down)) - tb_configure_asym(tb, in, out, consumed_up, consumed_down); + tb_configure_asym(tb, in, out, consumed_up, consumed_down); /* Update the domain with the new bandwidth estimation */ tb_recalc_estimated_bandwidth(tb); @@ -1821,6 +1791,8 @@ static bool tb_tunnel_one_dp(struct tb *tb) tb_increase_tmu_accuracy(tunnel); return true; +err_deactivate: + tb_tunnel_deactivate(tunnel); err_free: tb_tunnel_free(tunnel); err_reclaim_usb: @@ -1840,13 +1812,43 @@ err_rpm_put: static void tb_tunnel_dp(struct tb *tb) { + struct tb_cm *tcm = tb_priv(tb); + struct tb_port *port, *in, *out; + if (!tb_acpi_may_tunnel_dp()) { tb_dbg(tb, "DP tunneling disabled, not creating tunnel\n"); return; } - while (tb_tunnel_one_dp(tb)) - ; + /* + * Find pair of inactive DP IN and DP OUT adapters and then + * establish a DP tunnel between them. + */ + tb_dbg(tb, "looking for DP IN <-> DP OUT pairs:\n"); + + in = NULL; + out = NULL; + list_for_each_entry(port, &tcm->dp_resources, list) { + if (!tb_port_is_dpin(port)) + continue; + + if (tb_port_is_enabled(port)) { + tb_port_dbg(port, "DP IN in use\n"); + continue; + } + + in = port; + tb_port_dbg(in, "DP IN available\n"); + + out = tb_find_dp_out(tb, port); + if (out) + tb_tunnel_one_dp(tb, in, out); + else + tb_port_dbg(in, "no suitable DP OUT adapter available, not tunneling\n"); + } + + if (!in) + tb_dbg(tb, "no suitable DP IN adapter available, not tunneling\n"); } static void tb_enter_redrive(struct tb_port *port) From ff3c557fa93e7fcf5891e38fa7285b3c44e994b3 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 2 Oct 2024 05:06:23 +0800 Subject: [PATCH 510/534] mm/filemap: return early if failed to allocate memory for split commit de60fd8ddeda2b41fbe11df11733838c5f684616 upstream. xas_split_alloc could fail with NOMEM, and in such case, it should abort early instead of keep going and fail the xas_split below. Link: https://lkml.kernel.org/r/20240416071722.45997-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240415171857.19244-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240415171857.19244-2-ryncsn@gmail.com Signed-off-by: Kairui Song Acked-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Stable-dep-of: 6758c1128ceb ("mm/filemap: optimize filemap folio adding") Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 2662c416e7fa..b9bad8328861 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -872,9 +872,12 @@ noinline int __filemap_add_folio(struct address_space *mapping, unsigned int order = xa_get_order(xas.xa, xas.xa_index); void *entry, *old = NULL; - if (order > folio_order(folio)) + if (order > folio_order(folio)) { xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), order, gfp); + if (xas_error(&xas)) + goto error; + } xas_lock_irq(&xas); xas_for_each_conflict(&xas, entry) { old = entry; From 734594d41c8ed87174bbc8f443a935f702de974b Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 2 Oct 2024 05:06:24 +0800 Subject: [PATCH 511/534] lib/xarray: introduce a new helper xas_get_order commit a4864671ca0bf51c8e78242951741df52c06766f upstream. It can be used after xas_load to check the order of loaded entries. Compared to xa_get_order, it saves an XA_STATE and avoid a rewalk. Added new test for xas_get_order, to make the test work, we have to export xas_get_order with EXPORT_SYMBOL_GPL. Also fix a sparse warning by checking the slot value with xa_entry instead of accessing it directly, as suggested by Matthew Wilcox. [kasong@tencent.com: simplify comment, sparse warning fix, per Matthew Wilcox] Link: https://lkml.kernel.org/r/20240416071722.45997-4-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240415171857.19244-4-ryncsn@gmail.com Signed-off-by: Kairui Song Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Stable-dep-of: 6758c1128ceb ("mm/filemap: optimize filemap folio adding") Signed-off-by: Greg Kroah-Hartman --- include/linux/xarray.h | 6 +++++ lib/test_xarray.c | 34 +++++++++++++++++++++++++++ lib/xarray.c | 53 ++++++++++++++++++++++++++---------------- 3 files changed, 73 insertions(+), 20 deletions(-) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index cb571dfcf4b1..d9d479334c9e 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1548,6 +1548,7 @@ void xas_create_range(struct xa_state *); #ifdef CONFIG_XARRAY_MULTI int xa_get_order(struct xarray *, unsigned long index); +int xas_get_order(struct xa_state *xas); void xas_split(struct xa_state *, void *entry, unsigned int order); void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); #else @@ -1556,6 +1557,11 @@ static inline int xa_get_order(struct xarray *xa, unsigned long index) return 0; } +static inline int xas_get_order(struct xa_state *xas) +{ + return 0; +} + static inline void xas_split(struct xa_state *xas, void *entry, unsigned int order) { diff --git a/lib/test_xarray.c b/lib/test_xarray.c index e77d4856442c..2e229012920b 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1756,6 +1756,39 @@ static noinline void check_get_order(struct xarray *xa) } } +static noinline void check_xas_get_order(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + unsigned int order; + unsigned long i, j; + + for (order = 0; order < max_order; order++) { + for (i = 0; i < 10; i++) { + xas_set_order(&xas, i << order, order); + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(i)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + for (j = i << order; j < (i + 1) << order; j++) { + xas_set_order(&xas, j, 0); + rcu_read_lock(); + xas_load(&xas); + XA_BUG_ON(xa, xas_get_order(&xas) != order); + rcu_read_unlock(); + } + + xas_lock(&xas); + xas_set_order(&xas, i << order, order); + xas_store(&xas, NULL); + xas_unlock(&xas); + } + } +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -1805,6 +1838,7 @@ static int xarray_checks(void) check_reserve(&xa0); check_multi_store(&array); check_get_order(&array); + check_xas_get_order(&array); check_xa_alloc(); check_find(&array); check_find_entry(&array); diff --git a/lib/xarray.c b/lib/xarray.c index 39f07bfc4dcc..da79128ad754 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1749,6 +1749,36 @@ unlock: } EXPORT_SYMBOL(xa_store_range); +/** + * xas_get_order() - Get the order of an entry. + * @xas: XArray operation state. + * + * Called after xas_load, the xas should not be in an error state. + * + * Return: A number between 0 and 63 indicating the order of the entry. + */ +int xas_get_order(struct xa_state *xas) +{ + int order = 0; + + if (!xas->xa_node) + return 0; + + for (;;) { + unsigned int slot = xas->xa_offset + (1 << order); + + if (slot >= XA_CHUNK_SIZE) + break; + if (!xa_is_sibling(xa_entry(xas->xa, xas->xa_node, slot))) + break; + order++; + } + + order += xas->xa_node->shift; + return order; +} +EXPORT_SYMBOL_GPL(xas_get_order); + /** * xa_get_order() - Get the order of an entry. * @xa: XArray. @@ -1759,30 +1789,13 @@ EXPORT_SYMBOL(xa_store_range); int xa_get_order(struct xarray *xa, unsigned long index) { XA_STATE(xas, xa, index); - void *entry; int order = 0; + void *entry; rcu_read_lock(); entry = xas_load(&xas); - - if (!entry) - goto unlock; - - if (!xas.xa_node) - goto unlock; - - for (;;) { - unsigned int slot = xas.xa_offset + (1 << order); - - if (slot >= XA_CHUNK_SIZE) - break; - if (!xa_is_sibling(xas.xa_node->slots[slot])) - break; - order++; - } - - order += xas.xa_node->shift; -unlock: + if (entry) + order = xas_get_order(&xas); rcu_read_unlock(); return order; From 722e9e5acccf2f00368e11c8cc792032626202f7 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 2 Oct 2024 05:06:25 +0800 Subject: [PATCH 512/534] mm/filemap: optimize filemap folio adding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6758c1128ceb45d1a35298912b974eb4895b7dd9 upstream. Instead of doing multiple tree walks, do one optimism range check with lock hold, and exit if raced with another insertion. If a shadow exists, check it with a new xas_get_order helper before releasing the lock to avoid redundant tree walks for getting its order. Drop the lock and do the allocation only if a split is needed. In the best case, it only need to walk the tree once. If it needs to alloc and split, 3 walks are issued (One for first ranged conflict check and order retrieving, one for the second check after allocation, one for the insert after split). Testing with 4K pages, in an 8G cgroup, with 16G brd as block device: echo 3 > /proc/sys/vm/drop_caches fio -name=cached --numjobs=16 --filename=/mnt/test.img \ --buffered=1 --ioengine=mmap --rw=randread --time_based \ --ramp_time=30s --runtime=5m --group_reporting Before: bw ( MiB/s): min= 1027, max= 3520, per=100.00%, avg=2445.02, stdev=18.90, samples=8691 iops : min=263001, max=901288, avg=625924.36, stdev=4837.28, samples=8691 After (+7.3%): bw ( MiB/s): min= 493, max= 3947, per=100.00%, avg=2625.56, stdev=25.74, samples=8651 iops : min=126454, max=1010681, avg=672142.61, stdev=6590.48, samples=8651 Test result with THP (do a THP randread then switch to 4K page in hope it issues a lot of splitting): echo 3 > /proc/sys/vm/drop_caches fio -name=cached --numjobs=16 --filename=/mnt/test.img \ --buffered=1 --ioengine=mmap -thp=1 --readonly \ --rw=randread --time_based --ramp_time=30s --runtime=10m \ --group_reporting fio -name=cached --numjobs=16 --filename=/mnt/test.img \ --buffered=1 --ioengine=mmap \ --rw=randread --time_based --runtime=5s --group_reporting Before: bw ( KiB/s): min= 4141, max=14202, per=100.00%, avg=7935.51, stdev=96.85, samples=18976 iops : min= 1029, max= 3548, avg=1979.52, stdev=24.23, samples=18976· READ: bw=4545B/s (4545B/s), 4545B/s-4545B/s (4545B/s-4545B/s), io=64.0KiB (65.5kB), run=14419-14419msec After (+12.5%): bw ( KiB/s): min= 4611, max=15370, per=100.00%, avg=8928.74, stdev=105.17, samples=19146 iops : min= 1151, max= 3842, avg=2231.27, stdev=26.29, samples=19146 READ: bw=4635B/s (4635B/s), 4635B/s-4635B/s (4635B/s-4635B/s), io=64.0KiB (65.5kB), run=14137-14137msec The performance is better for both 4K (+7.5%) and THP (+12.5%) cached read. Link: https://lkml.kernel.org/r/20240415171857.19244-5-ryncsn@gmail.com Signed-off-by: Kairui Song Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Closes: https://lore.kernel.org/linux-mm/A5A976CB-DB57-4513-A700-656580488AB6@flyingcircus.io/ [ kasong@tencent.com: minor adjustment of variable declarations ] Signed-off-by: Greg Kroah-Hartman --- lib/test_xarray.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++ mm/filemap.c | 53 +++++++++++++++++++++++++++++++----------- 2 files changed, 98 insertions(+), 14 deletions(-) diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 2e229012920b..542926da61a3 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1789,6 +1789,64 @@ static noinline void check_xas_get_order(struct xarray *xa) } } +static noinline void check_xas_conflict_get_order(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + + void *entry; + int only_once; + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + unsigned int order; + unsigned long i, j, k; + + for (order = 0; order < max_order; order++) { + for (i = 0; i < 10; i++) { + xas_set_order(&xas, i << order, order); + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(i)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + /* + * Ensure xas_get_order works with xas_for_each_conflict. + */ + j = i << order; + for (k = 0; k < order; k++) { + only_once = 0; + xas_set_order(&xas, j + (1 << k), k); + xas_lock(&xas); + xas_for_each_conflict(&xas, entry) { + XA_BUG_ON(xa, entry != xa_mk_value(i)); + XA_BUG_ON(xa, xas_get_order(&xas) != order); + only_once++; + } + XA_BUG_ON(xa, only_once != 1); + xas_unlock(&xas); + } + + if (order < max_order - 1) { + only_once = 0; + xas_set_order(&xas, (i & ~1UL) << order, order + 1); + xas_lock(&xas); + xas_for_each_conflict(&xas, entry) { + XA_BUG_ON(xa, entry != xa_mk_value(i)); + XA_BUG_ON(xa, xas_get_order(&xas) != order); + only_once++; + } + XA_BUG_ON(xa, only_once != 1); + xas_unlock(&xas); + } + + xas_set_order(&xas, i << order, order); + xas_lock(&xas); + xas_store(&xas, NULL); + xas_unlock(&xas); + } + } +} + + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -1839,6 +1897,7 @@ static int xarray_checks(void) check_multi_store(&array); check_get_order(&array); check_xas_get_order(&array); + check_xas_conflict_get_order(&array); check_xa_alloc(); check_find(&array); check_find_entry(&array); diff --git a/mm/filemap.c b/mm/filemap.c index b9bad8328861..e6c112f3a211 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -846,6 +846,8 @@ noinline int __filemap_add_folio(struct address_space *mapping, { XA_STATE(xas, &mapping->i_pages, index); int huge = folio_test_hugetlb(folio); + void *alloced_shadow = NULL; + int alloced_order = 0; bool charged = false; long nr = 1; @@ -868,16 +870,10 @@ noinline int __filemap_add_folio(struct address_space *mapping, folio->mapping = mapping; folio->index = xas.xa_index; - do { - unsigned int order = xa_get_order(xas.xa, xas.xa_index); + for (;;) { + int order = -1, split_order = 0; void *entry, *old = NULL; - if (order > folio_order(folio)) { - xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), - order, gfp); - if (xas_error(&xas)) - goto error; - } xas_lock_irq(&xas); xas_for_each_conflict(&xas, entry) { old = entry; @@ -885,19 +881,33 @@ noinline int __filemap_add_folio(struct address_space *mapping, xas_set_err(&xas, -EEXIST); goto unlock; } + /* + * If a larger entry exists, + * it will be the first and only entry iterated. + */ + if (order == -1) + order = xas_get_order(&xas); + } + + /* entry may have changed before we re-acquire the lock */ + if (alloced_order && (old != alloced_shadow || order != alloced_order)) { + xas_destroy(&xas); + alloced_order = 0; } if (old) { - if (shadowp) - *shadowp = old; - /* entry may have been split before we acquired lock */ - order = xa_get_order(xas.xa, xas.xa_index); - if (order > folio_order(folio)) { + if (order > 0 && order > folio_order(folio)) { /* How to handle large swap entries? */ BUG_ON(shmem_mapping(mapping)); + if (!alloced_order) { + split_order = order; + goto unlock; + } xas_split(&xas, old, order); xas_reset(&xas); } + if (shadowp) + *shadowp = old; } xas_store(&xas, folio); @@ -913,9 +923,24 @@ noinline int __filemap_add_folio(struct address_space *mapping, __lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr); } + unlock: xas_unlock_irq(&xas); - } while (xas_nomem(&xas, gfp)); + + /* split needed, alloc here and retry. */ + if (split_order) { + xas_split_alloc(&xas, old, split_order, gfp); + if (xas_error(&xas)) + goto error; + alloced_shadow = old; + alloced_order = split_order; + xas_reset(&xas); + continue; + } + + if (!xas_nomem(&xas, gfp)) + break; + } if (xas_error(&xas)) goto error; From b3c10ac84c5a544ccd92ee4f891441df91a8cfbc Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 10 Sep 2024 22:55:08 -0700 Subject: [PATCH 513/534] bpf: lsm: Set bpf_lsm_blob_sizes.lbs_task to 0 commit 300a90b2cb5d442879e6398920c49aebbd5c8e40 upstream. bpf task local storage is now using task_struct->bpf_storage, so bpf_lsm_blob_sizes.lbs_task is no longer needed. Remove it to save some memory. Fixes: a10787e6d58c ("bpf: Enable task local storage for tracing programs") Cc: stable@vger.kernel.org Cc: KP Singh Cc: Matt Bobrowski Signed-off-by: Song Liu Acked-by: Matt Bobrowski Link: https://lore.kernel.org/r/20240911055508.9588-1-song@kernel.org Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- security/bpf/hooks.c | 1 - 1 file changed, 1 deletion(-) diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c index cfaf1d0e6a5f..35933ae53b92 100644 --- a/security/bpf/hooks.c +++ b/security/bpf/hooks.c @@ -24,7 +24,6 @@ static int __init bpf_lsm_init(void) struct lsm_blob_sizes bpf_lsm_blob_sizes __ro_after_init = { .lbs_inode = sizeof(struct bpf_storage_blob), - .lbs_task = sizeof(struct bpf_storage_blob), }; DEFINE_LSM(bpf) = { From bd24f30f5068df603b7d352243e15718e5c3add2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 24 Sep 2024 15:18:29 +0200 Subject: [PATCH 514/534] dm-verity: restart or panic on an I/O error commit e6a3531dd542cb127c8de32ab1e54a48ae19962b upstream. Maxim Suhanov reported that dm-verity doesn't crash if an I/O error happens. In theory, this could be used to subvert security, because an attacker can create sectors that return error with the Write Uncorrectable command. Some programs may misbehave if they have to deal with EIO. This commit fixes dm-verity, so that if "panic_on_corruption" or "restart_on_corruption" was specified and an I/O error happens, the machine will panic or restart. This commit also changes kernel_restart to emergency_restart - kernel_restart calls reboot notifiers and these reboot notifiers may wait for the bio that failed. emergency_restart doesn't call the notifiers. Reported-by: Maxim Suhanov Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-target.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 3636387ce565..442632e60e9c 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -264,8 +264,10 @@ out: if (v->mode == DM_VERITY_MODE_LOGGING) return 0; - if (v->mode == DM_VERITY_MODE_RESTART) - kernel_restart("dm-verity device corrupted"); + if (v->mode == DM_VERITY_MODE_RESTART) { + pr_emerg("dm-verity device corrupted\n"); + emergency_restart(); + } if (v->mode == DM_VERITY_MODE_PANIC) panic("dm-verity device corrupted"); @@ -689,6 +691,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) if (!static_branch_unlikely(&use_tasklet_enabled) || !io->in_tasklet) verity_fec_finish_io(io); + if (unlikely(status != BLK_STS_OK) && + unlikely(!(bio->bi_opf & REQ_RAHEAD)) && + !verity_is_system_shutting_down()) { + if (v->mode == DM_VERITY_MODE_RESTART || + v->mode == DM_VERITY_MODE_PANIC) + DMERR_LIMIT("%s has error: %s", v->data_dev->name, + blk_status_to_str(status)); + + if (v->mode == DM_VERITY_MODE_RESTART) { + pr_emerg("dm-verity device corrupted\n"); + emergency_restart(); + } + + if (v->mode == DM_VERITY_MODE_PANIC) + panic("dm-verity device corrupted"); + } + bio_endio(bio); } From e1e734c1a085c263f26ecb420221587f80172088 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Thu, 20 Jun 2024 22:54:34 +0000 Subject: [PATCH 515/534] lockdep: fix deadlock issue between lockdep and rcu commit a6f88ac32c6e63e69c595bfae220d8641704c9b7 upstream. There is a deadlock scenario between lockdep and rcu when rcu nocb feature is enabled, just as following call stack: rcuop/x -000|queued_spin_lock_slowpath(lock = 0xFFFFFF817F2A8A80, val = ?) -001|queued_spin_lock(inline) // try to hold nocb_gp_lock -001|do_raw_spin_lock(lock = 0xFFFFFF817F2A8A80) -002|__raw_spin_lock_irqsave(inline) -002|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F2A8A80) -003|wake_nocb_gp_defer(inline) -003|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F30B680) -004|__call_rcu_common(inline) -004|call_rcu(head = 0xFFFFFFC082EECC28, func = ?) -005|call_rcu_zapped(inline) -005|free_zapped_rcu(ch = ?)// hold graph lock -006|rcu_do_batch(rdp = 0xFFFFFF817F245680) -007|nocb_cb_wait(inline) -007|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F245680) -008|kthread(_create = 0xFFFFFF80803122C0) -009|ret_from_fork(asm) rcuop/y -000|queued_spin_lock_slowpath(lock = 0xFFFFFFC08291BBC8, val = 0) -001|queued_spin_lock() -001|lockdep_lock() -001|graph_lock() // try to hold graph lock -002|lookup_chain_cache_add() -002|validate_chain() -003|lock_acquire -004|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F211D80) -005|lock_timer_base(inline) -006|mod_timer(inline) -006|wake_nocb_gp_defer(inline)// hold nocb_gp_lock -006|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F2A8680) -007|__call_rcu_common(inline) -007|call_rcu(head = 0xFFFFFFC0822E0B58, func = ?) -008|call_rcu_hurry(inline) -008|rcu_sync_call(inline) -008|rcu_sync_func(rhp = 0xFFFFFFC0822E0B58) -009|rcu_do_batch(rdp = 0xFFFFFF817F266680) -010|nocb_cb_wait(inline) -010|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F266680) -011|kthread(_create = 0xFFFFFF8080363740) -012|ret_from_fork(asm) rcuop/x and rcuop/y are rcu nocb threads with the same nocb gp thread. This patch release the graph lock before lockdep call_rcu. Fixes: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use") Cc: stable@vger.kernel.org Cc: Boqun Feng Cc: Waiman Long Cc: Carlos Llamas Cc: Bart Van Assche Signed-off-by: Zhiguo Niu Signed-off-by: Xuewen Yan Reviewed-by: Waiman Long Reviewed-by: Carlos Llamas Reviewed-by: Bart Van Assche Signed-off-by: Carlos Llamas Acked-by: Paul E. McKenney Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20240620225436.3127927-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- kernel/locking/lockdep.c | 50 ++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 151bd3de5936..3468d8230e5f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -6184,25 +6184,27 @@ static struct pending_free *get_pending_free(void) static void free_zapped_rcu(struct rcu_head *cb); /* - * Schedule an RCU callback if no RCU callback is pending. Must be called with - * the graph lock held. - */ -static void call_rcu_zapped(struct pending_free *pf) +* See if we need to queue an RCU callback, must called with +* the lockdep lock held, returns false if either we don't have +* any pending free or the callback is already scheduled. +* Otherwise, a call_rcu() must follow this function call. +*/ +static bool prepare_call_rcu_zapped(struct pending_free *pf) { WARN_ON_ONCE(inside_selftest()); if (list_empty(&pf->zapped)) - return; + return false; if (delayed_free.scheduled) - return; + return false; delayed_free.scheduled = true; WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf); delayed_free.index ^= 1; - call_rcu(&delayed_free.rcu_head, free_zapped_rcu); + return true; } /* The caller must hold the graph lock. May be called from RCU context. */ @@ -6228,6 +6230,7 @@ static void free_zapped_rcu(struct rcu_head *ch) { struct pending_free *pf; unsigned long flags; + bool need_callback; if (WARN_ON_ONCE(ch != &delayed_free.rcu_head)) return; @@ -6239,14 +6242,18 @@ static void free_zapped_rcu(struct rcu_head *ch) pf = delayed_free.pf + (delayed_free.index ^ 1); __free_zapped_classes(pf); delayed_free.scheduled = false; - - /* - * If there's anything on the open list, close and start a new callback. - */ - call_rcu_zapped(delayed_free.pf + delayed_free.index); - + need_callback = + prepare_call_rcu_zapped(delayed_free.pf + delayed_free.index); lockdep_unlock(); raw_local_irq_restore(flags); + + /* + * If there's pending free and its callback has not been scheduled, + * queue an RCU callback. + */ + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); + } /* @@ -6286,6 +6293,7 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size) { struct pending_free *pf; unsigned long flags; + bool need_callback; init_data_structures_once(); @@ -6293,10 +6301,11 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size) lockdep_lock(); pf = get_pending_free(); __lockdep_free_key_range(pf, start, size); - call_rcu_zapped(pf); + need_callback = prepare_call_rcu_zapped(pf); lockdep_unlock(); raw_local_irq_restore(flags); - + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); /* * Wait for any possible iterators from look_up_lock_class() to pass * before continuing to free the memory they refer to. @@ -6390,6 +6399,7 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock) struct pending_free *pf; unsigned long flags; int locked; + bool need_callback = false; raw_local_irq_save(flags); locked = graph_lock(); @@ -6398,11 +6408,13 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock) pf = get_pending_free(); __lockdep_reset_lock(pf, lock); - call_rcu_zapped(pf); + need_callback = prepare_call_rcu_zapped(pf); graph_unlock(); out_irq: raw_local_irq_restore(flags); + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); } /* @@ -6446,6 +6458,7 @@ void lockdep_unregister_key(struct lock_class_key *key) struct pending_free *pf; unsigned long flags; bool found = false; + bool need_callback = false; might_sleep(); @@ -6466,11 +6479,14 @@ void lockdep_unregister_key(struct lock_class_key *key) if (found) { pf = get_pending_free(); __lockdep_free_key_range(pf, key, 1); - call_rcu_zapped(pf); + need_callback = prepare_call_rcu_zapped(pf); } lockdep_unlock(); raw_local_irq_restore(flags); + if (need_callback) + call_rcu(&delayed_free.rcu_head, free_zapped_rcu); + /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */ synchronize_rcu(); } From 9347605691349160768d604f6002a50610defe4a Mon Sep 17 00:00:00 2001 From: David Gow Date: Sat, 3 Aug 2024 15:46:41 +0800 Subject: [PATCH 516/534] mm: only enforce minimum stack gap size if it's sensible commit 69b50d4351ed924f29e3d46b159e28f70dfc707f upstream. The generic mmap_base code tries to leave a gap between the top of the stack and the mmap base address, but enforces a minimum gap size (MIN_GAP) of 128MB, which is too large on some setups. In particular, on arm tasks without ADDR_LIMIT_32BIT, the STACK_TOP value is less than 128MB, so it's impossible to fit such a gap in. Only enforce this minimum if MIN_GAP < MAX_GAP, as we'd prefer to honour MAX_GAP, which is defined proportionally, so scales better and always leaves us with both _some_ stack space and some room for mmap. This fixes the usercopy KUnit test suite on 32-bit arm, as it doesn't set any personality flags so gets the default (in this case 26-bit) task size. This test can be run with: ./tools/testing/kunit/kunit.py run --arch arm usercopy --make_options LLVM=1 Link: https://lkml.kernel.org/r/20240803074642.1849623-2-davidgow@google.com Fixes: dba79c3df4a2 ("arm: use generic mmap top-down layout and brk randomization") Signed-off-by: David Gow Reviewed-by: Kees Cook Cc: Alexandre Ghiti Cc: Linus Walleij Cc: Luis Chamberlain Cc: Mark Rutland Cc: Russell King Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/util.c b/mm/util.c index 2d5a309c4e54..08d494896552 100644 --- a/mm/util.c +++ b/mm/util.c @@ -434,7 +434,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) if (gap + pad > gap) gap += pad; - if (gap < MIN_GAP) + if (gap < MIN_GAP && MIN_GAP < MAX_GAP) gap = MIN_GAP; else if (gap > MAX_GAP) gap = MAX_GAP; From 236eb2f95ad00269d72239474c0af52f8e6d7449 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Thu, 5 Sep 2024 17:43:37 +0800 Subject: [PATCH 517/534] spi: fspi: add support for imx8ulp commit 9228956a620553d7fd17f703a37a26c91e4d92ab upstream. The flexspi on imx8ulp only has 16 LUTs, different with others which have up to 32 LUTs. Add a separate compatible string and nxp_fspi_devtype_data to support flexspi on imx8ulp. Fixes: ef89fd56bdfc ("arm64: dts: imx8ulp: add flexspi node") Cc: stable@kernel.org Signed-off-by: Haibo Chen Reviewed-by: Frank Li Link: https://patch.msgid.link/20240905094338.1986871-4-haibo.chen@nxp.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-nxp-fspi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index 5053dbd3081a..bc6c086ddd43 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -371,6 +371,15 @@ static struct nxp_fspi_devtype_data imx8dxl_data = { .little_endian = true, /* little-endian */ }; +static struct nxp_fspi_devtype_data imx8ulp_data = { + .rxfifo = SZ_512, /* (64 * 64 bits) */ + .txfifo = SZ_1K, /* (128 * 64 bits) */ + .ahb_buf_size = SZ_2K, /* (256 * 64 bits) */ + .quirks = 0, + .lut_num = 16, + .little_endian = true, /* little-endian */ +}; + struct nxp_fspi { void __iomem *iobase; void __iomem *ahb_addr; @@ -1297,6 +1306,7 @@ static const struct of_device_id nxp_fspi_dt_ids[] = { { .compatible = "nxp,imx8mp-fspi", .data = (void *)&imx8mm_data, }, { .compatible = "nxp,imx8qxp-fspi", .data = (void *)&imx8qxp_data, }, { .compatible = "nxp,imx8dxl-fspi", .data = (void *)&imx8dxl_data, }, + { .compatible = "nxp,imx8ulp-fspi", .data = (void *)&imx8ulp_data, }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, nxp_fspi_dt_ids); From 6ec62dba4a19998f88252ff522c07f87c6cd2cb4 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 11 Jun 2024 09:50:32 +0200 Subject: [PATCH 518/534] module: Fix KCOV-ignored file name commit f34d086fb7102fec895fd58b9e816b981b284c17 upstream. module.c was renamed to main.c, but the Makefile directive was copy-pasted verbatim with the old file name. Fix up the file name. Fixes: cfc1d277891e ("module: Move all into module/") Signed-off-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Reviewed-by: Alexander Potapenko Reviewed-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/bc0cf790b4839c5e38e2fafc64271f620568a39e.1718092070.git.dvyukov@google.com Signed-off-by: Greg Kroah-Hartman --- kernel/module/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/module/Makefile b/kernel/module/Makefile index a10b2b9a6fdf..50ffcc413b54 100644 --- a/kernel/module/Makefile +++ b/kernel/module/Makefile @@ -5,7 +5,7 @@ # These are called from save_stack_trace() on slub debug path, # and produce insane amounts of uninteresting coverage. -KCOV_INSTRUMENT_module.o := n +KCOV_INSTRUMENT_main.o := n obj-y += main.o obj-y += strict_rwx.o From b35a42bdaf18bd6f2e3137f7072567040ea5d81e Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 4 Sep 2024 17:12:04 -0700 Subject: [PATCH 519/534] mm/damon/vaddr: protect vma traversal in __damon_va_thre_regions() with rcu read lock commit fb497d6db7c19c797cbd694b52d1af87c4eebcc6 upstream. Traversing VMAs of a given maple tree should be protected by rcu read lock. However, __damon_va_three_regions() is not doing the protection. Hold the lock. Link: https://lkml.kernel.org/r/20240905001204.1481-1-sj@kernel.org Fixes: d0cf3dd47f0d ("damon: convert __damon_va_three_regions to use the VMA iterator") Signed-off-by: Liam R. Howlett Signed-off-by: SeongJae Park Reported-by: Guenter Roeck Closes: https://lore.kernel.org/b83651a0-5b24-4206-b860-cb54ffdf209b@roeck-us.net Tested-by: Guenter Roeck Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/vaddr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index cf8a9fc5c9d1..530f01fedd35 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -126,6 +126,7 @@ static int __damon_va_three_regions(struct mm_struct *mm, * If this is too slow, it can be optimised to examine the maple * tree gaps. */ + rcu_read_lock(); for_each_vma(vmi, vma) { unsigned long gap; @@ -146,6 +147,7 @@ static int __damon_va_three_regions(struct mm_struct *mm, next: prev = vma; } + rcu_read_unlock(); if (!sz_range(&second_gap) || !sz_range(&first_gap)) return -EINVAL; From 88dfb1dd17d6727b67188077099e54739cbd50be Mon Sep 17 00:00:00 2001 From: Tommy Huang Date: Wed, 11 Sep 2024 17:39:51 +0800 Subject: [PATCH 520/534] i2c: aspeed: Update the stop sw state when the bus recovery occurs commit 93701d3b84ac5f3ea07259d4ced405c53d757985 upstream. When the i2c bus recovery occurs, driver will send i2c stop command in the scl low condition. In this case the sw state will still keep original situation. Under multi-master usage, i2c bus recovery will be called when i2c transfer timeout occurs. Update the stop command calling with aspeed_i2c_do_stop function to update master_state. Fixes: f327c686d3ba ("i2c: aspeed: added driver for Aspeed I2C") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Tommy Huang Signed-off-by: Andi Shyti Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-aspeed.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 5511fd46a65e..83e6714901b2 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -170,6 +170,13 @@ struct aspeed_i2c_bus { static int aspeed_i2c_reset(struct aspeed_i2c_bus *bus); +/* precondition: bus.lock has been acquired. */ +static void aspeed_i2c_do_stop(struct aspeed_i2c_bus *bus) +{ + bus->master_state = ASPEED_I2C_MASTER_STOP; + writel(ASPEED_I2CD_M_STOP_CMD, bus->base + ASPEED_I2C_CMD_REG); +} + static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) { unsigned long time_left, flags; @@ -187,7 +194,7 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) command); reinit_completion(&bus->cmd_complete); - writel(ASPEED_I2CD_M_STOP_CMD, bus->base + ASPEED_I2C_CMD_REG); + aspeed_i2c_do_stop(bus); spin_unlock_irqrestore(&bus->lock, flags); time_left = wait_for_completion_timeout( @@ -390,13 +397,6 @@ static void aspeed_i2c_do_start(struct aspeed_i2c_bus *bus) writel(command, bus->base + ASPEED_I2C_CMD_REG); } -/* precondition: bus.lock has been acquired. */ -static void aspeed_i2c_do_stop(struct aspeed_i2c_bus *bus) -{ - bus->master_state = ASPEED_I2C_MASTER_STOP; - writel(ASPEED_I2CD_M_STOP_CMD, bus->base + ASPEED_I2C_CMD_REG); -} - /* precondition: bus.lock has been acquired. */ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus) { From d669e782900775c1ff3434d600c6028198d093f4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Sep 2024 18:39:14 +0300 Subject: [PATCH 521/534] i2c: isch: Add missed 'else' commit 1db4da55070d6a2754efeb3743f5312fc32f5961 upstream. In accordance with the existing comment and code analysis it is quite likely that there is a missed 'else' when adapter times out. Add it. Fixes: 5bc1200852c3 ("i2c: Add Intel SCH SMBus support") Signed-off-by: Andy Shevchenko Cc: # v2.6.27+ Signed-off-by: Andi Shyti Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-isch.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-isch.c b/drivers/i2c/busses/i2c-isch.c index 1dc1ceaa4443..8c16c4695574 100644 --- a/drivers/i2c/busses/i2c-isch.c +++ b/drivers/i2c/busses/i2c-isch.c @@ -99,8 +99,7 @@ static int sch_transaction(void) if (retries > MAX_RETRIES) { dev_err(&sch_adapter.dev, "SMBus Timeout!\n"); result = -ETIMEDOUT; - } - if (temp & 0x04) { + } else if (temp & 0x04) { result = -EIO; dev_dbg(&sch_adapter.dev, "Bus collision! SMBus may be " "locked until next hard reset. (sorry!)\n"); From 7390c461264c7b8a0b489faf30c05ed0af7ebacf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 27 Sep 2024 11:45:45 -0400 Subject: [PATCH 522/534] Documentation: KVM: fix warning in "make htmldocs" commit efbc6bd090f48ccf64f7a8dd5daea775821d57ec upstream. The warning Documentation/virt/kvm/locking.rst:31: ERROR: Unexpected indentation. is caused by incorrectly treating a line as the continuation of a paragraph, rather than as the first line in a bullet list. Fixed: 44d174596260 ("KVM: Use dedicated mutex to protect kvm_usage_count to avoid deadlock") Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- Documentation/virt/kvm/locking.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index 703d3781c775..887d9d2fed49 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -25,6 +25,7 @@ The acquisition orders for mutexes are as follows: must not take either kvm->slots_lock or kvm->slots_arch_lock. cpus_read_lock() vs kvm_lock: + - Taking cpus_read_lock() outside of kvm_lock is problematic, despite that being the official ordering, as it is quite easy to unknowingly trigger cpus_read_lock() while holding kvm_lock. Use caution when walking vm_list, From 790c630ab0e7d7aba6d186581d4627c09fce60f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 19 Sep 2024 15:28:53 +0200 Subject: [PATCH 523/534] bpf: Fix use-after-free in bpf_uprobe_multi_link_attach() commit 5fe6e308abaea082c20fbf2aa5df8e14495622cf upstream. If bpf_link_prime() fails, bpf_uprobe_multi_link_attach() goes to the error_free label and frees the array of bpf_uprobe's without calling bpf_uprobe_unregister(). This leaks bpf_uprobe->uprobe and worse, this frees bpf_uprobe->consumer without removing it from the uprobe->consumers list. Fixes: 89ae89f53d20 ("bpf: Add multi uprobe link") Closes: https://lore.kernel.org/all/000000000000382d39061f59f2dd@google.com/ Reported-by: syzbot+f7a1c2c2711e4a780f19@syzkaller.appspotmail.com Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Jiri Olsa Tested-by: syzbot+f7a1c2c2711e4a780f19@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240813152524.GA7292@redhat.com Signed-off-by: Greg Kroah-Hartman --- kernel/trace/bpf_trace.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 37a19598d712..eca858bde804 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3285,17 +3285,20 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr uprobes[i].ref_ctr_offset, &uprobes[i].consumer); if (err) { - bpf_uprobe_unregister(&path, uprobes, i); - goto error_free; + link->cnt = i; + goto error_unregister; } } err = bpf_link_prime(&link->link, &link_primer); if (err) - goto error_free; + goto error_unregister; return bpf_link_settle(&link_primer); +error_unregister: + bpf_uprobe_unregister(&path, uprobes, link->cnt); + error_free: kvfree(uprobes); kfree(link); From e43caacf61101a962b9ba35c9d2b5c2628ec5eca Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 18 Dec 2023 22:36:35 -0800 Subject: [PATCH 524/534] usb: yurex: Fix inconsistent locking bug in yurex_read() commit e7d3b9f28654dbfce7e09f8028210489adaf6a33 upstream. Unlock before returning on the error path. Fixes: 86b20af11e84 ("usb: yurex: Replace snprintf() with the safer scnprintf() variant") Reported-by: Dan Carpenter Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202312170252.3udgrIcP-lkp@intel.com/ Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20231219063639.450994-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 44136989f6c6..6adaaf66c14d 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -413,8 +413,10 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, return -ENODEV; } - if (WARN_ON_ONCE(dev->bbu > S64_MAX || dev->bbu < S64_MIN)) + if (WARN_ON_ONCE(dev->bbu > S64_MAX || dev->bbu < S64_MIN)) { + mutex_unlock(&dev->io_mutex); return -EIO; + } spin_lock_irq(&dev->lock); scnprintf(in_buffer, MAX_S64_STRLEN, "%lld\n", dev->bbu); From 18b5ee7bf70048d85cc1b424e862cb762421b2c5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 11 Dec 2023 19:27:28 +0000 Subject: [PATCH 525/534] perf/arm-cmn: Fail DTC counter allocation correctly commit 1892fe103c3a20fced306c8dafa74f7f6d4ea0a3 upstream. Calling arm_cmn_event_clear() before all DTC indices are allocated is wrong, and can lead to arm_cmn_event_add() erroneously clearing live counters from full DTCs where allocation fails. Since the DTC counters are only updated by arm_cmn_init_counter() after all DTC and DTM allocations succeed, nothing actually needs cleaning up in this case anyway, and it should just return directly as it did before. Fixes: 7633ec2c262f ("perf/arm-cmn: Rework DTC counters (again)") Signed-off-by: Robin Murphy Reviewed-by: Ilkka Koskinen Acked-by: Will Deacon Link: https://lore.kernel.org/r/ed589c0d8e4130dc68b8ad1625226d28bdc185d4.1702322847.git.robin.murphy@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 7d5e8cb96e9b..0b3ce7713645 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1794,7 +1794,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) idx = 0; while (cmn->dtc[j].counters[idx]) if (++idx == CMN_DT_NUM_COUNTERS) - goto free_dtms; + return -ENOSPC; } hw->dtc_idx[j] = idx; } From 72a3aef9640e5e7e4645e76435835ae3d8df0784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Apitzsch?= Date: Sun, 1 Oct 2023 18:09:56 +0200 Subject: [PATCH 526/534] iio: magnetometer: ak8975: Fix 'Unexpected device' error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 848f68c760ab1e14a9046ea6e45e3304ab9fa50b upstream. Explicity specify array indices to fix mapping between asahi_compass_chipset and ak_def_array. While at it, remove unneeded AKXXXX. Fixes: 4f9ea93afde1 ("iio: magnetometer: ak8975: Convert enum->pointer for data in the match tables") Signed-off-by: André Apitzsch Reviewed-by: Biju Das Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231001-ak_magnetometer-v1-1-09bf3b8798a3@apitzsch.eu Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/ak8975.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index a79a1d5b4639..3b7984fd129d 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -204,7 +204,6 @@ static long ak09912_raw_to_gauss(u16 data) /* Compatible Asahi Kasei Compass parts */ enum asahi_compass_chipset { - AKXXXX = 0, AK8975, AK8963, AK09911, @@ -248,7 +247,7 @@ struct ak_def { }; static const struct ak_def ak_def_array[] = { - { + [AK8975] = { .type = AK8975, .raw_to_gauss = ak8975_raw_to_gauss, .range = 4096, @@ -273,7 +272,7 @@ static const struct ak_def ak_def_array[] = { AK8975_REG_HYL, AK8975_REG_HZL}, }, - { + [AK8963] = { .type = AK8963, .raw_to_gauss = ak8963_09911_raw_to_gauss, .range = 8190, @@ -298,7 +297,7 @@ static const struct ak_def ak_def_array[] = { AK8975_REG_HYL, AK8975_REG_HZL}, }, - { + [AK09911] = { .type = AK09911, .raw_to_gauss = ak8963_09911_raw_to_gauss, .range = 8192, @@ -323,7 +322,7 @@ static const struct ak_def ak_def_array[] = { AK09912_REG_HYL, AK09912_REG_HZL}, }, - { + [AK09912] = { .type = AK09912, .raw_to_gauss = ak09912_raw_to_gauss, .range = 32752, @@ -348,7 +347,7 @@ static const struct ak_def ak_def_array[] = { AK09912_REG_HYL, AK09912_REG_HZL}, }, - { + [AK09916] = { .type = AK09916, .raw_to_gauss = ak09912_raw_to_gauss, .range = 32752, From 6b5630297e943dd61e5928c5b7e3c3ee3f01be20 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sun, 28 Jan 2024 10:30:56 +0100 Subject: [PATCH 527/534] wifi: brcmfmac: add linefeed at end of file commit 26f0dc8a705ae182eaa126cef0a9870d1a87a5ac upstream. The following sparse warning was reported: drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c:432:49: warning: no newline at end of file Fixes: 31343230abb1 ("wifi: brcmfmac: export firmware interface functions") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/all/20240125165128.7e43a1f3@kernel.org/ Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://msgid.link/20240128093057.164791-2-arend.vanspriel@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c index bc1c6b5a6e31..6385a7db7f7d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c @@ -429,4 +429,4 @@ s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, const char *name, u16 id, mutex_unlock(&drvr->proto_block); return err; } -BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_xtlv_data_get); \ No newline at end of file +BRCMF_EXPORT_SYMBOL_GPL(brcmf_fil_xtlv_data_get); From 3dc5525d59da013e4c2e032c90ab16421d46732f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 7 Nov 2023 14:34:27 +0200 Subject: [PATCH 528/534] thunderbolt: Send uevent after asymmetric/symmetric switch commit 5391bcfa56c79a891734e4d22aa0ca3217b86491 upstream. We should send uevent to userspace whenever the link speed or width changes but tb_switch_asym_enable() and tb_switch_asym_disable() set the sw->link_width already so tb_switch_update_link_attributes() never noticed the change. Fix this so that we let tb_switch_update_link_attributes() update the fields accordingly. Fixes: 81af2952e606 ("thunderbolt: Add support for asymmetric link") Reported-by: Pengfei Xu Tested-by: Pengfei Xu Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 6393ce44c253..7ac2c0bbc933 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2981,6 +2981,7 @@ static int tb_switch_lane_bonding_disable(struct tb_switch *sw) return tb_port_wait_for_link_width(down, TB_LINK_WIDTH_SINGLE, 100); } +/* Note updating sw->link_width done in tb_switch_update_link_attributes() */ static int tb_switch_asym_enable(struct tb_switch *sw, enum tb_link_width width) { struct tb_port *up, *down, *port; @@ -3020,10 +3021,10 @@ static int tb_switch_asym_enable(struct tb_switch *sw, enum tb_link_width width) return ret; } - sw->link_width = width; return 0; } +/* Note updating sw->link_width done in tb_switch_update_link_attributes() */ static int tb_switch_asym_disable(struct tb_switch *sw) { struct tb_port *up, *down; @@ -3058,7 +3059,6 @@ static int tb_switch_asym_disable(struct tb_switch *sw) return ret; } - sw->link_width = TB_LINK_WIDTH_DUAL; return 0; } From e2ab9fd64d4fcc6bfd9a344f1de8d7c5ed27b941 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Thu, 30 Nov 2023 18:17:13 +0200 Subject: [PATCH 529/534] thunderbolt: Fix minimum allocated USB 3.x and PCIe bandwidth commit f0b94c1c5c7994a74e487f43c91cfc922105a423 upstream. With the current bandwidth allocation we end up reserving too much for the USB 3.x and PCIe tunnels that leads to reduced capabilities for the second DisplayPort tunnel. Fix this by decreasing the USB 3.x allocation to 900 Mb/s which then allows both tunnels to get the maximum HBR2 bandwidth. This way, the reserved bandwidth for USB 3.x and PCIe, would be 1350 Mb/s (taking weights of USB 3.x and PCIe into account). So bandwidth allocations on a link are: USB 3.x + PCIe tunnels => 1350 Mb/s DisplayPort tunnel #1 => 17280 Mb/s DisplayPort tunnel #2 => 17280 Mb/s Total consumed bandwidth is 35910 Mb/s. So that all the above can be tunneled on a Gen 3 link (which allows maximum of 36000 Mb/s). Fixes: 582e70b0d3a4 ("thunderbolt: Change bandwidth reservations to comply USB4 v2") Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/usb4.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index e048e81c3027..8db9bd32f473 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -2380,13 +2380,13 @@ int usb4_usb3_port_release_bandwidth(struct tb_port *port, int *upstream_bw, goto err_request; /* - * Always keep 1000 Mb/s to make sure xHCI has at least some + * Always keep 900 Mb/s to make sure xHCI has at least some * bandwidth available for isochronous traffic. */ - if (consumed_up < 1000) - consumed_up = 1000; - if (consumed_down < 1000) - consumed_down = 1000; + if (consumed_up < 900) + consumed_up = 900; + if (consumed_down < 900) + consumed_down = 900; ret = usb4_usb3_port_write_allocated_bandwidth(port, consumed_up, consumed_down); From 440fba897c5ae32d7df1f1d609dbb19e2bba7fbb Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 12 Feb 2024 13:03:34 +0200 Subject: [PATCH 530/534] thunderbolt: Fix NULL pointer dereference in tb_port_update_credits() commit d3d17e23d1a0d1f959b4fa55b35f1802d9c584fa upstream. Olliver reported that his system crashes when plugging in Thunderbolt 1 device: BUG: kernel NULL pointer dereference, address: 0000000000000020 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:tb_port_do_update_credits+0x1b/0x130 [thunderbolt] Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? tb_port_do_update_credits+0x1b/0x130 ? tb_switch_update_link_attributes+0x83/0xd0 tb_switch_add+0x7a2/0xfe0 tb_scan_port+0x236/0x6f0 tb_handle_hotplug+0x6db/0x900 process_one_work+0x171/0x340 worker_thread+0x27b/0x3a0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 This is due the fact that some Thunderbolt 1 devices only have one lane adapter. Fix this by checking for the lane 1 before we read its credits. Reported-by: Olliver Schinagl Closes: https://lore.kernel.org/linux-usb/c24c7882-6254-4e68-8f22-f3e8f65dc84f@schinagl.nl/ Fixes: 81af2952e606 ("thunderbolt: Add support for asymmetric link") Cc: stable@vger.kernel.org Cc: Gil Fine Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 7ac2c0bbc933..52cb1a3bb8c7 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -1271,6 +1271,9 @@ int tb_port_update_credits(struct tb_port *port) ret = tb_port_do_update_credits(port); if (ret) return ret; + + if (!port->dual_link_port) + return 0; return tb_port_do_update_credits(port->dual_link_port); } From 4c0c5dcb5471de5fc8f0a1c4980e5815339e1cee Mon Sep 17 00:00:00 2001 From: "Alexey Gladkov (Intel)" Date: Fri, 13 Sep 2024 19:05:56 +0200 Subject: [PATCH 531/534] x86/tdx: Fix "in-kernel MMIO" check commit d4fc4d01471528da8a9797a065982e05090e1d81 upstream. TDX only supports kernel-initiated MMIO operations. The handle_mmio() function checks if the #VE exception occurred in the kernel and rejects the operation if it did not. However, userspace can deceive the kernel into performing MMIO on its behalf. For example, if userspace can point a syscall to an MMIO address, syscall does get_user() or put_user() on it, triggering MMIO #VE. The kernel will treat the #VE as in-kernel MMIO. Ensure that the target MMIO address is within the kernel before decoding instruction. Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO") Signed-off-by: Alexey Gladkov (Intel) Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/565a804b80387970460a4ebc67c88d1380f61ad1.1726237595.git.legion%40kernel.org Signed-off-by: Alexey Gladkov (Intel) Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/tdx/tdx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 006041fbb65f..905ac8a3f716 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -14,6 +14,7 @@ #include #include #include +#include /* MMIO direction */ #define EPT_READ 0 @@ -405,6 +406,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) return -EINVAL; } + if (!fault_in_kernel_space(ve->gla)) { + WARN_ONCE(1, "Access to userspace address is not supported"); + return -EINVAL; + } + /* * Reject EPT violation #VEs that split pages. * From 646749b423c4dc039e2793c8025fe33ad1ad271b Mon Sep 17 00:00:00 2001 From: Alexander Dahl Date: Thu, 26 Sep 2024 11:03:56 +0200 Subject: [PATCH 532/534] spi: atmel-quadspi: Fix wrong register value written to MR commit 162d9b5d2308c7e48efbc97d36babbf4d73b2c61 upstream. aq->mr should go to MR, nothing else. Fixes: 329ca3eed4a9 ("spi: atmel-quadspi: Avoid overwriting delay register settings") Signed-off-by: Alexander Dahl Link: https://lore.kernel.org/linux-spi/20240926-macarena-wincing-7c4995487a29@thorsis.com/T/#u Link: https://patch.msgid.link/20240926090356.105789-1-ada@thorsis.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/atmel-quadspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index bf7999ac22c8..6f9e9d871677 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -377,7 +377,7 @@ static int atmel_qspi_set_cfg(struct atmel_qspi *aq, */ if (!(aq->mr & QSPI_MR_SMM)) { aq->mr |= QSPI_MR_SMM; - atmel_qspi_write(aq->scr, aq, QSPI_MR); + atmel_qspi_write(aq->mr, aq, QSPI_MR); } /* Clear pending interrupts */ From cada2646b7483cce370eb3b046659df31d9d34d1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Oct 2024 15:56:18 +0200 Subject: [PATCH 533/534] Revert: "dm-verity: restart or panic on an I/O error" commit 462763212dd71c41f092b48eaa352bc1f5ed5d66 upstream. This reverts commit e6a3531dd542cb127c8de32ab1e54a48ae19962b. The problem that the commit e6a3531dd542cb127c8de32ab1e54a48ae19962b fixes was reported as a security bug, but Google engineers working on Android and ChromeOS didn't want to change the default behavior, they want to get -EIO rather than restarting the system, so I am reverting that commit. Note also that calling machine_restart from the I/O handling code is potentially unsafe (the reboot notifiers may wait for the bio that triggered the restart), but Android uses the reboot notifiers to store the reboot reason into the PMU microcontroller, so machine_restart must be used. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: e6a3531dd542 ("dm-verity: restart or panic on an I/O error") Suggested-by: Sami Tolvanen Suggested-by: Will Drewry Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-target.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 442632e60e9c..3636387ce565 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -264,10 +264,8 @@ out: if (v->mode == DM_VERITY_MODE_LOGGING) return 0; - if (v->mode == DM_VERITY_MODE_RESTART) { - pr_emerg("dm-verity device corrupted\n"); - emergency_restart(); - } + if (v->mode == DM_VERITY_MODE_RESTART) + kernel_restart("dm-verity device corrupted"); if (v->mode == DM_VERITY_MODE_PANIC) panic("dm-verity device corrupted"); @@ -691,23 +689,6 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) if (!static_branch_unlikely(&use_tasklet_enabled) || !io->in_tasklet) verity_fec_finish_io(io); - if (unlikely(status != BLK_STS_OK) && - unlikely(!(bio->bi_opf & REQ_RAHEAD)) && - !verity_is_system_shutting_down()) { - if (v->mode == DM_VERITY_MODE_RESTART || - v->mode == DM_VERITY_MODE_PANIC) - DMERR_LIMIT("%s has error: %s", v->data_dev->name, - blk_status_to_str(status)); - - if (v->mode == DM_VERITY_MODE_RESTART) { - pr_emerg("dm-verity device corrupted\n"); - emergency_restart(); - } - - if (v->mode == DM_VERITY_MODE_PANIC) - panic("dm-verity device corrupted"); - } - bio_endio(bio); } From 63a57420cf797edcfca41005dd6c805b77cfb596 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Oct 2024 16:30:05 +0200 Subject: [PATCH 534/534] Linux 6.6.54 Link: https://lore.kernel.org/r/20241002125751.964700919@linuxfoundation.org Tested-by: Jon Hunter Link: https://lore.kernel.org/r/20241003103209.857606770@linuxfoundation.org Tested-by: Takeshi Ogasawara Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Florian Fainelli Tested-by: Mark Brown Tested-by: Harshit Mogalapalli Tested-by: Peter Schneider Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Allen Pais Tested-by: Ron Economos Tested-by: Kexy Biscuit Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0158e14f0dd9..1e382bacd8ea 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 53 +SUBLEVEL = 54 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth