From de10ac47597e7a3596b27631d0d5ce5f48d2c099 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Sun, 1 Sep 2019 12:54:10 +0200 Subject: [PATCH 001/591] iio: adc: meson_saradc: Fix memory allocation order meson_saradc's irq handler uses priv->regmap so make sure that it is allocated before the irq get enabled. This also fixes crash when CONFIG_DEBUG_SHIRQ is enabled, as device managed resources are freed in the inverted order they had been allocated, priv->regmap was freed before the spurious fake irq that CONFIG_DEBUG_SHIRQ adds called the handler. Fixes: 3af109131b7eb8 ("iio: adc: meson-saradc: switch from polling to interrupt mode") Reported-by: Elie Roudninski Signed-off-by: Remi Pommarel Reviewed-by: Martin Blumenstingl Tested-by: Elie ROUDNINSKI Reviewed-by: Kevin Hilman Signed-off-by: Jonathan Cameron --- drivers/iio/adc/meson_saradc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 7b28d045d271..7b27306330a3 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -1219,6 +1219,11 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); + priv->regmap = devm_regmap_init_mmio(&pdev->dev, base, + priv->param->regmap_config); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); if (!irq) return -EINVAL; @@ -1228,11 +1233,6 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (ret) return ret; - priv->regmap = devm_regmap_init_mmio(&pdev->dev, base, - priv->param->regmap_config); - if (IS_ERR(priv->regmap)) - return PTR_ERR(priv->regmap); - priv->clkin = devm_clk_get(&pdev->dev, "clkin"); if (IS_ERR(priv->clkin)) { dev_err(&pdev->dev, "failed to get clkin\n"); From 85ae3aeedeccb6febb0c6f9d5346d9c6419ad925 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 31 Aug 2019 13:18:22 +0200 Subject: [PATCH 002/591] iio: imu: st_lsm6dsx: forbid 0 sensor sensitivity Do not allow configuring null sensor gain since it will force to 0 device outputs Fixes: c8d4066c7246 ("iio: imu: st_lsm6dsx: remove invalid gain value for LSM9DS1") Signed-off-by: Lorenzo Bianconi Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 2 ++ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 28 +++++++++++++------- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c | 11 +++----- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index 80e42c7dbcbe..0fe6999b8257 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -99,7 +99,9 @@ struct st_lsm6dsx_fs { #define ST_LSM6DSX_FS_LIST_SIZE 4 struct st_lsm6dsx_fs_table_entry { struct st_lsm6dsx_reg reg; + struct st_lsm6dsx_fs fs_avl[ST_LSM6DSX_FS_LIST_SIZE]; + int fs_len; }; /** diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 2d3495560136..fd5ebe1e1594 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -145,6 +145,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(732), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -154,6 +155,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[0] = { IIO_DEGREE_TO_RAD(245), 0x0 }, .fs_avl[1] = { IIO_DEGREE_TO_RAD(500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(2000), 0x3 }, + .fs_len = 3, }, }, }, @@ -215,6 +217,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -225,6 +228,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .decimator = { @@ -327,6 +331,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -337,6 +342,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .decimator = { @@ -448,6 +454,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -458,6 +465,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .decimator = { @@ -563,6 +571,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -573,6 +582,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .batch = { @@ -693,6 +703,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -703,6 +714,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .batch = { @@ -800,6 +812,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -810,6 +823,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .batch = { @@ -933,11 +947,12 @@ static int st_lsm6dsx_set_full_scale(struct st_lsm6dsx_sensor *sensor, int i, err; fs_table = &sensor->hw->settings->fs_table[sensor->id]; - for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) + for (i = 0; i < fs_table->fs_len; i++) { if (fs_table->fs_avl[i].gain == gain) break; + } - if (i == ST_LSM6DSX_FS_LIST_SIZE) + if (i == fs_table->fs_len) return -EINVAL; data = ST_LSM6DSX_SHIFT_VAL(fs_table->fs_avl[i].val, @@ -1196,18 +1211,13 @@ static ssize_t st_lsm6dsx_sysfs_scale_avail(struct device *dev, { struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev)); const struct st_lsm6dsx_fs_table_entry *fs_table; - enum st_lsm6dsx_sensor_id id = sensor->id; struct st_lsm6dsx_hw *hw = sensor->hw; int i, len = 0; - fs_table = &hw->settings->fs_table[id]; - for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) { - if (!fs_table->fs_avl[i].gain) - break; - + fs_table = &hw->settings->fs_table[sensor->id]; + for (i = 0; i < fs_table->fs_len; i++) len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ", fs_table->fs_avl[i].gain); - } buf[len - 1] = '\n'; return len; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c index 66fbcd94642d..f5fca2171954 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c @@ -61,6 +61,7 @@ static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = { .gain = 1500, .val = 0x0, }, /* 1500 uG/LSB */ + .fs_len = 1, }, .temp_comp = { .addr = 0x60, @@ -555,13 +556,9 @@ static ssize_t st_lsm6dsx_shub_scale_avail(struct device *dev, int i, len = 0; settings = sensor->ext_info.settings; - for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) { - u16 val = settings->fs_table.fs_avl[i].gain; - - if (val > 0) - len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ", - val); - } + for (i = 0; i < settings->fs_table.fs_len; i++) + len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ", + settings->fs_table.fs_avl[i].gain); buf[len - 1] = '\n'; return len; From 6c59a962e081df6d8fe43325bbfabec57e0d4751 Mon Sep 17 00:00:00 2001 From: Pascal Bouwmann Date: Thu, 29 Aug 2019 07:29:41 +0200 Subject: [PATCH 003/591] iio: fix center temperature of bmc150-accel-core The center temperature of the supported devices stored in the constant BMC150_ACCEL_TEMP_CENTER_VAL is not 24 degrees but 23 degrees. It seems that some datasheets were inconsistent on this value leading to the error. For most usecases will only make minor difference so not queued for stable. Signed-off-by: Pascal Bouwmann Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bmc150-accel-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index cf6c0e3a83d3..121b4e89f038 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -117,7 +117,7 @@ #define BMC150_ACCEL_SLEEP_1_SEC 0x0F #define BMC150_ACCEL_REG_TEMP 0x08 -#define BMC150_ACCEL_TEMP_CENTER_VAL 24 +#define BMC150_ACCEL_TEMP_CENTER_VAL 23 #define BMC150_ACCEL_AXIS_TO_REG(axis) (BMC150_ACCEL_REG_XOUT_L + (axis * 2)) #define BMC150_AUTO_SUSPEND_DELAY_MS 2000 From b6749e20d5710c955fc6d4322f8fd98c915b2573 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 1 Sep 2019 22:12:35 +0100 Subject: [PATCH 004/591] arm64: KVM: Drop hyp_alternate_select for checking for ARM64_WORKAROUND_834220 There is no reason for using hyp_alternate_select when checking for ARM64_WORKAROUND_834220, as each of the capabilities is also backed by a static key. Just replace the KVM-specific construct with cpus_have_const_cap(ARM64_WORKAROUND_834220). Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Reviewed-by: Andrew Jones --- arch/arm64/kvm/hyp/switch.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index adaf266d8de8..a15baca9aca0 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -229,20 +229,6 @@ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) } } -static bool __hyp_text __true_value(void) -{ - return true; -} - -static bool __hyp_text __false_value(void) -{ - return false; -} - -static hyp_alternate_select(__check_arm_834220, - __false_value, __true_value, - ARM64_WORKAROUND_834220); - static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) { u64 par, tmp; @@ -298,7 +284,8 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) * resolve the IPA using the AT instruction. */ if (!(esr & ESR_ELx_S1PTW) && - (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + (cpus_have_const_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { if (!__translate_far_to_hpfar(far, &hpfar)) return false; } else { From aa979fa899c5cf592e63ce7b34bc767224225c6a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 1 Sep 2019 22:12:36 +0100 Subject: [PATCH 005/591] arm64: KVM: Replace hyp_alternate_select with has_vhe() Given that the TLB invalidation path is pretty rarely used, there was never any advantage to using hyp_alternate_select() here. has_vhe(), being a glorified static key, is the right tool for the job. Off you go. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Reviewed-by: Andrew Jones --- arch/arm64/kvm/hyp/tlb.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index c466060b76d6..eb0efc5557f3 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -67,10 +67,14 @@ static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, isb(); } -static hyp_alternate_select(__tlb_switch_to_guest, - __tlb_switch_to_guest_nvhe, - __tlb_switch_to_guest_vhe, - ARM64_HAS_VIRT_HOST_EXTN); +static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + if (has_vhe()) + __tlb_switch_to_guest_vhe(kvm, cxt); + else + __tlb_switch_to_guest_nvhe(kvm, cxt); +} static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, struct tlb_inv_context *cxt) @@ -98,10 +102,14 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, write_sysreg(0, vttbr_el2); } -static hyp_alternate_select(__tlb_switch_to_host, - __tlb_switch_to_host_nvhe, - __tlb_switch_to_host_vhe, - ARM64_HAS_VIRT_HOST_EXTN); +static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + if (has_vhe()) + __tlb_switch_to_host_vhe(kvm, cxt); + else + __tlb_switch_to_host_nvhe(kvm, cxt); +} void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { @@ -111,7 +119,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); /* * We could do so much better if we had the VA as well. @@ -154,7 +162,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) if (!has_vhe() && icache_is_vpipt()) __flush_icache_all(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -165,13 +173,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -180,13 +188,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); __tlbi(vmalle1); dsb(nsh); isb(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_flush_vm_context(void) From 084b5a80e87258997632dbd975ec3e5fda6bb943 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 1 Sep 2019 22:12:37 +0100 Subject: [PATCH 006/591] arm64: KVM: Kill hyp_alternate_select() hyp_alternate_select() is now completely unused. Goodbye. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Reviewed-by: Andrew Jones --- arch/arm64/include/asm/kvm_hyp.h | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 86825aa20852..97f21cc66657 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -47,30 +47,6 @@ #define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) #define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) -/** - * hyp_alternate_select - Generates patchable code sequences that are - * used to switch between two implementations of a function, depending - * on the availability of a feature. - * - * @fname: a symbol name that will be defined as a function returning a - * function pointer whose type will match @orig and @alt - * @orig: A pointer to the default function, as returned by @fname when - * @cond doesn't hold - * @alt: A pointer to the alternate function, as returned by @fname - * when @cond holds - * @cond: a CPU feature (as described in asm/cpufeature.h) - */ -#define hyp_alternate_select(fname, orig, alt, cond) \ -typeof(orig) * __hyp_text fname(void) \ -{ \ - typeof(alt) *val = orig; \ - asm volatile(ALTERNATIVE("nop \n", \ - "mov %0, %1 \n", \ - cond) \ - : "+r" (val) : "r" (alt)); \ - return val; \ -} - int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); From aac60f1a867773de9eb164013d89c99f3ea1f009 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 11 Sep 2019 02:33:35 +0000 Subject: [PATCH 007/591] KVM: arm/arm64: vgic: Use the appropriate TRACE_INCLUDE_PATH Commit 49dfe94fe5ad ("KVM: arm/arm64: Fix TRACE_INCLUDE_PATH") fixes TRACE_INCLUDE_PATH to the correct relative path to the define_trace.h and explains why did the old one work. The same fix should be applied to virt/kvm/arm/vgic/trace.h. Reviewed-by: Masahiro Yamada Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h index 55fed77a9f73..4fd4f6db181b 100644 --- a/virt/kvm/arm/vgic/trace.h +++ b/virt/kvm/arm/vgic/trace.h @@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending, #endif /* _TRACE_VGIC_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic +#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace From 7fd25e6fc035f4b04b75bca6d7e8daa069603a76 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Sep 2019 14:12:34 +0200 Subject: [PATCH 008/591] ieee802154: atusb: fix use-after-free at disconnect The disconnect callback was accessing the hardware-descriptor private data after having having freed it. Fixes: 7490b008d123 ("ieee802154: add support for atusb transceiver") Cc: stable # 4.2 Cc: Alexander Aring Reported-by: syzbot+f4509a9138a1472e7e80@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index ceddb424f887..0dd0ba915ab9 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -1137,10 +1137,11 @@ static void atusb_disconnect(struct usb_interface *interface) ieee802154_unregister_hw(atusb->hw); + usb_put_dev(atusb->usb_dev); + ieee802154_free_hw(atusb->hw); usb_set_intfdata(interface, NULL); - usb_put_dev(atusb->usb_dev); pr_debug("%s done\n", __func__); } From d4f4de5e5ef8efde85febb6876cd3c8ab1631999 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Sep 2019 12:12:39 -0400 Subject: [PATCH 009/591] Fix the locking in dcache_readdir() and friends There are two problems in dcache_readdir() - one is that lockless traversal of the list needs non-trivial cooperation of d_alloc() (at least a switch to list_add_rcu(), and probably more than just that) and another is that it assumes that no removal will happen without the directory locked exclusive. Said assumption had always been there, never had been stated explicitly and is violated by several places in the kernel (devpts and selinuxfs). * replacement of next_positive() with different calling conventions: it returns struct list_head * instead of struct dentry *; the latter is passed in and out by reference, grabbing the result and dropping the original value. * scan is under ->d_lock. If we run out of timeslice, cursor is moved after the last position we'd reached and we reschedule; then the scan continues from that place. To avoid livelocks between multiple lseek() (with cursors getting moved past each other, never reaching the real entries) we always skip the cursors, need_resched() or not. * returned list_head * is either ->d_child of dentry we'd found or ->d_subdirs of parent (if we got to the end of the list). * dcache_readdir() and dcache_dir_lseek() switched to new helper. dcache_readdir() always holds a reference to dentry passed to dir_emit() now. Cursor is moved to just before the entry where dir_emit() has failed or into the very end of the list, if we'd run out. * move_cursor() eliminated - it had sucky calling conventions and after fixing that it became simply list_move() (in lseek and scan_positives) or list_move_tail() (in readdir). All operations with the list are under ->d_lock now, and we do not depend upon having all file removals done with parent locked exclusive anymore. Cc: stable@vger.kernel.org Reported-by: "zhengbin (A)" Signed-off-by: Al Viro --- fs/libfs.c | 134 +++++++++++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 65 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index c9b2850c0f7c..8e023b08a240 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -89,58 +89,47 @@ int dcache_dir_close(struct inode *inode, struct file *file) EXPORT_SYMBOL(dcache_dir_close); /* parent is locked at least shared */ -static struct dentry *next_positive(struct dentry *parent, - struct list_head *from, - int count) +/* + * Returns an element of siblings' list. + * We are looking for th positive after

; if + * found, dentry is grabbed and passed to caller via *. + * If no such element exists, the anchor of list is returned + * and * is set to NULL. + */ +static struct list_head *scan_positives(struct dentry *cursor, + struct list_head *p, + loff_t count, + struct dentry **res) { - unsigned *seq = &parent->d_inode->i_dir_seq, n; - struct dentry *res; - struct list_head *p; - bool skipped; - int i; + struct dentry *dentry = cursor->d_parent, *found = NULL; -retry: - i = count; - skipped = false; - n = smp_load_acquire(seq) & ~1; - res = NULL; - rcu_read_lock(); - for (p = from->next; p != &parent->d_subdirs; p = p->next) { + spin_lock(&dentry->d_lock); + while ((p = p->next) != &dentry->d_subdirs) { struct dentry *d = list_entry(p, struct dentry, d_child); - if (!simple_positive(d)) { - skipped = true; - } else if (!--i) { - res = d; - break; + // we must at least skip cursors, to avoid livelocks + if (d->d_flags & DCACHE_DENTRY_CURSOR) + continue; + if (simple_positive(d) && !--count) { + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(d)) + found = dget_dlock(d); + spin_unlock(&d->d_lock); + if (likely(found)) + break; + count = 1; + } + if (need_resched()) { + list_move(&cursor->d_child, p); + p = &cursor->d_child; + spin_unlock(&dentry->d_lock); + cond_resched(); + spin_lock(&dentry->d_lock); } } - rcu_read_unlock(); - if (skipped) { - smp_rmb(); - if (unlikely(*seq != n)) - goto retry; - } - return res; -} - -static void move_cursor(struct dentry *cursor, struct list_head *after) -{ - struct dentry *parent = cursor->d_parent; - unsigned n, *seq = &parent->d_inode->i_dir_seq; - spin_lock(&parent->d_lock); - for (;;) { - n = *seq; - if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) - break; - cpu_relax(); - } - __list_del(cursor->d_child.prev, cursor->d_child.next); - if (after) - list_add(&cursor->d_child, after); - else - list_add_tail(&cursor->d_child, &parent->d_subdirs); - smp_store_release(seq, n + 2); - spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); + dput(*res); + *res = found; + return p; } loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) @@ -158,17 +147,28 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) return -EINVAL; } if (offset != file->f_pos) { - file->f_pos = offset; - if (file->f_pos >= 2) { - struct dentry *cursor = file->private_data; - struct dentry *to; - loff_t n = file->f_pos - 2; + struct dentry *cursor = file->private_data; + struct dentry *to = NULL; + struct list_head *p; - inode_lock_shared(dentry->d_inode); - to = next_positive(dentry, &dentry->d_subdirs, n); - move_cursor(cursor, to ? &to->d_child : NULL); - inode_unlock_shared(dentry->d_inode); + file->f_pos = offset; + inode_lock_shared(dentry->d_inode); + + if (file->f_pos > 2) { + p = scan_positives(cursor, &dentry->d_subdirs, + file->f_pos - 2, &to); + spin_lock(&dentry->d_lock); + list_move(&cursor->d_child, p); + spin_unlock(&dentry->d_lock); + } else { + spin_lock(&dentry->d_lock); + list_del_init(&cursor->d_child); + spin_unlock(&dentry->d_lock); } + + dput(to); + + inode_unlock_shared(dentry->d_inode); } return offset; } @@ -190,25 +190,29 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; - struct list_head *p = &cursor->d_child; - struct dentry *next; - bool moved = false; + struct list_head *anchor = &dentry->d_subdirs; + struct dentry *next = NULL; + struct list_head *p; if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos == 2) - p = &dentry->d_subdirs; - while ((next = next_positive(dentry, p, 1)) != NULL) { + p = anchor; + else + p = &cursor->d_child; + + while ((p = scan_positives(cursor, p, 1, &next)) != anchor) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, dt_type(d_inode(next)))) break; - moved = true; - p = &next->d_child; ctx->pos++; } - if (moved) - move_cursor(cursor, p); + spin_lock(&dentry->d_lock); + list_move_tail(&cursor->d_child, p); + spin_unlock(&dentry->d_lock); + dput(next); + return 0; } EXPORT_SYMBOL(dcache_readdir); From 8581d51055a08cc6eb061c8856062290e8582ce4 Mon Sep 17 00:00:00 2001 From: "Lowry Li (Arm Technology China)" Date: Wed, 31 Jul 2019 11:04:38 +0000 Subject: [PATCH 010/591] drm: Free the writeback_job when it with an empty fb Adds the check if the writeback_job with an empty fb, then it should be freed in atomic_check phase. With this change, the driver users will not check empty fb case any more. So refined accordingly. Signed-off-by: Lowry Li (Arm Technology China) Reviewed-by: Liviu Dudau Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: james qian wang (Arm Technology China) Link: https://patchwork.freedesktop.org/patch/msgid/1564571048-15029-2-git-send-email-lowry.li@arm.com --- .../drm/arm/display/komeda/komeda_wb_connector.c | 3 +-- drivers/gpu/drm/arm/malidp_mw.c | 4 ++-- drivers/gpu/drm/drm_atomic.c | 13 +++++++++---- drivers/gpu/drm/rcar-du/rcar_du_writeback.c | 4 ++-- drivers/gpu/drm/vc4/vc4_txp.c | 5 ++--- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index 2851cac94d86..23fbee268119 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -43,9 +43,8 @@ komeda_wb_encoder_atomic_check(struct drm_encoder *encoder, struct komeda_data_flow_cfg dflow; int err; - if (!writeback_job || !writeback_job->fb) { + if (!writeback_job) return 0; - } if (!crtc_st->active) { DRM_DEBUG_ATOMIC("Cannot write the composition result out on a inactive CRTC.\n"); diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c index 2e812525025d..a59227b2cdb5 100644 --- a/drivers/gpu/drm/arm/malidp_mw.c +++ b/drivers/gpu/drm/arm/malidp_mw.c @@ -130,7 +130,7 @@ malidp_mw_encoder_atomic_check(struct drm_encoder *encoder, struct drm_framebuffer *fb; int i, n_planes; - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; fb = conn_state->writeback_job->fb; @@ -247,7 +247,7 @@ void malidp_mw_atomic_commit(struct drm_device *drm, mw_state = to_mw_state(conn_state); - if (conn_state->writeback_job && conn_state->writeback_job->fb) { + if (conn_state->writeback_job) { struct drm_framebuffer *fb = conn_state->writeback_job->fb; DRM_DEV_DEBUG_DRIVER(drm->dev, diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 419381abbdd1..14aeaf736321 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -430,10 +430,15 @@ static int drm_atomic_connector_check(struct drm_connector *connector, return -EINVAL; } - if (writeback_job->out_fence && !writeback_job->fb) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", - connector->base.id, connector->name); - return -EINVAL; + if (!writeback_job->fb) { + if (writeback_job->out_fence) { + DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", + connector->base.id, connector->name); + return -EINVAL; + } + + drm_writeback_cleanup_job(writeback_job); + state->writeback_job = NULL; } return 0; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c index ae07290bba6a..04efa78d70b6 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c @@ -147,7 +147,7 @@ static int rcar_du_wb_enc_atomic_check(struct drm_encoder *encoder, struct drm_device *dev = encoder->dev; struct drm_framebuffer *fb; - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; fb = conn_state->writeback_job->fb; @@ -221,7 +221,7 @@ void rcar_du_writeback_setup(struct rcar_du_crtc *rcrtc, unsigned int i; state = rcrtc->writeback.base.state; - if (!state || !state->writeback_job || !state->writeback_job->fb) + if (!state || !state->writeback_job) return; fb = state->writeback_job->fb; diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c index 96f91c1b4b6e..e92fa1275034 100644 --- a/drivers/gpu/drm/vc4/vc4_txp.c +++ b/drivers/gpu/drm/vc4/vc4_txp.c @@ -229,7 +229,7 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn, int i; conn_state = drm_atomic_get_new_connector_state(state, conn); - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); @@ -269,8 +269,7 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn, u32 ctrl; int i; - if (WARN_ON(!conn_state->writeback_job || - !conn_state->writeback_job->fb)) + if (WARN_ON(!conn_state->writeback_job)) return; mode = &conn_state->crtc->state->adjusted_mode; From b1066a123538044117f0a78ba8c6a50cf5a04c86 Mon Sep 17 00:00:00 2001 From: "Lowry Li (Arm Technology China)" Date: Wed, 31 Jul 2019 11:04:45 +0000 Subject: [PATCH 011/591] drm: Clear the fence pointer when writeback job signaled During it signals the completion of a writeback job, after releasing the out_fence, we'd clear the pointer. Check if fence left over in drm_writeback_cleanup_job(), release it. Signed-off-by: Lowry Li (Arm Technology China) Reviewed-by: Brian Starkey Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: james qian wang (Arm Technology China) Link: https://patchwork.freedesktop.org/patch/msgid/1564571048-15029-3-git-send-email-lowry.li@arm.com --- drivers/gpu/drm/drm_writeback.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index ff138b6ec48b..43d9e3bb3a94 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job) if (job->fb) drm_framebuffer_put(job->fb); + if (job->out_fence) + dma_fence_put(job->out_fence); + kfree(job); } EXPORT_SYMBOL(drm_writeback_cleanup_job); @@ -366,25 +369,29 @@ drm_writeback_signal_completion(struct drm_writeback_connector *wb_connector, { unsigned long flags; struct drm_writeback_job *job; + struct dma_fence *out_fence; spin_lock_irqsave(&wb_connector->job_lock, flags); job = list_first_entry_or_null(&wb_connector->job_queue, struct drm_writeback_job, list_entry); - if (job) { + if (job) list_del(&job->list_entry); - if (job->out_fence) { - if (status) - dma_fence_set_error(job->out_fence, status); - dma_fence_signal(job->out_fence); - dma_fence_put(job->out_fence); - } - } + spin_unlock_irqrestore(&wb_connector->job_lock, flags); if (WARN_ON(!job)) return; + out_fence = job->out_fence; + if (out_fence) { + if (status) + dma_fence_set_error(out_fence, status); + dma_fence_signal(out_fence); + dma_fence_put(out_fence); + job->out_fence = NULL; + } + INIT_WORK(&job->cleanup_work, cleanup_work); queue_work(system_long_wq, &job->cleanup_work); } From 0ec64895b05269c1814ea5befcadcd1184a12915 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Tue, 17 Sep 2019 14:52:00 -0400 Subject: [PATCH 012/591] nvmet: change ppl to lpp In nvmet_bdev_set_limits() the number of logical blocks per physical block is calculated, but the opposite is mentioned in the associated comment and reflected in the variable name. Correct the comment and adjust the variable name to reflect the calculation done. Signed-off-by: John Pittman Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/target/io-cmd-bdev.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index de0bff70ebb6..32008d85172b 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -11,10 +11,10 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) { const struct queue_limits *ql = &bdev_get_queue(bdev)->limits; - /* Number of physical blocks per logical block. */ - const u32 ppl = ql->physical_block_size / ql->logical_block_size; - /* Physical blocks per logical block, 0's based. */ - const __le16 ppl0b = to0based(ppl); + /* Number of logical blocks per physical block. */ + const u32 lpp = ql->physical_block_size / ql->logical_block_size; + /* Logical blocks per physical block, 0's based. */ + const __le16 lpp0b = to0based(lpp); /* * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN, @@ -25,9 +25,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) * field from the identify controller data structure should be used. */ id->nsfeat |= 1 << 1; - id->nawun = ppl0b; - id->nawupf = ppl0b; - id->nacwu = ppl0b; + id->nawun = lpp0b; + id->nawupf = lpp0b; + id->nacwu = lpp0b; /* * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and @@ -36,7 +36,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) */ id->nsfeat |= 1 << 4; /* NPWG = Namespace Preferred Write Granularity. 0's based */ - id->npwg = ppl0b; + id->npwg = lpp0b; /* NPWA = Namespace Preferred Write Alignment. 0's based */ id->npwa = id->npwg; /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */ From b224726de5e496dbf78147a66755c3d81e28bdd2 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 18 Sep 2019 00:27:20 +0000 Subject: [PATCH 013/591] nvme-pci: Fix a race in controller removal User space programs like udevd may try to read to partitions at the same time the driver detects a namespace is unusable, and may deadlock if revalidate_disk() is called while such a process is waiting to enter the frozen queue. On detecting a dead namespace, move the disk revalidate after unblocking dispatchers that may be holding bd_butex. changelog Suggested-by: Keith Busch Signed-off-by: Balbir Singh Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 108f60b46804..0c385b1994fe 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) */ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) return; - revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); + /* + * Revalidate after unblocking dispatchers that may be holding bd_butex + */ + revalidate_disk(ns->disk); } static void nvme_queue_scan(struct nvme_ctrl *ctrl) From c9c53749375ccce0191f89b86732e642f914bd82 Mon Sep 17 00:00:00 2001 From: Laurence Oberman Date: Wed, 11 Sep 2019 09:56:42 -0400 Subject: [PATCH 014/591] scsi: bnx2fc: Handle scope bits when array returns BUSY or TSF The qla2xxx driver had this issue as well when the newer array firmware returned the retry_delay_timer in the fcp_rsp. The bnx2fc is not handling the masking of the scope bits either so the retry_delay_timestamp value lands up being a large value added to the timer timestamp delaying I/O for up to 27 Minutes. This patch adds similar code to handle this to the bnx2fc driver to avoid the huge delay. Link: https://lore.kernel.org/r/1568210202-12794-1-git-send-email-loberman@redhat.com Signed-off-by: Laurence Oberman Reported-by: David Jeffery Reported-by: kbuild test robot Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_io.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index da00ca5fa5dc..401743e2b429 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -1923,6 +1923,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, struct fcoe_fcp_rsp_payload *fcp_rsp; struct bnx2fc_rport *tgt = io_req->tgt; struct scsi_cmnd *sc_cmd; + u16 scope = 0, qualifier = 0; /* scsi_cmd_cmpl is called with tgt lock held */ @@ -1990,12 +1991,30 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, if (io_req->cdb_status == SAM_STAT_TASK_SET_FULL || io_req->cdb_status == SAM_STAT_BUSY) { - /* Set the jiffies + retry_delay_timer * 100ms - for the rport/tgt */ - tgt->retry_delay_timestamp = jiffies + - fcp_rsp->retry_delay_timer * HZ / 10; + /* Newer array firmware with BUSY or + * TASK_SET_FULL may return a status that needs + * the scope bits masked. + * Or a huge delay timestamp up to 27 minutes + * can result. + */ + if (fcp_rsp->retry_delay_timer) { + /* Upper 2 bits */ + scope = fcp_rsp->retry_delay_timer + & 0xC000; + /* Lower 14 bits */ + qualifier = fcp_rsp->retry_delay_timer + & 0x3FFF; + } + if (scope > 0 && qualifier > 0 && + qualifier <= 0x3FEF) { + /* Set the jiffies + + * retry_delay_timer * 100ms + * for the rport/tgt + */ + tgt->retry_delay_timestamp = jiffies + + (qualifier * HZ / 10); + } } - } if (io_req->fcp_resid) scsi_set_resid(sc_cmd, io_req->fcp_resid); From f51913eef23f74c3bd07899dc7f1ed6df9e521d8 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Wed, 18 Sep 2019 12:20:38 +0800 Subject: [PATCH 015/591] scsi: ufs: skip shutdown if hba is not powered In some cases, hba may go through shutdown flow without successful initialization and then make system hang. For example, if ufshcd_change_power_mode() gets error and leads to ufshcd_hba_exit() to release resources of the host, future shutdown flow may hang the system since the host register will be accessed in unpowered state. To solve this issue, simply add checking to skip shutdown for above kind of situation. Link: https://lore.kernel.org/r/1568780438-28753-1-git-send-email-stanley.chu@mediatek.com Signed-off-by: Stanley Chu Acked-by: Bean Huo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c4a015e42045..57b2c3a8def3 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8140,6 +8140,9 @@ int ufshcd_shutdown(struct ufs_hba *hba) { int ret = 0; + if (!hba->is_powered) + goto out; + if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) goto out; From 4b062e7cf940aa4f38fedc3a964b24cb095373f0 Mon Sep 17 00:00:00 2001 From: Austin Kim Date: Thu, 19 Sep 2019 16:55:48 +0900 Subject: [PATCH 016/591] scsi: qedf: Remove always false 'tmp_prio < 0' statement Since tmp_prio is declared as u8, the following statement is always false. tmp_prio < 0 So remove 'always false' statement. Link: https://lore.kernel.org/r/20190919075548.GA112801@LGEARND20B15 Signed-off-by: Austin Kim Acked-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 9c24f3834d70..1d2c111bdf82 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -596,7 +596,7 @@ static void qedf_dcbx_handler(void *dev, struct qed_dcbx_get *get, u32 mib_type) tmp_prio = get->operational.app_prio.fcoe; if (qedf_default_prio > -1) qedf->prio = qedf_default_prio; - else if (tmp_prio < 0 || tmp_prio > 7) { + else if (tmp_prio > 7) { QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "FIP/FCoE prio %d out of range, setting to %d.\n", tmp_prio, QEDF_DEFAULT_PRIO); From 0ed8810276907c8a633dc8cecc48dabb6678cd23 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 6 Sep 2019 10:24:20 -0700 Subject: [PATCH 017/591] scsi: storvsc: setup 1:1 mapping between hardware queue and CPU queue storvsc doesn't use a dedicated hardware queue for a given CPU queue. When issuing I/O, it selects returning CPU (hardware queue) dynamically based on vmbus channel usage across all channels. This patch advertises num_present_cpus() as number of hardware queues. This will have upper layer setup 1:1 mapping between hardware queue and CPU queue and avoid unnecessary locking when issuing I/O. Link: https://lore.kernel.org/r/1567790660-48142-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Long Li Reviewed-by: Michael Kelley Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index ed8b9ac805e6..542d2bac2922 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1837,8 +1837,7 @@ static int storvsc_probe(struct hv_device *device, /* * Set the number of HW queues we are supporting. */ - if (stor_device->num_sc != 0) - host->nr_hw_queues = stor_device->num_sc + 1; + host->nr_hw_queues = num_present_cpus(); /* * Set the error handler work queue. From 70054aa39a013fa52eff432f2223b8bd5c0048f8 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Sat, 7 Sep 2019 09:07:30 +0800 Subject: [PATCH 018/591] scsi: megaraid: disable device when probe failed after enabled device For pci device, need to disable device when probe failed after enabled device. Link: https://lore.kernel.org/r/1567818450-173315-1-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Xiang Chen Reviewed-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 45a66048801b..ff6d4aa92421 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4183,11 +4183,11 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ && pdev->subsystem_device == 0xC000) - return -ENODEV; + goto out_disable_device; /* Now check the magic signature byte */ pci_read_config_word(pdev, PCI_CONF_AMISIG, &magic); if (magic != HBA_SIGNATURE_471 && magic != HBA_SIGNATURE) - return -ENODEV; + goto out_disable_device; /* Ok it is probably a megaraid */ } From 4b6b1bb68628ec49e4cbfabd8ac17a169f079cd8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 23 Sep 2019 13:40:35 +0800 Subject: [PATCH 019/591] scsi: hisi_sas: Make three functions static Fix sparse warnings: drivers/scsi/hisi_sas/hisi_sas_main.c:3686:6: warning: symbol 'hisi_sas_debugfs_release' was not declared. Should it be static? drivers/scsi/hisi_sas/hisi_sas_main.c:3708:5: warning: symbol 'hisi_sas_debugfs_alloc' was not declared. Should it be static? drivers/scsi/hisi_sas/hisi_sas_main.c:3799:6: warning: symbol 'hisi_sas_debugfs_bist_init' was not declared. Should it be static? Link: https://lore.kernel.org/r/20190923054035.19036-1-yuehaibing@huawei.com Reported-by: Hulk Robot Signed-off-by: YueHaibing Acked-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index d1513fdf1e00..0847e682797b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -3683,7 +3683,7 @@ void hisi_sas_debugfs_work_handler(struct work_struct *work) } EXPORT_SYMBOL_GPL(hisi_sas_debugfs_work_handler); -void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba) +static void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; int i; @@ -3705,7 +3705,7 @@ void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba) devm_kfree(dev, hisi_hba->debugfs_port_reg[i]); } -int hisi_sas_debugfs_alloc(struct hisi_hba *hisi_hba) +static int hisi_sas_debugfs_alloc(struct hisi_hba *hisi_hba) { const struct hisi_sas_hw *hw = hisi_hba->hw; struct device *dev = hisi_hba->dev; @@ -3796,7 +3796,7 @@ fail: return -ENOMEM; } -void hisi_sas_debugfs_bist_init(struct hisi_hba *hisi_hba) +static void hisi_sas_debugfs_bist_init(struct hisi_hba *hisi_hba) { hisi_hba->debugfs_bist_dentry = debugfs_create_dir("bist", hisi_hba->debugfs_dir); From 248a445adfc8c33ffd67cf1f2e336578e34f9e21 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Thu, 12 Sep 2019 11:09:05 -0700 Subject: [PATCH 020/591] scsi: qla2xxx: Silence fwdump template message Print if fwdt template is present or not, only when ql2xextended_error_logging is enabled. Link: https://lore.kernel.org/r/20190912180918.6436-2-hmadhani@marvell.com Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b6cdf108994c..f6a5d78f93c9 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3190,7 +3190,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) for (j = 0; j < 2; j++, fwdt++) { if (!fwdt->template) { - ql_log(ql_log_warn, vha, 0x00ba, + ql_dbg(ql_dbg_init, vha, 0x00ba, "-> fwdt%u no template\n", j); continue; } From c3b6a1d397420a0fdd97af2f06abfb78adc370df Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 12 Sep 2019 11:09:06 -0700 Subject: [PATCH 021/591] scsi: qla2xxx: Fix unbound sleep in fcport delete path. There are instances, though rare, where a LOGO request cannot be sent out and the thread in free session done can wait indefinitely. Fix this by putting an upper bound to sleep. Link: https://lore.kernel.org/r/20190912180918.6436-3-hmadhani@marvell.com Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 0ffda6171614..b58ecd2d7fb6 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1020,6 +1020,7 @@ void qlt_free_session_done(struct work_struct *work) if (logout_started) { bool traced = false; + u16 cnt = 0; while (!READ_ONCE(sess->logout_completed)) { if (!traced) { @@ -1029,6 +1030,9 @@ void qlt_free_session_done(struct work_struct *work) traced = true; } msleep(100); + cnt++; + if (cnt > 200) + break; } ql_dbg(ql_dbg_disc, vha, 0xf087, From fd5564ba54e0d8a9e3e823d311b764232e09eb5f Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 12 Sep 2019 11:09:07 -0700 Subject: [PATCH 022/591] scsi: qla2xxx: Fix stale mem access on driver unload On driver unload, 'remove_one' thread was allowed to advance, while session cleanup still lag behind. This patch ensures session deletion will finish before remove_one can advance. Link: https://lore.kernel.org/r/20190912180918.6436-4-hmadhani@marvell.com Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 1 + drivers/scsi/qla2xxx/qla_target.c | 21 ++++++++------------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 51154c049385..42980e52fb41 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1118,6 +1118,7 @@ qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) qla2x00_mark_all_devices_lost(vha, 0); wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), 10*HZ); + flush_workqueue(vha->hw->wq); } /* diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index b58ecd2d7fb6..b5315be00b4d 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -953,7 +953,7 @@ void qlt_free_session_done(struct work_struct *work) struct qla_hw_data *ha = vha->hw; unsigned long flags; bool logout_started = false; - scsi_qla_host_t *base_vha; + scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); struct qlt_plogi_ack_t *own = sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN]; @@ -1105,6 +1105,7 @@ void qlt_free_session_done(struct work_struct *work) } spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + sess->free_pending = 0; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, "Unregistration of sess %p %8phC finished fcp_cnt %d\n", @@ -1113,17 +1114,8 @@ void qlt_free_session_done(struct work_struct *work) if (tgt && (tgt->sess_count == 0)) wake_up_all(&tgt->waitQ); - if (vha->fcport_count == 0) - wake_up_all(&vha->fcport_waitQ); - - base_vha = pci_get_drvdata(ha->pdev); - - sess->free_pending = 0; - - if (test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags)) - return; - - if ((!tgt || !tgt->tgt_stop) && !LOOP_TRANSITION(vha)) { + if (!test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags) && + (!tgt || !tgt->tgt_stop) && !LOOP_TRANSITION(vha)) { switch (vha->host->active_mode) { case MODE_INITIATOR: case MODE_DUAL: @@ -1136,6 +1128,9 @@ void qlt_free_session_done(struct work_struct *work) break; } } + + if (vha->fcport_count == 0) + wake_up_all(&vha->fcport_waitQ); } /* ha->tgt.sess_lock supposed to be held on entry */ @@ -1165,7 +1160,7 @@ void qlt_unreg_sess(struct fc_port *sess) sess->last_login_gen = sess->login_gen; INIT_WORK(&sess->free_work, qlt_free_session_done); - schedule_work(&sess->free_work); + queue_work(sess->vha->hw->wq, &sess->free_work); } EXPORT_SYMBOL(qlt_unreg_sess); From f5187b7d1ac66b61676f896751d3af9fcf8dd592 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 12 Sep 2019 11:09:08 -0700 Subject: [PATCH 023/591] scsi: qla2xxx: Optimize NPIV tear down process In the case of NPIV port is being torn down, this patch will set a flag to indicate VPORT_DELETE. This would prevent relogin to be triggered. Link: https://lore.kernel.org/r/20190912180918.6436-5-hmadhani@marvell.com Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 2 ++ drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_gs.c | 3 ++- drivers/scsi/qla2xxx/qla_mid.c | 32 +++++++++++++++++++++---------- drivers/scsi/qla2xxx/qla_os.c | 7 ++++++- drivers/scsi/qla2xxx/qla_target.c | 1 + 6 files changed, 34 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index e9c449ef515c..8b3015361428 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2920,6 +2920,8 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) struct qla_hw_data *ha = vha->hw; uint16_t id = vha->vp_idx; + set_bit(VPORT_DELETE, &vha->dpc_flags); + while (test_bit(LOOP_RESYNC_ACTIVE, &vha->dpc_flags) || test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags)) msleep(1000); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 873a6aef1c5c..fa038568beb6 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4394,6 +4394,7 @@ typedef struct scsi_qla_host { #define IOCB_WORK_ACTIVE 31 #define SET_ZIO_THRESHOLD_NEEDED 32 #define ISP_ABORT_TO_ROM 33 +#define VPORT_DELETE 34 unsigned long pci_flags; #define PFLG_DISCONNECTED 0 /* PCI device removed */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index dc0e36676313..5298ed10059f 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3102,7 +3102,8 @@ int qla24xx_post_gpnid_work(struct scsi_qla_host *vha, port_id_t *id) { struct qla_work_evt *e; - if (test_bit(UNLOADING, &vha->dpc_flags)) + if (test_bit(UNLOADING, &vha->dpc_flags) || + (vha->vp_idx && test_bit(VPORT_DELETE, &vha->dpc_flags))) return 0; e = qla2x00_alloc_work(vha, QLA_EVT_GPNID); diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 1a9a11ae7285..6afad68e5ba2 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -66,6 +66,7 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) uint16_t vp_id; struct qla_hw_data *ha = vha->hw; unsigned long flags = 0; + u8 i; mutex_lock(&ha->vport_lock); /* @@ -75,8 +76,9 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) * ensures no active vp_list traversal while the vport is removed * from the queue) */ - wait_event_timeout(vha->vref_waitq, !atomic_read(&vha->vref_count), - 10*HZ); + for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++) + wait_event_timeout(vha->vref_waitq, + atomic_read(&vha->vref_count), HZ); spin_lock_irqsave(&ha->vport_slock, flags); if (atomic_read(&vha->vref_count)) { @@ -262,6 +264,9 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) spin_lock_irqsave(&ha->vport_slock, flags); list_for_each_entry(vha, &ha->vp_list, list) { if (vha->vp_idx) { + if (test_bit(VPORT_DELETE, &vha->dpc_flags)) + continue; + atomic_inc(&vha->vref_count); spin_unlock_irqrestore(&ha->vport_slock, flags); @@ -300,6 +305,20 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) int qla2x00_vp_abort_isp(scsi_qla_host_t *vha) { + fc_port_t *fcport; + + /* + * To exclusively reset vport, we need to log it out first. + * Note: This control_vp can fail if ISP reset is already + * issued, this is expected, as the vp would be already + * logged out due to ISP reset. + */ + if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) { + qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); + list_for_each_entry(fcport, &vha->vp_fcports, list) + fcport->logout_on_delete = 0; + } + /* * Physical port will do most of the abort and recovery work. We can * just treat it as a loop down @@ -312,16 +331,9 @@ qla2x00_vp_abort_isp(scsi_qla_host_t *vha) atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); } - /* - * To exclusively reset vport, we need to log it out first. Note: this - * control_vp can fail if ISP reset is already issued, this is - * expected, as the vp would be already logged out due to ISP reset. - */ - if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) - qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); - ql_dbg(ql_dbg_taskm, vha, 0x801d, "Scheduling enable of Vport %d.\n", vha->vp_idx); + return qla24xx_enable_vp(vha); } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 42980e52fb41..5eda86ff6606 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1115,9 +1115,14 @@ static inline int test_fcport_count(scsi_qla_host_t *vha) void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) { + u8 i; + qla2x00_mark_all_devices_lost(vha, 0); - wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), 10*HZ); + for (i = 0; i < 10; i++) + wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), + HZ); + flush_workqueue(vha->hw->wq); } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index b5315be00b4d..a06e56224a55 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1115,6 +1115,7 @@ void qlt_free_session_done(struct work_struct *work) wake_up_all(&tgt->waitQ); if (!test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags) && + !(vha->vp_idx && test_bit(VPORT_DELETE, &vha->dpc_flags)) && (!tgt || !tgt->tgt_stop) && !LOOP_TRANSITION(vha)) { switch (vha->host->active_mode) { case MODE_INITIATOR: From 7f2a398d59d658818f3d219645164676fbbc88e8 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 12 Sep 2019 11:09:09 -0700 Subject: [PATCH 024/591] scsi: qla2xxx: Fix N2N link reset Fix stalled link recovery for N2N with FC-NVMe connection. Link: https://lore.kernel.org/r/20190912180918.6436-6-hmadhani@marvell.com Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_def.h | 3 +- drivers/scsi/qla2xxx/qla_init.c | 105 +++++++++++++++++++++++--------- drivers/scsi/qla2xxx/qla_mbx.c | 23 ++++++- drivers/scsi/qla2xxx/qla_os.c | 4 ++ 4 files changed, 102 insertions(+), 33 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index fa038568beb6..6ffa9877c28b 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2396,6 +2396,7 @@ typedef struct fc_port { unsigned int query:1; unsigned int id_changed:1; unsigned int scan_needed:1; + unsigned int n2n_flag:1; struct completion nvme_del_done; uint32_t nvme_prli_service_param; @@ -2446,7 +2447,6 @@ typedef struct fc_port { uint8_t fc4_type; uint8_t fc4f_nvme; uint8_t scan_state; - uint8_t n2n_flag; unsigned long last_queue_full; unsigned long last_ramp_up; @@ -3036,6 +3036,7 @@ enum scan_flags_t { enum fc4type_t { FS_FC4TYPE_FCP = BIT_0, FS_FC4TYPE_NVME = BIT_1, + FS_FCP_IS_N2N = BIT_7, }; struct fab_scan_rp { diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index f6a5d78f93c9..67f522a8a9d4 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -746,12 +746,15 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, break; default: if ((id.b24 != fcport->d_id.b24 && - fcport->d_id.b24) || + fcport->d_id.b24 && + fcport->loop_id != FC_NO_LOOP_ID) || (fcport->loop_id != FC_NO_LOOP_ID && fcport->loop_id != loop_id)) { ql_dbg(ql_dbg_disc, vha, 0x20e3, "%s %d %8phC post del sess\n", __func__, __LINE__, fcport->port_name); + if (fcport->n2n_flag) + fcport->d_id.b24 = 0; qlt_schedule_sess_for_deletion(fcport); return; } @@ -759,6 +762,8 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, } fcport->loop_id = loop_id; + if (fcport->n2n_flag) + fcport->d_id.b24 = id.b24; wwn = wwn_to_u64(fcport->port_name); qlt_find_sess_invalidate_other(vha, wwn, @@ -972,7 +977,7 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) wwn = wwn_to_u64(e->port_name); ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x20e8, - "%s %8phC %02x:%02x:%02x state %d/%d lid %x \n", + "%s %8phC %02x:%02x:%02x CLS %x/%x lid %x \n", __func__, (void *)&wwn, e->port_id[2], e->port_id[1], e->port_id[0], e->current_login_state, e->last_login_state, (loop_id & 0x7fff)); @@ -1499,7 +1504,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) (fcport->fw_login_state == DSC_LS_PRLI_PEND))) return 0; - if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP && + !N2N_TOPO(vha->hw)) { if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) { set_bit(RELOGIN_NEEDED, &vha->dpc_flags); return 0; @@ -1570,8 +1576,9 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) qla24xx_post_gpdb_work(vha, fcport, 0); } else { ql_dbg(ql_dbg_disc, vha, 0x2118, - "%s %d %8phC post NVMe PRLI\n", - __func__, __LINE__, fcport->port_name); + "%s %d %8phC post %s PRLI\n", + __func__, __LINE__, fcport->port_name, + fcport->fc4f_nvme ? "NVME" : "FC"); qla24xx_post_prli_work(vha, fcport); } break; @@ -1853,17 +1860,38 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) break; } - if (ea->fcport->n2n_flag) { + if (ea->fcport->fc4f_nvme) { ql_dbg(ql_dbg_disc, vha, 0x2118, "%s %d %8phC post fc4 prli\n", __func__, __LINE__, ea->fcport->port_name); ea->fcport->fc4f_nvme = 0; - ea->fcport->n2n_flag = 0; qla24xx_post_prli_work(vha, ea->fcport); + return; + } + + /* at this point both PRLI NVME & PRLI FCP failed */ + if (N2N_TOPO(vha->hw)) { + if (ea->fcport->n2n_link_reset_cnt < 3) { + ea->fcport->n2n_link_reset_cnt++; + /* + * remote port is not sending Plogi. Reset + * link to kick start his state machine + */ + set_bit(N2N_LINK_RESET, &vha->dpc_flags); + } else { + ql_log(ql_log_warn, vha, 0x2119, + "%s %d %8phC Unable to reconnect\n", + __func__, __LINE__, ea->fcport->port_name); + } + } else { + /* + * switch connect. login failed. Take connection + * down and allow relogin to retrigger + */ + ea->fcport->flags &= ~FCF_ASYNC_SENT; + ea->fcport->keep_nport_handle = 0; + qlt_schedule_sess_for_deletion(ea->fcport); } - ql_dbg(ql_dbg_disc, vha, 0x2119, - "%s %d %8phC unhandle event of %x\n", - __func__, __LINE__, ea->fcport->port_name, ea->data[0]); break; } } @@ -5000,28 +5028,47 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) unsigned long flags; /* Inititae N2N login. */ - if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) { - /* borrowing */ - u32 *bp, i, sz; + if (N2N_TOPO(ha)) { + if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) { + /* borrowing */ + u32 *bp, i, sz; - memset(ha->init_cb, 0, ha->init_cb_size); - sz = min_t(int, sizeof(struct els_plogi_payload), - ha->init_cb_size); - rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma, - (void *)ha->init_cb, sz); - if (rval == QLA_SUCCESS) { - bp = (uint32_t *)ha->init_cb; - for (i = 0; i < sz/4 ; i++, bp++) - *bp = cpu_to_be32(*bp); + memset(ha->init_cb, 0, ha->init_cb_size); + sz = min_t(int, sizeof(struct els_plogi_payload), + ha->init_cb_size); + rval = qla24xx_get_port_login_templ(vha, + ha->init_cb_dma, (void *)ha->init_cb, sz); + if (rval == QLA_SUCCESS) { + bp = (uint32_t *)ha->init_cb; + for (i = 0; i < sz/4 ; i++, bp++) + *bp = cpu_to_be32(*bp); - memcpy(&ha->plogi_els_payld.data, (void *)ha->init_cb, - sizeof(ha->plogi_els_payld.data)); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - } else { - ql_dbg(ql_dbg_init, vha, 0x00d1, - "PLOGI ELS param read fail.\n"); + memcpy(&ha->plogi_els_payld.data, + (void *)ha->init_cb, + sizeof(ha->plogi_els_payld.data)); + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + } else { + ql_dbg(ql_dbg_init, vha, 0x00d1, + "PLOGI ELS param read fail.\n"); + goto skip_login; + } + } + + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->n2n_flag) { + qla24xx_fcport_handle_login(vha, fcport); + return QLA_SUCCESS; + } + } +skip_login: + spin_lock_irqsave(&vha->work_lock, flags); + vha->scan.scan_retry++; + spin_unlock_irqrestore(&vha->work_lock, flags); + + if (vha->scan.scan_retry < MAX_SCAN_RETRIES) { + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); } - return QLA_SUCCESS; } found_devs = 0; diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 4c858e2d0ea8..6d6e10812b42 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2249,7 +2249,7 @@ qla2x00_lip_reset(scsi_qla_host_t *vha) mbx_cmd_t mc; mbx_cmd_t *mcp = &mc; - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x105a, + ql_dbg(ql_dbg_disc, vha, 0x105a, "Entered %s.\n", __func__); if (IS_CNA_CAPABLE(vha->hw)) { @@ -3883,14 +3883,23 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, case TOPO_N2N: ha->current_topology = ISP_CFG_N; spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + fcport->scan_state = QLA_FCPORT_SCAN; + fcport->n2n_flag = 0; + } + fcport = qla2x00_find_fcport_by_wwpn(vha, rptid_entry->u.f1.port_name, 1); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); if (fcport) { fcport->plogi_nack_done_deadline = jiffies + HZ; - fcport->dm_login_expire = jiffies + 3*HZ; + fcport->dm_login_expire = jiffies + 2*HZ; fcport->scan_state = QLA_FCPORT_FOUND; + fcport->n2n_flag = 1; + if (vha->flags.nvme_enabled) + fcport->fc4f_nvme = 1; + switch (fcport->disc_state) { case DSC_DELETED: set_bit(RELOGIN_NEEDED, @@ -3924,7 +3933,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, rptid_entry->u.f1.port_name, rptid_entry->u.f1.node_name, NULL, - FC4_TYPE_UNKNOWN); + FS_FCP_IS_N2N); } /* if our portname is higher then initiate N2N login */ @@ -4023,6 +4032,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, list_for_each_entry(fcport, &vha->vp_fcports, list) { fcport->scan_state = QLA_FCPORT_SCAN; + fcport->n2n_flag = 0; } fcport = qla2x00_find_fcport_by_wwpn(vha, @@ -4032,6 +4042,13 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, fcport->login_retry = vha->hw->login_retry_count; fcport->plogi_nack_done_deadline = jiffies + HZ; fcport->scan_state = QLA_FCPORT_FOUND; + fcport->n2n_flag = 1; + fcport->d_id.b.domain = + rptid_entry->u.f2.remote_nport_id[2]; + fcport->d_id.b.area = + rptid_entry->u.f2.remote_nport_id[1]; + fcport->d_id.b.al_pa = + rptid_entry->u.f2.remote_nport_id[0]; } } } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5eda86ff6606..8411dd5acd43 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5033,6 +5033,10 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) memcpy(fcport->port_name, e->u.new_sess.port_name, WWN_SIZE); + + if (e->u.new_sess.fc4_type & FS_FCP_IS_N2N) + fcport->n2n_flag = 1; + } else { ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %8phC mem alloc fail.\n", From f3f1938bb673b1b5ad182c4608f5f8a24921eea3 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 12 Sep 2019 11:09:10 -0700 Subject: [PATCH 025/591] scsi: qla2xxx: Fix N2N link up fail During link up/bounce, qla driver would do command flush as part of cleanup. In this case, the flush can intefere with FW state. This patch allows FW to be in control of link up. Link: https://lore.kernel.org/r/20190912180918.6436-7-hmadhani@marvell.com Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_mbx.c | 2 ++ drivers/scsi/qla2xxx/qla_os.c | 6 ++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 6d6e10812b42..1cc6913f76c4 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3897,6 +3897,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, fcport->dm_login_expire = jiffies + 2*HZ; fcport->scan_state = QLA_FCPORT_FOUND; fcport->n2n_flag = 1; + fcport->keep_nport_handle = 1; if (vha->flags.nvme_enabled) fcport->fc4f_nvme = 1; @@ -4042,6 +4043,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, fcport->login_retry = vha->hw->login_retry_count; fcport->plogi_nack_done_deadline = jiffies + HZ; fcport->scan_state = QLA_FCPORT_FOUND; + fcport->keep_nport_handle = 1; fcport->n2n_flag = 1; fcport->d_id.b.domain = rptid_entry->u.f2.remote_nport_id[2]; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 8411dd5acd43..ee47de9fbc05 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5135,11 +5135,9 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) if (dfcp) qlt_schedule_sess_for_deletion(tfcp); - - if (N2N_TOPO(vha->hw)) - fcport->flags &= ~FCF_FABRIC_DEVICE; - if (N2N_TOPO(vha->hw)) { + fcport->flags &= ~FCF_FABRIC_DEVICE; + fcport->keep_nport_handle = 1; if (vha->flags.nvme_enabled) { fcport->fc4f_nvme = 1; fcport->n2n_flag = 1; From 0aabb6b699f72dca96988d3f428e222f932dc889 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 12 Sep 2019 11:09:11 -0700 Subject: [PATCH 026/591] scsi: qla2xxx: Fix Nport ID display value For N2N, the NPort ID is assigned by driver in the PLOGI ELS. According to FW Spec the byte order for SID is not the same as DID. Link: https://lore.kernel.org/r/20190912180918.6436-8-hmadhani@marvell.com Reviewed-by: Roman Bolshakov Tested-by: Roman Bolshakov Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_iocb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index e92e52aa6e9b..518eb954cf42 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2656,9 +2656,10 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) els_iocb->port_id[0] = sp->fcport->d_id.b.al_pa; els_iocb->port_id[1] = sp->fcport->d_id.b.area; els_iocb->port_id[2] = sp->fcport->d_id.b.domain; - els_iocb->s_id[0] = vha->d_id.b.al_pa; - els_iocb->s_id[1] = vha->d_id.b.area; - els_iocb->s_id[2] = vha->d_id.b.domain; + /* For SID the byte order is different than DID */ + els_iocb->s_id[1] = vha->d_id.b.al_pa; + els_iocb->s_id[2] = vha->d_id.b.area; + els_iocb->s_id[0] = vha->d_id.b.domain; if (elsio->u.els_logo.els_cmd == ELS_DCMD_PLOGI) { els_iocb->control_flags = 0; From 21a045e430e56725628f361dede8a882e92469b9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 20 Sep 2019 21:45:33 +0200 Subject: [PATCH 027/591] ieee802154: mcr20a: simplify a bit 'mcr20a_handle_rx_read_buf_complete()' Use a 'skb_put_data()' variant instead of rewritting it. The __skb_put_data variant is safe here. It is obvious that the skb can not overflow. It has just been allocated a few lines above with the same 'len'. Signed-off-by: Christophe JAILLET Acked-by: Xue Liu Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mcr20a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 17f2300e63ee..8dc04e2590b1 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -800,7 +800,7 @@ mcr20a_handle_rx_read_buf_complete(void *context) if (!skb) return; - memcpy(skb_put(skb, len), lp->rx_buf, len); + __skb_put_data(skb, lp->rx_buf, len); ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]); print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1, From ddef29578a81a1d4d8f2b26a7adbfe21407ee3ea Mon Sep 17 00:00:00 2001 From: "Wunderlich, Mark" Date: Wed, 18 Sep 2019 23:36:37 +0000 Subject: [PATCH 028/591] nvme-tcp: fix wrong stop condition in io_work Allow the do/while statement to continue if current time is not after the proposed time 'deadline'. Intent is to allow loop to proceed for a specific time period. Currently the loop, as coded, will exit after first pass. Signed-off-by: Mark Wunderlich Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg --- drivers/nvme/host/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4ffd5957637a..385a5212c10f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1042,7 +1042,7 @@ static void nvme_tcp_io_work(struct work_struct *w) { struct nvme_tcp_queue *queue = container_of(w, struct nvme_tcp_queue, io_work); - unsigned long start = jiffies + msecs_to_jiffies(1); + unsigned long deadline = jiffies + msecs_to_jiffies(1); do { bool pending = false; @@ -1067,7 +1067,7 @@ static void nvme_tcp_io_work(struct work_struct *w) if (!pending) return; - } while (time_after(jiffies, start)); /* quota is exhausted */ + } while (!time_after(jiffies, deadline)); /* quota is exhausted */ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } From 7cbb5c6f9aa7cfda7175d82a9cf77a92965b0c5e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 18 Sep 2019 13:15:55 -0500 Subject: [PATCH 029/591] nvme-pci: Save PCI state before putting drive into deepest state The action of saving the PCI state will cause numerous PCI configuration space reads which depending upon the vendor implementation may cause the drive to exit the deepest NVMe state. In these cases ASPM will typically resolve the PCIe link state and APST may resolve the NVMe power state. However it has also been observed that this register access after quiesced will cause PC10 failure on some device combinations. To resolve this, move the PCI state saving to before SetFeatures has been called. This has been proven to resolve the issue across a 5000 sample test on previously failing disk/system combinations. Signed-off-by: Mario Limonciello Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6b4d7b064b38..d3f63f0c212a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2944,11 +2944,21 @@ static int nvme_suspend(struct device *dev) if (ret < 0) goto unfreeze; + /* + * A saved state prevents pci pm from generically controlling the + * device's power. If we're using protocol specific settings, we don't + * want pci interfering. + */ + pci_save_state(pdev); + ret = nvme_set_power_state(ctrl, ctrl->npss); if (ret < 0) goto unfreeze; if (ret) { + /* discard the saved state */ + pci_load_saved_state(pdev, NULL); + /* * Clearing npss forces a controller reset on resume. The * correct value will be resdicovered then. @@ -2956,14 +2966,7 @@ static int nvme_suspend(struct device *dev) nvme_dev_disable(ndev, true); ctrl->npss = 0; ret = 0; - goto unfreeze; } - /* - * A saved state prevents pci pm from generically controlling the - * device's power. If we're using protocol specific settings, we don't - * want pci interfering. - */ - pci_save_state(pdev); unfreeze: nvme_unfreeze(ctrl); return ret; From bc4f6e06a90ea016855fc67212b4d500145f0b8a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 23 Sep 2019 17:18:36 +0300 Subject: [PATCH 030/591] nvme: fix an error code in nvme_init_subsystem() "ret" should be a negative error code here, but it's either success or possibly uninitialized. Fixes: 32fd90c40768 ("nvme: change locking for the per-subsystem controller list") Signed-off-by: Dan Carpenter Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0c385b1994fe..d3c9df62a9d5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2543,8 +2543,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) list_add_tail(&subsys->entry, &nvme_subsystems); } - if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, - dev_name(ctrl->device))) { + ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, + dev_name(ctrl->device)); + if (ret) { dev_err(ctrl->device, "failed to create sysfs link from subsystem.\n"); goto out_put_subsystem; From ff13c1b87c97275b82b2af99044b4abf6861b28f Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sat, 21 Sep 2019 23:58:19 +0300 Subject: [PATCH 031/591] nvme-rdma: Fix max_hw_sectors calculation By default, the NVMe/RDMA driver should support max io_size of 1MiB (or upto the maximum supported size by the HCA). Currently, one will see that /sys/class/block//queue/max_hw_sectors_kb is 1020 instead of 1024. A non power of 2 value can cause performance degradation due to unnecessary splitting of IO requests and unoptimized allocation units. The number of pages per MR has been fixed here, so there is no longer any need to reduce max_sectors by 1. Reviewed-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dfa07bb9dfeb..9d16dfc29368 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -427,7 +427,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) { return min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ibdev->attrs.max_fast_reg_page_list_len); + ibdev->attrs.max_fast_reg_page_list_len - 1); } static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) @@ -437,7 +437,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) const int cq_factor = send_wr_factor + 1; /* + RECV */ int comp_vector, idx = nvme_rdma_queue_idx(queue); enum ib_poll_context poll_ctx; - int ret; + int ret, pages_per_mr; queue->device = nvme_rdma_find_get_device(queue->cm_id); if (!queue->device) { @@ -479,10 +479,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_qp; } + /* + * Currently we don't use SG_GAPS MR's so if the first entry is + * misaligned we'll end up using two entries for a single data page, + * so one additional entry is required. + */ + pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1; ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, queue->queue_size, IB_MR_TYPE_MEM_REG, - nvme_rdma_get_max_fr_pages(ibdev), 0); + pages_per_mr, 0); if (ret) { dev_err(queue->ctrl->ctrl.device, "failed to initialize MR pool sized %d for QID %d\n", @@ -820,8 +826,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) goto out_stop_queue; - ctrl->ctrl.max_hw_sectors = - (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); + ctrl->ctrl.max_segments = ctrl->max_fr_pages; + ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); From f03e42c6af60f778a6d1ccfb857db9b2ec835279 Mon Sep 17 00:00:00 2001 From: Gabriel Craciunescu Date: Mon, 23 Sep 2019 20:22:56 +0200 Subject: [PATCH 032/591] Added QUIRKs for ADATA XPG SX8200 Pro 512GB Booting with default_ps_max_latency_us >6000 makes the device fail. Also SUBNQN is NULL and gives a warning on each boot/resume. $ nvme id-ctrl /dev/nvme0 | grep ^subnqn subnqn : (null) I use this device with an Acer Nitro 5 (AN515-43-R8BF) Laptop. To be sure is not a Laptop issue only, I tested the device on my server board with the same results. ( with 2x,4x link on the board and 4x link on a PCI-E card ). Signed-off-by: Gabriel Craciunescu Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d3f63f0c212a..7ab07f4dce83 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3091,6 +3091,9 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, From 30f27d57c06e0199a9d3e0775113e13e2ae9078e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 13 Sep 2019 10:36:40 -0700 Subject: [PATCH 033/591] nvmet-tcp: remove superflous check on request sgl Now that sgl_free is null safe, drop the superflous check. Signed-off-by: Sagi Grimberg --- drivers/nvme/target/tcp.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index bf4f03474e89..d535080b781f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -348,8 +348,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) return 0; err: - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); return NVME_SC_INTERNAL; } @@ -554,8 +553,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) if (queue->nvme_sq.sqhd_disabled) { kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); } return 1; @@ -586,8 +584,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, return -EAGAIN; kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); cmd->queue->snd_cmd = NULL; nvmet_tcp_put_cmd(cmd); return 1; @@ -1310,8 +1307,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) nvmet_req_uninit(&cmd->req); nvmet_tcp_unmap_pdu_iovec(cmd); kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); } static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) From 19ea025e1d28c629b369c3532a85b3df478cc5c6 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 24 Sep 2019 17:42:41 +0800 Subject: [PATCH 034/591] nvme: Add quirk for Kingston NVME SSD running FW E8FK11.T Kingston NVME SSD with firmware version E8FK11.T has no interrupt after resume with actions related to suspend to idle. This patch applied NVME_QUIRK_SIMPLE_SUSPEND quirk to fix this issue. Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend") Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=204887 Signed-off-by: Jian-Hong Pan Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d3c9df62a9d5..adff57ea149e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2292,6 +2292,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x14a4, .fr = "22301111", .quirks = NVME_QUIRK_SIMPLE_SUSPEND, + }, + { + /* + * This Kingston E8FK11.T firmware version has no interrupt + * after resume with actions related to suspend to idle + * https://bugzilla.kernel.org/show_bug.cgi?id=204887 + */ + .vid = 0x2646, + .fr = "E8FK11.T", + .quirks = NVME_QUIRK_SIMPLE_SUSPEND, } }; From 65e68edce0db433aa0c2b26d7dc14fbbbeb89fbb Mon Sep 17 00:00:00 2001 From: Marta Rybczynska Date: Tue, 24 Sep 2019 15:14:52 +0200 Subject: [PATCH 035/591] nvme: allow 64-bit results in passthru commands It is not possible to get 64-bit results from the passthru commands, what prevents from getting for the Capabilities (CAP) property value. As a result, it is not possible to implement IOL's NVMe Conformance test 4.3 Case 1 for Fabrics targets [1] (page 123). This issue has been already discussed [2], but without a solution. This patch solves the problem by adding new ioctls with a new passthru structure, including 64-bit results. The older ioctls stay unchanged. [1] https://www.iol.unh.edu/sites/default/files/testsuites/nvme/UNH-IOL_NVMe_Conformance_Test_Suite_v11.0.pdf [2] http://lists.infradead.org/pipermail/linux-nvme/2018-June/018791.html Signed-off-by: Marta Rybczynska Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 108 +++++++++++++++++++++++++++----- include/uapi/linux/nvme_ioctl.h | 23 +++++++ 2 files changed, 115 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index adff57ea149e..87ed437a46b8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -850,7 +850,7 @@ out: static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u32 *result, unsigned timeout) + u32 meta_seed, u64 *result, unsigned timeout) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -891,7 +891,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, else ret = nvme_req(req)->status; if (result) - *result = le32_to_cpu(nvme_req(req)->result.u32); + *result = le64_to_cpu(nvme_req(req)->result.u64); if (meta && !ret && !write) { if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; @@ -1338,6 +1338,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_command c; unsigned timeout = 0; u32 effects; + u64 result; + int status; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) + return -EFAULT; + if (cmd.flags) + return -EINVAL; + + memset(&c, 0, sizeof(c)); + c.common.opcode = cmd.opcode; + c.common.flags = cmd.flags; + c.common.nsid = cpu_to_le32(cmd.nsid); + c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); + c.common.cdw10 = cpu_to_le32(cmd.cdw10); + c.common.cdw11 = cpu_to_le32(cmd.cdw11); + c.common.cdw12 = cpu_to_le32(cmd.cdw12); + c.common.cdw13 = cpu_to_le32(cmd.cdw13); + c.common.cdw14 = cpu_to_le32(cmd.cdw14); + c.common.cdw15 = cpu_to_le32(cmd.cdw15); + + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); + + effects = nvme_passthru_start(ctrl, ns, cmd.opcode); + status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, + (void __user *)(uintptr_t)cmd.addr, cmd.data_len, + (void __user *)(uintptr_t)cmd.metadata, + cmd.metadata_len, 0, &result, timeout); + nvme_passthru_end(ctrl, effects); + + if (status >= 0) { + if (put_user(result, &ucmd->result)) + return -EFAULT; + } + + return status; +} + +static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_passthru_cmd64 __user *ucmd) +{ + struct nvme_passthru_cmd64 cmd; + struct nvme_command c; + unsigned timeout = 0; + u32 effects; int status; if (!capable(CAP_SYS_ADMIN)) @@ -1408,6 +1456,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) srcu_read_unlock(&head->srcu, idx); } +static bool is_ctrl_ioctl(unsigned int cmd) +{ + if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) + return true; + if (is_sed_ioctl(cmd)) + return true; + return false; +} + +static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, + void __user *argp, + struct nvme_ns_head *head, + int srcu_idx) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + int ret; + + nvme_get_ctrl(ns->ctrl); + nvme_put_ns_from_disk(head, srcu_idx); + + switch (cmd) { + case NVME_IOCTL_ADMIN_CMD: + ret = nvme_user_cmd(ctrl, NULL, argp); + break; + case NVME_IOCTL_ADMIN64_CMD: + ret = nvme_user_cmd64(ctrl, NULL, argp); + break; + default: + ret = sed_ioctl(ctrl->opal_dev, cmd, argp); + break; + } + nvme_put_ctrl(ctrl); + return ret; +} + static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1425,20 +1508,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, * seperately and drop the ns SRCU reference early. This avoids a * deadlock when deleting namespaces using the passthrough interface. */ - if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) { - struct nvme_ctrl *ctrl = ns->ctrl; - - nvme_get_ctrl(ns->ctrl); - nvme_put_ns_from_disk(head, srcu_idx); - - if (cmd == NVME_IOCTL_ADMIN_CMD) - ret = nvme_user_cmd(ctrl, NULL, argp); - else - ret = sed_ioctl(ctrl->opal_dev, cmd, argp); - - nvme_put_ctrl(ctrl); - return ret; - } + if (is_ctrl_ioctl(cmd)) + return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); switch (cmd) { case NVME_IOCTL_ID: @@ -1451,6 +1522,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, case NVME_IOCTL_SUBMIT_IO: ret = nvme_submit_io(ns, argp); break; + case NVME_IOCTL_IO64_CMD: + ret = nvme_user_cmd64(ns->ctrl, ns, argp); + break; default: if (ns->ndev) ret = nvme_nvm_ioctl(ns, cmd, arg); @@ -2852,6 +2926,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp); + case NVME_IOCTL_ADMIN64_CMD: + return nvme_user_cmd64(ctrl, NULL, argp); case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index 1c215ea1798e..e168dc59e9a0 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -45,6 +45,27 @@ struct nvme_passthru_cmd { __u32 result; }; +struct nvme_passthru_cmd64 { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u64 result; +}; + #define nvme_admin_cmd nvme_passthru_cmd #define NVME_IOCTL_ID _IO('N', 0x40) @@ -54,5 +75,7 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_RESET _IO('N', 0x44) #define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) #define NVME_IOCTL_RESCAN _IO('N', 0x46) +#define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ From 2b1ff255d2d043aa4e6d62f50c478b283f9943ac Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 24 Sep 2019 14:22:08 -0700 Subject: [PATCH 036/591] nvme: Add ctrl attributes for queue_count and sqsize Current controller interrogation requires a lot of guesswork on how many io queues were created and what the io sq size is. The numbers are dependent upon core/fabric defaults, connect arguments, and target responses. Add sysfs attributes for queue_count and sqsize. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 87ed437a46b8..fd7dea36c3b6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3135,6 +3135,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); nvme_show_int_function(cntlid); nvme_show_int_function(numa_node); +nvme_show_int_function(queue_count); +nvme_show_int_function(sqsize); static ssize_t nvme_sysfs_delete(struct device *dev, struct device_attribute *attr, const char *buf, @@ -3215,6 +3217,8 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_address.attr, &dev_attr_state.attr, &dev_attr_numa_node.attr, + &dev_attr_queue_count.attr, + &dev_attr_sqsize.attr, NULL }; From a0f0037e908c656573d165aadbfae6c05fc4dd75 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 26 Sep 2019 08:54:03 +0800 Subject: [PATCH 037/591] KVM: LAPIC: Loosen filter for adaptive tuning of lapic_timer_advance_ns 5000 guest cycles delta is easy to encounter on desktop, per-vCPU lapic_timer_advance_ns always keeps at 1000ns initial value, let's loosen the filter a bit to let adaptive tuning make progress. Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3a3a6854dcca..87b0fcc23ef8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -66,9 +66,10 @@ #define X2APIC_BROADCAST 0xFFFFFFFFul static bool lapic_timer_advance_dynamic __read_mostly; -#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 -#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000 -#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000 +#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_NS_INIT 1000 +#define LAPIC_TIMER_ADVANCE_NS_MAX 5000 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -1504,8 +1505,8 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } - if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX)) - timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; + if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX)) + timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; apic->lapic_timer.timer_advance_ns = timer_advance_ns; } @@ -2302,7 +2303,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) HRTIMER_MODE_ABS_HARD); apic->lapic_timer.timer.function = apic_timer_fn; if (timer_advance_ns == -1) { - apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; + apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; lapic_timer_advance_dynamic = true; } else { apic->lapic_timer.timer_advance_ns = timer_advance_ns; From a1a640b8c0cd8a2a7f84ab694f04bc64dc6988af Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 25 Sep 2019 11:17:14 -0700 Subject: [PATCH 038/591] kvm: x86: Fix a spurious -E2BIG in __do_cpuid_func Don't return -E2BIG from __do_cpuid_func when processing function 0BH or 1FH and the last interesting subleaf occupies the last allocated entry in the result array. Cc: Paolo Bonzini Fixes: 831bf664e9c1fc ("KVM: Refactor and simplify kvm_dev_ioctl_get_supported_cpuid") Signed-off-by: Jim Mattson Reviewed-by: Peter Shier Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 63316036f85a..6bf4a261b308 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -618,16 +618,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, */ case 0x1f: case 0xb: { - int i, level_type; + int i; - /* read more entries until level_type is zero */ - for (i = 1; ; ++i) { + /* + * We filled in entry[0] for CPUID(EAX=, + * ECX=00H) above. If its level type (ECX[15:8]) is + * zero, then the leaf is unimplemented, and we're + * done. Otherwise, continue to populate entries + * until the level type (ECX[15:8]) of the previously + * added entry is zero. + */ + for (i = 1; entry[i - 1].ecx & 0xff00; ++i) { if (*nent >= maxnent) goto out; - level_type = entry[i - 1].ecx & 0xff00; - if (!level_type) - break; do_host_cpuid(&entry[i], function, i); ++*nent; } From 3ca94192278ca8de169d78c085396c424be123b3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 18 Sep 2019 17:50:10 +0800 Subject: [PATCH 039/591] KVM: X86: Fix userspace set invalid CR4 Reported by syzkaller: WARNING: CPU: 0 PID: 6544 at /home/kernel/data/kvm/arch/x86/kvm//vmx/vmx.c:4689 handle_desc+0x37/0x40 [kvm_intel] CPU: 0 PID: 6544 Comm: a.out Tainted: G OE 5.3.0-rc4+ #4 RIP: 0010:handle_desc+0x37/0x40 [kvm_intel] Call Trace: vmx_handle_exit+0xbe/0x6b0 [kvm_intel] vcpu_enter_guest+0x4dc/0x18d0 [kvm] kvm_arch_vcpu_ioctl_run+0x407/0x660 [kvm] kvm_vcpu_ioctl+0x3ad/0x690 [kvm] do_vfs_ioctl+0xa2/0x690 ksys_ioctl+0x6d/0x80 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x74/0x720 entry_SYSCALL_64_after_hwframe+0x49/0xbe When CR4.UMIP is set, guest should have UMIP cpuid flag. Current kvm set_sregs function doesn't have such check when userspace inputs sregs values. SECONDARY_EXEC_DESC is enabled on writes to CR4.UMIP in vmx_set_cr4 though guest doesn't have UMIP cpuid flag. The testcast triggers handle_desc warning when executing ltr instruction since guest architectural CR4 doesn't set UMIP. This patch fixes it by adding valid CR4 and CPUID combination checking in __set_sregs. syzkaller source: https://syzkaller.appspot.com/x/repro.c?x=138efb99600000 Reported-by: syzbot+0f1819555fbdce992df9@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ed07d8d2caa..180c7e88577a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -885,34 +885,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); +static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + if (cr4 & CR4_RESERVED_BITS) + return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) + return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) + return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) + return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) + return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) + return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) + return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) + return -EINVAL; + + return 0; +} + int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; - if (cr4 & CR4_RESERVED_BITS) - return 1; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) - return 1; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) - return 1; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) - return 1; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) - return 1; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) - return 1; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) - return 1; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) + if (kvm_valid_cr4(vcpu, cr4)) return 1; if (is_long_mode(vcpu)) { @@ -8714,10 +8722,6 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - (sregs->cr4 & X86_CR4_OSXSAVE)) - return -EINVAL; - if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* * When EFER.LME and CR0.PG are set, the processor is in @@ -8736,7 +8740,7 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return -EINVAL; } - return 0; + return kvm_valid_cr4(vcpu, sregs->cr4); } static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) From 43561123ab3759eb6ff47693aec1a307af0aef83 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 25 Sep 2019 17:04:17 -0700 Subject: [PATCH 040/591] kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH For these CPUID leaves, the EDX output is not dependent on the ECX input (i.e. the SIGNIFCANT_INDEX flag doesn't apply to EDX). Furthermore, the low byte of the ECX output is always identical to the low byte of the ECX input. KVM does not produce the correct ECX and EDX outputs for any undefined subleaves beyond the first. Special-case these CPUID leaves in kvm_cpuid, so that the ECX and EDX outputs are properly generated for all undefined subleaves. Fixes: 0771671749b59a ("KVM: Enhance guest cpuid management") Fixes: a87f2d3a6eadab ("KVM: x86: Add Intel CPUID.1F cpuid emulation support") Signed-off-by: Jim Mattson Reviewed-by: Marc Orr Reviewed-by: Peter Shier Reviewed-by: Jacob Xu Cc: Sean Christopherson Cc: Paolo Bonzini Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 81 +++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6bf4a261b308..a26a1ae03145 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -973,53 +973,64 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); /* - * If no match is found, check whether we exceed the vCPU's limit - * and return the content of the highest valid _standard_ leaf instead. - * This is to satisfy the CPUID specification. + * If the basic or extended CPUID leaf requested is higher than the + * maximum supported basic or extended leaf, respectively, then it is + * out of range. */ -static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, - u32 function, u32 index) +static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function) { - struct kvm_cpuid_entry2 *maxlevel; + struct kvm_cpuid_entry2 *max; - maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); - if (!maxlevel || maxlevel->eax >= function) - return NULL; - if (function & 0x80000000) { - maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); - if (!maxlevel) - return NULL; - } - return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); + max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); + return max && function <= max->eax; } bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) { u32 function = *eax, index = *ecx; - struct kvm_cpuid_entry2 *best; - bool entry_found = true; + struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid_entry2 *max; + bool found; - best = kvm_find_cpuid_entry(vcpu, function, index); - - if (!best) { - entry_found = false; - if (!check_limit) - goto out; - - best = check_cpuid_limit(vcpu, function, index); + entry = kvm_find_cpuid_entry(vcpu, function, index); + found = entry; + /* + * Intel CPUID semantics treats any query for an out-of-range + * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were + * requested. + */ + if (!entry && check_limit && !cpuid_function_in_range(vcpu, function)) { + max = kvm_find_cpuid_entry(vcpu, 0, 0); + if (max) { + function = max->eax; + entry = kvm_find_cpuid_entry(vcpu, function, index); + } } - -out: - if (best) { - *eax = best->eax; - *ebx = best->ebx; - *ecx = best->ecx; - *edx = best->edx; - } else + if (entry) { + *eax = entry->eax; + *ebx = entry->ebx; + *ecx = entry->ecx; + *edx = entry->edx; + } else { *eax = *ebx = *ecx = *edx = 0; - trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); - return entry_found; + /* + * When leaf 0BH or 1FH is defined, CL is pass-through + * and EDX is always the x2APIC ID, even for undefined + * subleaves. Index 1 will exist iff the leaf is + * implemented, so we pass through CL iff leaf 1 + * exists. EDX can be copied from any existing index. + */ + if (function == 0xb || function == 0x1f) { + entry = kvm_find_cpuid_entry(vcpu, function, 1); + if (entry) { + *ecx = index & 0xff; + *edx = entry->edx; + } + } + } + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found); + return found; } EXPORT_SYMBOL_GPL(kvm_cpuid); From 5f41a37b151f6459e0b650a2f4d1d59b6c02d1ab Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 25 Sep 2019 17:04:18 -0700 Subject: [PATCH 041/591] kvm: x86: Use AMD CPUID semantics for AMD vCPUs When the guest CPUID information represents an AMD vCPU, return all zeroes for queries of undefined CPUID leaves, whether or not they are in range. Signed-off-by: Jim Mattson Fixes: bd22f5cfcfe8f6 ("KVM: move and fix substitue search for missing CPUID entries") Reviewed-by: Marc Orr Reviewed-by: Peter Shier Reviewed-by: Jacob Xu Cc: Sean Christopherson Cc: Paolo Bonzini Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a26a1ae03145..ce4a1b3cc3e8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -998,9 +998,11 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, /* * Intel CPUID semantics treats any query for an out-of-range * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were - * requested. + * requested. AMD CPUID semantics returns all zeroes for any + * undefined leaf, whether or not the leaf is in range. */ - if (!entry && check_limit && !cpuid_function_in_range(vcpu, function)) { + if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) && + !cpuid_function_in_range(vcpu, function)) { max = kvm_find_cpuid_entry(vcpu, 0, 0); if (max) { function = max->eax; From 40bc47b08b6efc8b64fef77cb46974f634215425 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 24 Sep 2019 13:51:08 -0700 Subject: [PATCH 042/591] kvm: x86: Enumerate support for CLZERO instruction CLZERO is available to the guest if it is supported on the host. Therefore, enumerate support for the instruction in KVM_GET_SUPPORTED_CPUID whenever it is supported on the host. Signed-off-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index ce4a1b3cc3e8..34d9f9f481a3 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -485,8 +485,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | - F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); + F(CLZERO) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | + F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) | F(AMD_SSBD) | + F(VIRT_SSBD) | F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = From 504ce1954fba888936c9d13ccc1e3db9b8f613d5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 25 Sep 2019 23:37:21 +0200 Subject: [PATCH 043/591] KVM: x86: Expose XSAVEERPTR to the guest I was surprised to see that the guest reported `fxsave_leak' while the host did not. After digging deeper I noticed that the bits are simply masked out during enumeration. The XSAVEERPTR feature is actually a bug fix on AMD which means the kernel can disable a workaround. Pass XSAVEERPTR to the guest if available on the host. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 34d9f9f481a3..9c5029cf6f3f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -485,9 +485,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(CLZERO) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | - F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) | F(AMD_SSBD) | - F(VIRT_SSBD) | F(AMD_SSB_NO); + F(CLZERO) | F(XSAVEERPTR) | + F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | + F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = From f968688f44f529f96a64c9853fb2fb5d0a329aff Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 26 Sep 2019 12:44:39 +0900 Subject: [PATCH 044/591] nvme: Move ctrl sqsize to generic space This isn't specific to fabrics. Signed-off-by: Keith Busch Signed-off-by: Sagi Grimberg --- drivers/nvme/host/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b5013c101b35..38a83ef5bcd3 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -221,6 +221,7 @@ struct nvme_ctrl { u16 oacs; u16 nssa; u16 nr_streams; + u16 sqsize; u32 max_namespaces; atomic_t abort_limit; u8 vwc; @@ -269,7 +270,6 @@ struct nvme_ctrl { u16 hmmaxd; /* Fabrics only */ - u16 sqsize; u32 ioccsz; u32 iorcsz; u16 icdoff; From a0ecd6fdbf5d648123a7315c695fb6850d702835 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 24 Sep 2019 23:30:30 -0500 Subject: [PATCH 045/591] drm/komeda: prevent memory leak in komeda_wb_connector_add In komeda_wb_connector_add if drm_writeback_connector_init fails the allocated memory for kwb_conn should be released. Signed-off-by: Navid Emamdoost Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: james qian wang (Arm Technology China) Link: https://patchwork.freedesktop.org/patch/msgid/20190925043031.32308-1-navid.emamdoost@gmail.com --- drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index 23fbee268119..b72840c06ab7 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -165,8 +165,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms, &komeda_wb_encoder_helper_funcs, formats, n_formats); komeda_put_fourcc_list(formats); - if (err) + if (err) { + kfree(kwb_conn); return err; + } drm_connector_helper_add(&wb_conn->base, &komeda_wb_conn_helper_funcs); From 6eeb4ef049e7cd89783e8ebe1ea2f1dac276f82c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 24 Sep 2019 12:43:08 +0200 Subject: [PATCH 046/591] KVM: x86: assign two bits to track SPTE kinds Currently, we are overloading SPTE_SPECIAL_MASK to mean both "A/D bits unavailable" and MMIO, where the difference between the two is determined by mio_mask and mmio_value. However, the next patch will need two bits to distinguish availability of A/D bits from write protection. So, while at it give MMIO its own bit pattern, and move the two bits from bit 62 to bits 52..53 since Intel is allocating EPT page table bits from the top. Reviewed-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 7 ------- arch/x86/kvm/mmu.c | 28 ++++++++++++++++++---------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 23edf56cf577..50eb430b0ad8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -219,13 +219,6 @@ enum { PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) -/* - * The mask used to denote special SPTEs, which can be either MMIO SPTEs or - * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting - * with the SVE bit in EPT PTEs. - */ -#define SPTE_SPECIAL_MASK (1ULL << 62) - /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5269aa057dfa..bac8d228d82b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -83,7 +83,16 @@ module_param(dbg, bool, 0644); #define PTE_PREFETCH_NUM 8 #define PT_FIRST_AVAIL_BITS_SHIFT 10 -#define PT64_SECOND_AVAIL_BITS_SHIFT 52 +#define PT64_SECOND_AVAIL_BITS_SHIFT 54 + +/* + * The mask used to denote special SPTEs, which can be either MMIO SPTEs or + * Access Tracking SPTEs. + */ +#define SPTE_SPECIAL_MASK (3ULL << 52) +#define SPTE_AD_ENABLED_MASK (0ULL << 52) +#define SPTE_AD_DISABLED_MASK (1ULL << 52) +#define SPTE_MMIO_MASK (3ULL << 52) #define PT64_LEVEL_BITS 9 @@ -219,12 +228,11 @@ static u64 __read_mostly shadow_present_mask; static u64 __read_mostly shadow_me_mask; /* - * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value. - * Non-present SPTEs with shadow_acc_track_value set are in place for access - * tracking. + * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK; + * shadow_acc_track_mask is the set of bits to be cleared in non-accessed + * pages. */ static u64 __read_mostly shadow_acc_track_mask; -static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; /* * The mask/shift to use for saving the original R/X bits when marking the PTE @@ -304,7 +312,7 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); BUG_ON((mmio_mask & mmio_value) != mmio_value); - shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK; + shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; shadow_mmio_access_mask = access_mask; } @@ -323,7 +331,7 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) static inline bool spte_ad_enabled(u64 spte) { MMU_WARN_ON(is_mmio_spte(spte)); - return !(spte & shadow_acc_track_value); + return (spte & SPTE_SPECIAL_MASK) == SPTE_AD_ENABLED_MASK; } static inline u64 spte_shadow_accessed_mask(u64 spte) @@ -461,7 +469,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, { BUG_ON(!dirty_mask != !accessed_mask); BUG_ON(!accessed_mask && !acc_track_mask); - BUG_ON(acc_track_mask & shadow_acc_track_value); + BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK); shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; @@ -2622,7 +2630,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, shadow_user_mask | shadow_x_mask | shadow_me_mask; if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; else spte |= shadow_accessed_mask; @@ -2968,7 +2976,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, sp = page_header(__pa(sptep)); if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; /* * For the EPT case, shadow_present_mask is 0 if hardware From 1f4e5fc83a4217fc61b23370b07573827329d7bd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 26 Sep 2019 18:47:59 +0200 Subject: [PATCH 047/591] KVM: x86: fix nested guest live migration with PML Shadow paging is fundamentally incompatible with the page-modification log, because the GPAs in the log come from the wrong memory map. In particular, for the EPT page-modification log, the GPAs in the log come from L2 rather than L1. (If there was a non-EPT page-modification log, we couldn't use it for shadow paging because it would log GVAs rather than GPAs). Therefore, we need to rely on write protection to record dirty pages. This has the side effect of bypassing PML, since writes now result in an EPT violation vmexit. This is relatively easy to add to KVM, because pretty much the only place that needs changing is spte_clear_dirty. The first access to the page already goes through the page fault path and records the correct GPA; it's only subsequent accesses that are wrong. Therefore, we can equip set_spte (where the first access happens) to record that the SPTE will have to be write protected, and then spte_clear_dirty will use this information to do the right thing. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bac8d228d82b..24c23c66b226 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -92,6 +92,7 @@ module_param(dbg, bool, 0644); #define SPTE_SPECIAL_MASK (3ULL << 52) #define SPTE_AD_ENABLED_MASK (0ULL << 52) #define SPTE_AD_DISABLED_MASK (1ULL << 52) +#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52) #define SPTE_MMIO_MASK (3ULL << 52) #define PT64_LEVEL_BITS 9 @@ -328,10 +329,27 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) return sp->role.ad_disabled; } +static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) +{ + /* + * When using the EPT page-modification log, the GPAs in the log + * would come from L2 rather than L1. Therefore, we need to rely + * on write protection to record dirty pages. This also bypasses + * PML, since writes now result in a vmexit. + */ + return vcpu->arch.mmu == &vcpu->arch.guest_mmu; +} + static inline bool spte_ad_enabled(u64 spte) { MMU_WARN_ON(is_mmio_spte(spte)); - return (spte & SPTE_SPECIAL_MASK) == SPTE_AD_ENABLED_MASK; + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK; +} + +static inline bool spte_ad_need_write_protect(u64 spte) +{ + MMU_WARN_ON(is_mmio_spte(spte)); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK; } static inline u64 spte_shadow_accessed_mask(u64 spte) @@ -1597,16 +1615,16 @@ static bool spte_clear_dirty(u64 *sptep) rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep); + MMU_WARN_ON(!spte_ad_enabled(spte)); spte &= ~shadow_dirty_mask; - return mmu_spte_update(sptep, spte); } -static bool wrprot_ad_disabled_spte(u64 *sptep) +static bool spte_wrprot_for_clear_dirty(u64 *sptep) { bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT, (unsigned long *)sptep); - if (was_writable) + if (was_writable && !spte_ad_enabled(*sptep)) kvm_set_pfn_dirty(spte_to_pfn(*sptep)); return was_writable; @@ -1625,10 +1643,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) - if (spte_ad_enabled(*sptep)) - flush |= spte_clear_dirty(sptep); + if (spte_ad_need_write_protect(*sptep)) + flush |= spte_wrprot_for_clear_dirty(sptep); else - flush |= wrprot_ad_disabled_spte(sptep); + flush |= spte_clear_dirty(sptep); return flush; } @@ -1639,6 +1657,11 @@ static bool spte_set_dirty(u64 *sptep) rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep); + /* + * Similar to the !kvm_x86_ops->slot_disable_log_dirty case, + * do not bother adding back write access to pages marked + * SPTE_AD_WRPROT_ONLY_MASK. + */ spte |= shadow_dirty_mask; return mmu_spte_update(sptep, spte); @@ -2977,6 +3000,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, sp = page_header(__pa(sptep)); if (sp_ad_disabled(sp)) spte |= SPTE_AD_DISABLED_MASK; + else if (kvm_vcpu_ad_need_write_protect(vcpu)) + spte |= SPTE_AD_WRPROT_ONLY_MASK; /* * For the EPT case, shadow_present_mask is 0 if hardware From 094444204570a5420d9e6ce3d4558877c3487856 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 26 Sep 2019 15:01:15 +0200 Subject: [PATCH 048/591] selftests: kvm: add test for dirty logging inside nested guests Check that accesses by nested guests are logged according to the L1 physical addresses rather than L2. Most of the patch is really adding EPT support to the testing framework. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/include/x86_64/processor.h | 3 + .../selftests/kvm/include/x86_64/vmx.h | 14 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- .../selftests/kvm/lib/kvm_util_internal.h | 3 + tools/testing/selftests/kvm/lib/x86_64/vmx.c | 201 +++++++++++++++++- .../selftests/kvm/x86_64/vmx_dirty_log_test.c | 156 ++++++++++++++ 7 files changed, 377 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 62c591f87dab..fd84b7a78dcf 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -22,6 +22,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0c17f2ee685e..ff234018219c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1083,6 +1083,9 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); #define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU +/* VMX_EPT_VPID_CAP bits */ +#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) + /* MSR_IA32_VMX_MISC bits */ #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 69b17055f63d..6ae5a47fe067 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -569,6 +569,10 @@ struct vmx_pages { void *enlightened_vmcs_hva; uint64_t enlightened_vmcs_gpa; void *enlightened_vmcs; + + void *eptp_hva; + uint64_t eptp_gpa; + void *eptp; }; struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); @@ -576,4 +580,14 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot); +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot); +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); + #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 80a338b5403c..41cf45416060 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -705,7 +705,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * on error (e.g. currently no memory region using memslot as a KVM * memory slot ID). */ -static struct userspace_mem_region * +struct userspace_mem_region * memslot2region(struct kvm_vm *vm, uint32_t memslot) { struct userspace_mem_region *region; diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index f36262e0f655..ac50c42750cf 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -68,4 +68,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); +struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot); + #endif /* SELFTEST_KVM_UTIL_INTERNAL_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 9cef0455b819..fab8f6b0bf52 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -7,11 +7,39 @@ #include "test_util.h" #include "kvm_util.h" +#include "../kvm_util_internal.h" #include "processor.h" #include "vmx.h" +#define PAGE_SHIFT_4K 12 + +#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 + bool enable_evmcs; +struct eptPageTableEntry { + uint64_t readable:1; + uint64_t writable:1; + uint64_t executable:1; + uint64_t memory_type:3; + uint64_t ignore_pat:1; + uint64_t page_size:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t ignored_11_10:2; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t suppress_ve:1; +}; + +struct eptPageTablePointer { + uint64_t memory_type:3; + uint64_t page_walk_length:3; + uint64_t ad_enabled:1; + uint64_t reserved_11_07:5; + uint64_t address:40; + uint64_t reserved_63_52:12; +}; int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id) { uint16_t evmcs_ver; @@ -174,15 +202,35 @@ bool load_vmcs(struct vmx_pages *vmx) */ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) { + uint32_t sec_exec_ctl = 0; + vmwrite(VIRTUAL_PROCESSOR_ID, 0); vmwrite(POSTED_INTR_NV, 0); vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); - if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0)) + + if (vmx->eptp_gpa) { + uint64_t ept_paddr; + struct eptPageTablePointer eptp = { + .memory_type = VMX_BASIC_MEM_TYPE_WB, + .page_walk_length = 3, /* + 1 */ + .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS), + .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, + }; + + memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); + vmwrite(EPT_POINTER, ept_paddr); + sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; + } + + if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl)) vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); - else + else { vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); + GUEST_ASSERT(!sec_exec_ctl); + } + vmwrite(EXCEPTION_BITMAP, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ @@ -327,3 +375,152 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_host_state(); init_vmcs_guest_state(guest_rip, guest_rsp); } + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot) +{ + uint16_t index[4]; + struct eptPageTableEntry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((nested_paddr % vm->page_size) == 0, + "Nested physical address not on page boundary,\n" + " nested_paddr: 0x%lx vm->page_size: 0x%x", + nested_paddr, vm->page_size); + TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (nested_paddr >> 12) & 0x1ffu; + index[1] = (nested_paddr >> 21) & 0x1ffu; + index[2] = (nested_paddr >> 30) & 0x1ffu; + index[3] = (nested_paddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = vmx->eptp_hva; + if (!pml4e[index[3]].readable) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].readable = true; + pml4e[index[3]].executable = true; + } + + /* Allocate page directory table if not present. */ + struct eptPageTableEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].readable) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].readable = true; + pdpe[index[2]].executable = true; + } + + /* Allocate page table if not present. */ + struct eptPageTableEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].readable) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].readable = true; + pde[index[1]].executable = true; + } + + /* Fill in page table entry. */ + struct eptPageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].readable = true; + pte[index[0]].executable = true; + + /* + * For now mark these as accessed and dirty because the only + * testcase we have needs that. Can be reconsidered later. + */ + pte[index[0]].accessed = true; + pte[index[0]].dirty = true; +} + +/* + * Map a range of EPT guest physical addresses to the VM's physical address + * + * Input Args: + * vm - Virtual Machine + * nested_paddr - Nested guest physical address to map + * paddr - VM Physical Address + * size - The size of the range to map + * eptp_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a nested guest translation for the + * page range starting at nested_paddr to the page range starting at paddr. + */ +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot) +{ + size_t page_size = vm->page_size; + size_t npages = size / page_size; + + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + + while (npages--) { + nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot); + nested_paddr += page_size; + paddr += page_size; + } +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. + */ +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot) +{ + sparsebit_idx_t i, last; + struct userspace_mem_region *region = + memslot2region(vm, memslot); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + nested_map(vmx, vm, + (uint64_t)i << vm->page_shift, + (uint64_t)i << vm->page_shift, + 1 << vm->page_shift, + eptp_memslot); + } +} + +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); + vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c new file mode 100644 index 000000000000..0bca1cfe2c1e --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * Copyright (C) 2018, Red Hat, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define VCPU_ID 1 + +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 +#define TEST_MEM_SIZE 3 + +/* L1 guest test virtual memory offset */ +#define GUEST_TEST_MEM 0xc0000000 + +/* L2 guest test virtual memory offset */ +#define NESTED_TEST_MEM1 0xc0001000 +#define NESTED_TEST_MEM2 0xc0002000 + +static void l2_guest_code(void) +{ + *(volatile uint64_t *)NESTED_TEST_MEM1; + *(volatile uint64_t *)NESTED_TEST_MEM1 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + /* Exit to L1 and never come back. */ + vmcall(); +} + +void l1_guest_code(struct vmx_pages *vmx) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + prepare_vmcs(vmx, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(false); + GUEST_ASSERT(!vmlaunch()); + GUEST_SYNC(false); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct vmx_pages *vmx; + unsigned long *bmap; + uint64_t *host_test_mem; + + struct kvm_vm *vm; + struct kvm_run *run; + struct ucall uc; + bool done = false; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + run = vcpu_state(vm, VCPU_ID); + + /* Add an extra memory slot for testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + GUEST_TEST_MEM, + TEST_MEM_SLOT_INDEX, + TEST_MEM_SIZE, + KVM_MEM_LOG_DIRTY_PAGES); + + /* + * Add an identity map for GVA range [0xc0000000, 0xc0002000). This + * affects both L1 and L2. However... + */ + virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, + TEST_MEM_SIZE * 4096, 0); + + /* + * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to + * 0xc0000000. + * + * Note that prepare_eptp should be called only L1's GPA map is done, + * meaning after the last call to virt_map. + */ + prepare_eptp(vmx, vm, 0); + nested_map_memslot(vmx, vm, 0, 0); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0); + + bmap = bitmap_alloc(TEST_MEM_SIZE); + host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); + + while (!done) { + memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096); + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + /* + * The nested guest wrote at offset 0x1000 in the memslot, but the + * dirty bitmap must be filled in according to L1 GPA, not L2. + */ + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + if (uc.args[1]) { + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n"); + } else { + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n"); + } + + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n"); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + } +} From 4b0b2b096da9d296e0e5668cdfba8613bd6f5bc8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 25 Sep 2019 12:59:23 -0700 Subject: [PATCH 049/591] libsubcmd: Make _FORTIFY_SOURCE defines dependent on the feature Unconditionally defining _FORTIFY_SOURCE can break tools that don't work with it, such as memory sanitizers: https://github.com/google/sanitizers/wiki/AddressSanitizer#faq Fixes: 4b6ab94eabe4 ("perf subcmd: Create subcmd library") Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20190925195924.152834-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index ed61fb3a46c0..5b2cd5e58df0 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -20,7 +20,13 @@ MAKEFLAGS += --no-print-directory LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -fPIC + +ifeq ($(DEBUG),0) + ifeq ($(feature-fortify-source), 1) + CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 + endif +endif ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 From e3e2cf3d5b1fe800b032e14c0fdcd9a6fb20cf3b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 25 Sep 2019 12:59:24 -0700 Subject: [PATCH 050/591] perf tests: Avoid raising SEGV using an obvious NULL dereference An optimized build such as: make -C tools/perf CLANG=1 CC=clang EXTRA_CFLAGS="-O3 will turn the dereference operation into a ud2 instruction, raising a SIGILL rather than a SIGSEGV. Use raise(..) for correctness and clarity. Similar issues were addressed in Numfor Mbiziwo-Tiapo's patch: https://lkml.org/lkml/2019/7/8/1234 Committer testing: Before: [root@quaco ~]# perf test hooks 55: perf hooks : Ok [root@quaco ~]# perf test -v hooks 55: perf hooks : --- start --- test child forked, pid 17092 SIGSEGV is observed as expected, try to recover. Fatal error (SEGFAULT) in perf hook 'test' test child finished with 0 ---- end ---- perf hooks: Ok [root@quaco ~]# After: [root@quaco ~]# perf test hooks 55: perf hooks : Ok [root@quaco ~]# perf test -v hooks 55: perf hooks : --- start --- test child forked, pid 17909 SIGSEGV is observed as expected, try to recover. Fatal error (SEGFAULT) in perf hook 'test' test child finished with 0 ---- end ---- perf hooks: Ok [root@quaco ~]# Fixes: a074865e60ed ("perf tools: Introduce perf hooks") Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lore.kernel.org/lkml/20190925195924.152834-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-hooks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c index dbc27199c65e..dd865e0bea12 100644 --- a/tools/perf/tests/perf-hooks.c +++ b/tools/perf/tests/perf-hooks.c @@ -19,12 +19,11 @@ static void sigsegv_handler(int sig __maybe_unused) static void the_hook(void *_hook_flags) { int *hook_flags = _hook_flags; - int *p = NULL; *hook_flags = 1234; /* Generate a segfault, test perf_hooks__recover */ - *p = 0; + raise(SIGSEGV); } int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused) From d586ac10ce56b2381b8e1d8ed74660c1b2b8ab0d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 20 Sep 2019 21:13:27 -0700 Subject: [PATCH 051/591] perf docs: Allow man page date to be specified With this change if a perf_date parameter is provided to asciidoc then it will override the default date written to the man page metadata. Without this change, or if the perf_date isn't specified, then the current date is written to the metadata. Having this parameter allows the metadata to be constant if builds happen on different dates. The name of the parameter is intended to be consistent with the existing perf_version parameter. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20190921041327.155054-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/asciidoc.conf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/Documentation/asciidoc.conf b/tools/perf/Documentation/asciidoc.conf index 356b23a40339..2b62ba1e72b7 100644 --- a/tools/perf/Documentation/asciidoc.conf +++ b/tools/perf/Documentation/asciidoc.conf @@ -71,6 +71,9 @@ ifdef::backend-docbook[] [header] template::[header-declarations] +ifdef::perf_date[] +{perf_date} +endif::perf_date[] {mantitle} {manvolnum} From 08a96a31474a732fd654575ced843b94bc3212e1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2019 09:28:11 -0300 Subject: [PATCH 052/591] tools headers uapi: Sync drm/i915_drm.h with the kernel sources To pick the change in: bf73fc0fa9cf ("drm/i915: Show support for accurate sw PMU busyness tracking") That don't result in any changes in tooling, just silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Adrian Hunter Cc: Chris Wilson Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-o651nt7vpz93tu3nmx4f3xql@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 328d05e77d9f..469dc512cca3 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -521,6 +521,7 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) +#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) #define I915_PARAM_HUC_STATUS 42 From b9023b91dd020ad7e093baa5122b6968c48cc9e0 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Thu, 26 Sep 2019 15:51:01 +0200 Subject: [PATCH 053/591] tick: broadcast-hrtimer: Fix a race in bc_set_next When a cpu requests broadcasting, before starting the tick broadcast hrtimer, bc_set_next() checks if the timer callback (bc_handler) is active using hrtimer_try_to_cancel(). But hrtimer_try_to_cancel() does not provide the required synchronization when the callback is active on other core. The callback could have already executed tick_handle_oneshot_broadcast() and could have also returned. But still there is a small time window where the hrtimer_try_to_cancel() returns -1. In that case bc_set_next() returns without doing anything, but the next_event of the tick broadcast clock device is already set to a timeout value. In the race condition diagram below, CPU #1 is running the timer callback and CPU #2 is entering idle state and so calls bc_set_next(). In the worst case, the next_event will contain an expiry time, but the hrtimer will not be started which happens when the racing callback returns HRTIMER_NORESTART. The hrtimer might never recover if all further requests from the CPUs to subscribe to tick broadcast have timeout greater than the next_event of tick broadcast clock device. This leads to cascading of failures and finally noticed as rcu stall warnings Here is a depiction of the race condition CPU #1 (Running timer callback) CPU #2 (Enter idle and subscribe to tick broadcast) --------------------- --------------------- __run_hrtimer() tick_broadcast_enter() bc_handler() __tick_broadcast_oneshot_control() tick_handle_oneshot_broadcast() raw_spin_lock(&tick_broadcast_lock); dev->next_event = KTIME_MAX; //wait for tick_broadcast_lock //next_event for tick broadcast clock set to KTIME_MAX since no other cores subscribed to tick broadcasting raw_spin_unlock(&tick_broadcast_lock); if (dev->next_event == KTIME_MAX) return HRTIMER_NORESTART // callback function exits without restarting the hrtimer //tick_broadcast_lock acquired raw_spin_lock(&tick_broadcast_lock); tick_broadcast_set_event() clockevents_program_event() dev->next_event = expires; bc_set_next() hrtimer_try_to_cancel() //returns -1 since the timer callback is active. Exits without restarting the timer cpu_base->running = NULL; The comment that hrtimer cannot be armed from within the callback is wrong. It is fine to start the hrtimer from within the callback. Also it is safe to start the hrtimer from the enter/exit idle code while the broadcast handler is active. The enter/exit idle code and the broadcast handler are synchronized using tick_broadcast_lock. So there is no need for the existing try to cancel logic. All this can be removed which will eliminate the race condition as well. Fixes: 5d1638acb9f6 ("tick: Introduce hrtimer based broadcast") Originally-by: Thomas Gleixner Signed-off-by: Balasubramani Vivekanandan Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190926135101.12102-2-balasubramani_vivekanandan@mentor.com --- kernel/time/tick-broadcast-hrtimer.c | 62 +++++++++++++--------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index c1f5bb590b5e..b5a65e212df2 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -42,39 +42,39 @@ static int bc_shutdown(struct clock_event_device *evt) */ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) { - int bc_moved; /* - * We try to cancel the timer first. If the callback is on - * flight on some other cpu then we let it handle it. If we - * were able to cancel the timer nothing can rearm it as we - * own broadcast_lock. + * This is called either from enter/exit idle code or from the + * broadcast handler. In all cases tick_broadcast_lock is held. * - * However we can also be called from the event handler of - * ce_broadcast_hrtimer itself when it expires. We cannot - * restart the timer because we are in the callback, but we - * can set the expiry time and let the callback return - * HRTIMER_RESTART. + * hrtimer_cancel() cannot be called here neither from the + * broadcast handler nor from the enter/exit idle code. The idle + * code can run into the problem described in bc_shutdown() and the + * broadcast handler cannot wait for itself to complete for obvious + * reasons. * - * Since we are in the idle loop at this point and because - * hrtimer_{start/cancel} functions call into tracing, - * calls to these functions must be bound within RCU_NONIDLE. + * Each caller tries to arm the hrtimer on its own CPU, but if the + * hrtimer callbback function is currently running, then + * hrtimer_start() cannot move it and the timer stays on the CPU on + * which it is assigned at the moment. + * + * As this can be called from idle code, the hrtimer_start() + * invocation has to be wrapped with RCU_NONIDLE() as + * hrtimer_start() can call into tracing. */ - RCU_NONIDLE( - { - bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0; - if (bc_moved) { - hrtimer_start(&bctimer, expires, - HRTIMER_MODE_ABS_PINNED_HARD); - } - } - ); - - if (bc_moved) { - /* Bind the "device" to the cpu */ - bc->bound_on = smp_processor_id(); - } else if (bc->bound_on == smp_processor_id()) { - hrtimer_set_expires(&bctimer, expires); - } + RCU_NONIDLE( { + hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED_HARD); + /* + * The core tick broadcast mode expects bc->bound_on to be set + * correctly to prevent a CPU which has the broadcast hrtimer + * armed from going deep idle. + * + * As tick_broadcast_lock is held, nothing can change the cpu + * base which was just established in hrtimer_start() above. So + * the below access is safe even without holding the hrtimer + * base lock. + */ + bc->bound_on = bctimer.base->cpu_base->cpu; + } ); return 0; } @@ -100,10 +100,6 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t) { ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer); - if (clockevent_state_oneshot(&ce_broadcast_hrtimer)) - if (ce_broadcast_hrtimer.next_event != KTIME_MAX) - return HRTIMER_RESTART; - return HRTIMER_NORESTART; } From 19a36d329f5b1e5d3d06f7093de992693d0fdee6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 26 Aug 2019 15:30:23 -0400 Subject: [PATCH 054/591] KVM: VMX: Set VMENTER_L1D_FLUSH_NOT_REQUIRED if !X86_BUG_L1TF The l1tf_vmx_mitigation is only set to VMENTER_L1D_FLUSH_NOT_REQUIRED when the ARCH_CAPABILITIES MSR indicates that L1D flush is not required. However, if the CPU is not affected by L1TF, l1tf_vmx_mitigation will still be set to VMENTER_L1D_FLUSH_AUTO. This is certainly not the best option for a !X86_BUG_L1TF CPU. So force l1tf_vmx_mitigation to VMENTER_L1D_FLUSH_NOT_REQUIRED to make it more explicit in case users are checking the vmentry_l1d_flush parameter. Signed-off-by: Waiman Long [Patch rewritten accoring to Borislav Petkov's suggestion. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d4575ffb3cec..e7970a2e8eae 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -209,6 +209,11 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) struct page *page; unsigned int i; + if (!boot_cpu_has_bug(X86_BUG_L1TF)) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + if (!enable_ept) { l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; return 0; @@ -7995,12 +8000,10 @@ static int __init vmx_init(void) * contain 'auto' which will be turned into the default 'cond' * mitigation mode. */ - if (boot_cpu_has(X86_BUG_L1TF)) { - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); - if (r) { - vmx_exit(); - return r; - } + r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); + if (r) { + vmx_exit(); + return r; } #ifdef CONFIG_KEXEC_CORE From 2e4a75976dfb88ee6ce019b1d4fa507aabd02c14 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 27 Sep 2019 17:54:13 +0200 Subject: [PATCH 055/591] KVM: selftests: x86: clarify what is reported on KVM_GET_MSRS failure When KVM_GET_MSRS fail the report looks like ==== Test Assertion Failure ==== lib/x86_64/processor.c:1089: r == nmsrs pid=28775 tid=28775 - Argument list too long 1 0x000000000040a55f: vcpu_save_state at processor.c:1088 (discriminator 3) 2 0x00000000004010e3: main at state_test.c:171 (discriminator 4) 3 0x00007fb8e69223d4: ?? ??:0 4 0x0000000000401287: _start at ??:? Unexpected result from KVM_GET_MSRS, r: 36 (failed at 194) and it's not obvious that '194' here is the failed MSR index and that it's printed in hex. Change that. Suggested-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/x86_64/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c53dbc6bc568..6698cb741e10 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1085,7 +1085,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) for (i = 0; i < nmsrs; i++) state->msrs.entries[i].index = list->indices[i]; r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)", + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", r, r == nmsrs ? -1 : list->indices[r]); r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); From 67b483dd03c4cd9e90e4c3943132dce514ea4e88 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Sep 2019 11:27:05 -0700 Subject: [PATCH 056/591] nvme-rdma: fix possible use-after-free in connect timeout If the connect times out, we may have already destroyed the queue in the timeout handler, so test if the queue is still allocated in the connect error handler. Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9d16dfc29368..4d280160dd3f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -620,7 +620,8 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) if (!ret) { set_bit(NVME_RDMA_Q_LIVE, &queue->flags); } else { - __nvme_rdma_stop_queue(queue); + if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) + __nvme_rdma_stop_queue(queue); dev_info(ctrl->ctrl.device, "failed to connect queue: %d ret=%d\n", idx, ret); } From a12de1d42d74ef3c80e9fb9a2da94daaef747869 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 27 Sep 2019 15:24:30 +0800 Subject: [PATCH 057/591] blk-mq: honor IO scheduler for multiqueue devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a device is using multiple queues, the IO scheduler may be bypassed. This may hurt performance for some slow MQ devices, and it also breaks zoned devices which depend on mq-deadline for respecting the write order in one zone. Don't bypass io scheduler if we have one setup. This patch can double sequential write performance basically on MQ scsi_debug when mq-deadline is applied. Cc: Bart Van Assche Cc: Hannes Reinecke Cc: Dave Chinner Reviewed-by: Javier González Reviewed-by: Damien Le Moal Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 6e3b15f70cd7..5d4bef3c378d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2012,6 +2012,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } blk_add_rq_to_plug(plug, rq); + } else if (q->elevator) { + blk_mq_sched_insert_request(rq, false, true, true); } else if (plug && !blk_queue_nomerges(q)) { /* * We do limited plugging. If the bio can be merged, do that. @@ -2035,8 +2037,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_try_issue_directly(data.hctx, same_queue_rq, &cookie); } - } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && - !data.hctx->dispatch_busy)) { + } else if ((q->nr_hw_queues > 1 && is_sync) || + !data.hctx->dispatch_busy) { blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_sched_insert_request(rq, false, true, true); From 3154df262db52f1d27e01020e871f4345ec2f9a8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 27 Sep 2019 15:24:31 +0800 Subject: [PATCH 058/591] blk-mq: apply normal plugging for HDD Some HDD drive may expose multiple hardware queues, such as MegraRaid. Let's apply the normal plugging for such devices because sequential IO may benefit a lot from plug merging. Cc: Bart Van Assche Cc: Hannes Reinecke Cc: Dave Chinner Reviewed-by: Damien Le Moal Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 5d4bef3c378d..ec791156e9cc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1992,10 +1992,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) /* bypass scheduler for flush rq */ blk_insert_flush(rq); blk_mq_run_hw_queue(data.hctx, true); - } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) { + } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs || + !blk_queue_nonrot(q))) { /* * Use plugging if we have a ->commit_rqs() hook as well, as * we know the driver uses bd->last in a smart fashion. + * + * Use normal plugging if this disk is slow HDD, as sequential + * IO may benefit a lot from plug merging. */ unsigned int request_count = plug->rq_count; struct request *last = NULL; From 6402939ec86eaf226c8b8ae00ed983936b164908 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 17 Sep 2019 17:47:12 -0500 Subject: [PATCH 059/591] ieee802154: ca8210: prevent memory leak In ca8210_probe the allocated pdata needs to be assigned to spi_device->dev.platform_data before calling ca8210_get_platform_data. Othrwise when ca8210_get_platform_data fails pdata cannot be released. Signed-off-by: Navid Emamdoost Link: https://lore.kernel.org/r/20190917224713.26371-1-navid.emamdoost@gmail.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index b188fce3f641..658b399ac9ea 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -3152,12 +3152,12 @@ static int ca8210_probe(struct spi_device *spi_device) goto error; } + priv->spi->dev.platform_data = pdata; ret = ca8210_get_platform_data(priv->spi, pdata); if (ret) { dev_crit(&spi_device->dev, "ca8210_get_platform_data failed\n"); goto error; } - priv->spi->dev.platform_data = pdata; ret = ca8210_dev_com_init(priv); if (ret) { From d7d44b6fe40a98e960be92ea8617954c2596d140 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Sep 2019 22:33:57 +0200 Subject: [PATCH 060/591] drm/tilcdc: include linux/pinctrl/consumer.h again This was apparently dropped by accident in a recent cleanup, causing a build failure in some configurations now: drivers/gpu/drm/tilcdc/tilcdc_tfp410.c:296:12: error: implicit declaration of function 'devm_pinctrl_get_select_default' [-Werror,-Wimplicit-function-declaration] Fixes: fcb57664172e ("drm/tilcdc: drop use of drmP.h") Signed-off-by: Arnd Bergmann Reviewed-by: Laurent Pinchart Signed-off-by: Jyri Sarha Link: https://patchwork.freedesktop.org/patch/msgid/02b03f74cf941f52a941a36bdc8dabb4a69fd87e.1569852588.git.jsarha@ti.com Acked-by: Sam Ravnborg --- drivers/gpu/drm/tilcdc/tilcdc_tfp410.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c index 525dc1c0f1c1..530edb3b51cc 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include From 833b45de69a6016c4b0cebe6765d526a31a81580 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 30 Sep 2019 18:48:44 +0200 Subject: [PATCH 061/591] kvm: x86, powerpc: do not allow clearing largepages debugfs entry The largepages debugfs entry is incremented/decremented as shadow pages are created or destroyed. Clearing it will result in an underflow, which is harmless to KVM but ugly (and could be misinterpreted by tools that use debugfs information), so make this particular statistic read-only. Cc: kvm-ppc@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s.c | 8 ++++---- arch/x86/kvm/x86.c | 6 +++--- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 10 +++++++--- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d7fcdfa7fee4..ec2547cc5ecb 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -36,8 +36,8 @@ #include "book3s.h" #include "trace.h" -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ /* #define EXIT_DEBUG */ @@ -69,8 +69,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "pthru_all", VCPU_STAT(pthru_all) }, { "pthru_host", VCPU_STAT(pthru_host) }, { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, - { "largepages_2M", VM_STAT(num_2M_pages) }, - { "largepages_1G", VM_STAT(num_1G_pages) }, + { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) }, + { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) }, { NULL } }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 180c7e88577a..8072acaaf028 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -212,7 +212,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, - { "largepages", VM_STAT(lpages) }, + { "largepages", VM_STAT(lpages, .mode = 0444) }, { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fcb46b3374c6..719fc3e15ea4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1090,6 +1090,7 @@ enum kvm_stat_kind { struct kvm_stat_data { int offset; + int mode; struct kvm *kvm; }; @@ -1097,6 +1098,7 @@ struct kvm_stats_debugfs_item { const char *name; int offset; enum kvm_stat_kind kind; + int mode; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e6de3159e682..fd68fbe0a75d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -617,8 +617,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) stat_data->kvm = kvm; stat_data->offset = p->offset; + stat_data->mode = p->mode ? p->mode : 0644; kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - debugfs_create_file(p->name, 0644, kvm->debugfs_dentry, + debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry, stat_data, stat_fops_per_vm[p->kind]); } return 0; @@ -3929,7 +3930,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) return -ENOENT; - if (simple_attr_open(inode, file, get, set, fmt)) { + if (simple_attr_open(inode, file, get, + stat_data->mode & S_IWUGO ? set : NULL, + fmt)) { kvm_put_kvm(stat_data->kvm); return -ENOMEM; } @@ -4177,7 +4180,8 @@ static void kvm_init_debug(void) kvm_debugfs_num_entries = 0; for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - debugfs_create_file(p->name, 0644, kvm_debugfs_dir, + int mode = p->mode ? p->mode : 0644; + debugfs_create_file(p->name, mode, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind]); } From 7ae6d93c8f052b7a77ba56ed0f654e22a2876739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Thu, 26 Sep 2019 10:59:17 +0200 Subject: [PATCH 062/591] net: dsa: qca8k: Use up to 7 ports for all operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The QCA8K family supports up to 7 ports. So use the existing QCA8K_NUM_PORTS define to allocate the switch structure and limit all operations with the switch ports. This was not an issue until commit 0394a63acfe2 ("net: dsa: enable and disable all ports") disabled all unused ports. Since the unused ports 7-11 are outside of the correct register range on this switch some registers were rewritten with invalid content. Fixes: 6b93fb46480a ("net-next: dsa: add new driver for qca8xxx family") Fixes: a0c02161ecfc ("net: dsa: variable number of ports") Fixes: 0394a63acfe2 ("net: dsa: enable and disable all ports") Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 684aa51684db..b00274caae4f 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -705,7 +705,7 @@ qca8k_setup(struct dsa_switch *ds) BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); /* Setup connection between CPU port & user ports */ - for (i = 0; i < DSA_MAX_PORTS; i++) { + for (i = 0; i < QCA8K_NUM_PORTS; i++) { /* CPU port gets connected to all user ports of the switch */ if (dsa_is_cpu_port(ds, i)) { qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), @@ -1077,7 +1077,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev) if (id != QCA8K_ID_QCA8337) return -ENODEV; - priv->ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS); + priv->ds = dsa_switch_alloc(&mdiodev->dev, QCA8K_NUM_PORTS); if (!priv->ds) return -ENOMEM; From e9789c7cc182484fc031fd88097eb14cb26c4596 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Sep 2019 18:24:43 -0700 Subject: [PATCH 063/591] sch_cbq: validate TCA_CBQ_WRROPT to avoid crash syzbot reported a crash in cbq_normalize_quanta() caused by an out of range cl->priority. iproute2 enforces this check, but malicious users do not. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI Modules linked in: CPU: 1 PID: 26447 Comm: syz-executor.1 Not tainted 5.3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:cbq_normalize_quanta.part.0+0x1fd/0x430 net/sched/sch_cbq.c:902 RSP: 0018:ffff8801a5c333b0 EFLAGS: 00010206 RAX: 0000000020000003 RBX: 00000000fffffff8 RCX: ffffc9000712f000 RDX: 00000000000043bf RSI: ffffffff83be8962 RDI: 0000000100000018 RBP: ffff8801a5c33420 R08: 000000000000003a R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000002ef R13: ffff88018da95188 R14: dffffc0000000000 R15: 0000000000000015 FS: 00007f37d26b1700(0000) GS:ffff8801dad00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004c7cec CR3: 00000001bcd0a006 CR4: 00000000001626f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: [] cbq_normalize_quanta include/net/pkt_sched.h:27 [inline] [] cbq_addprio net/sched/sch_cbq.c:1097 [inline] [] cbq_set_wrr+0x2d7/0x450 net/sched/sch_cbq.c:1115 [] cbq_change_class+0x987/0x225b net/sched/sch_cbq.c:1537 [] tc_ctl_tclass+0x555/0xcd0 net/sched/sch_api.c:2329 [] rtnetlink_rcv_msg+0x485/0xc10 net/core/rtnetlink.c:5248 [] netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2510 [] rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5266 [] netlink_unicast_kernel net/netlink/af_netlink.c:1324 [inline] [] netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1350 [] netlink_sendmsg+0x89a/0xd50 net/netlink/af_netlink.c:1939 [] sock_sendmsg_nosec net/socket.c:673 [inline] [] sock_sendmsg+0x12e/0x170 net/socket.c:684 [] ___sys_sendmsg+0x81d/0x960 net/socket.c:2359 [] __sys_sendmsg+0x105/0x1d0 net/socket.c:2397 [] SYSC_sendmsg net/socket.c:2406 [inline] [] SyS_sendmsg+0x29/0x30 net/socket.c:2404 [] do_syscall_64+0x528/0x770 arch/x86/entry/common.c:305 [] entry_SYSCALL_64_after_hwframe+0x42/0xb7 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 06c7a2da21bc..39b427dc7512 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1127,6 +1127,33 @@ static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = { [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, }; +static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1], + struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + int err; + + if (!opt) { + NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); + return -EINVAL; + } + + err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, + cbq_policy, extack); + if (err < 0) + return err; + + if (tb[TCA_CBQ_WRROPT]) { + const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]); + + if (wrr->priority > TC_CBQ_MAXPRIO) { + NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO"); + err = -EINVAL; + } + } + return err; +} + static int cbq_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1139,13 +1166,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt, hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); q->delay_timer.function = cbq_undelay; - if (!opt) { - NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, - extack); + err = cbq_opt_parse(tb, opt, extack); if (err < 0) return err; @@ -1464,13 +1485,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t struct cbq_class *parent; struct qdisc_rate_table *rtab = NULL; - if (!opt) { - NL_SET_ERR_MSG(extack, "Mandatory qdisc options missing"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, - extack); + err = cbq_opt_parse(tb, opt, extack); if (err < 0) return err; From 0e141f757b2c78c983df893e9993313e2dc21e38 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Fri, 27 Sep 2019 14:58:20 +0800 Subject: [PATCH 064/591] erspan: remove the incorrect mtu limit for erspan erspan driver calls ether_setup(), after commit 61e84623ace3 ("net: centralize net_device min/max MTU checking"), the range of mtu is [min_mtu, max_mtu], which is [68, 1500] by default. It causes the dev mtu of the erspan device to not be greater than 1500, this limit value is not correct for ipgre tap device. Tested: Before patch: # ip link set erspan0 mtu 1600 Error: mtu greater than device maximum. After patch: # ip link set erspan0 mtu 1600 # ip -d link show erspan0 21: erspan0@NONE: mtu 1600 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 0 Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a53a543fe055..52690bb3e40f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1446,6 +1446,7 @@ static void erspan_setup(struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &erspan_netdev_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; From b1ba55cf1cfb9f3e0e00d743534684a25bf66d28 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2019 11:30:30 -0300 Subject: [PATCH 065/591] tools headers uapi: Sync asm-generic/mman-common.h with the kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick the changes from: 1a4e58cce84e ("mm: introduce MADV_PAGEOUT") 9c276cc65a58 ("mm: introduce MADV_COLD") That result in these changes in the tools: $ tools/perf/trace/beauty/madvise_behavior.sh > before $ cp include/uapi/asm-generic/mman-common.h tools/include/uapi/asm-generic/mman-common.h $ git diff diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 63b1f506ea67..c160a5354eb6 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -67,6 +67,9 @@ #define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ +#define MADV_COLD 20 /* deactivate these pages */ +#define MADV_PAGEOUT 21 /* reclaim these pages */ + /* compatibility flags */ #define MAP_FILE 0 $ tools/perf/trace/beauty/madvise_behavior.sh > after $ diff -u before after --- before 2019-09-27 11:29:43.346320100 -0300 +++ after 2019-09-27 11:30:03.838570439 -0300 @@ -16,6 +16,8 @@ [17] = "DODUMP", [18] = "WIPEONFORK", [19] = "KEEPONFORK", + [20] = "COLD", + [21] = "PAGEOUT", [100] = "HWPOISON", [101] = "SOFT_OFFLINE", }; $ I.e. now when madvise gets those behaviours as args, it will be able to translate from the number to a human readable string. This addresses the following perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/mman-common.h' differs from latest version at 'include/uapi/asm-generic/mman-common.h' diff -u tools/include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/mman-common.h Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Minchan Kim Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-n40y6c4sa49p29q6sl8w3ufx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/mman-common.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 63b1f506ea67..c160a5354eb6 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -67,6 +67,9 @@ #define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ +#define MADV_COLD 20 /* deactivate these pages */ +#define MADV_PAGEOUT 21 /* reclaim these pages */ + /* compatibility flags */ #define MAP_FILE 0 From 05f371f8c55d69e4c04db4473085303291e4e734 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2019 11:42:26 -0300 Subject: [PATCH 066/591] tools headers uapi: Sync linux/usbdevice_fs.h with the kernel sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick up the changes from: 4ed3350539aa ("USB: usbfs: Add a capability flag for runtime suspend") 7794f486ed0b ("usbfs: Add ioctls for runtime power management") This triggers these changes in the kernel sources, automagically supporting these new ioctls in the 'perf trace' beautifiers. Soon this will be used in things like filter expressions for tracepoints in 'perf record', 'perf trace', 'perf top', i.e. filter expressions will do a lookup to turn things like USBDEVFS_WAIT_FOR_RESUME into _IO('U', 35) before associating the tracepoint expression to tracepoint perf event. $ tools/perf/trace/beauty/usbdevfs_ioctl.sh > before $ cp include/uapi/linux/usbdevice_fs.h tools/include/uapi/linux/usbdevice_fs.h $ git diff diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h index 78efe870c2b7..cf525cddeb94 100644 --- a/tools/include/uapi/linux/usbdevice_fs.h +++ b/tools/include/uapi/linux/usbdevice_fs.h @@ -158,6 +158,7 @@ struct usbdevfs_hub_portinfo { #define USBDEVFS_CAP_MMAP 0x20 #define USBDEVFS_CAP_DROP_PRIVILEGES 0x40 #define USBDEVFS_CAP_CONNINFO_EX 0x80 +#define USBDEVFS_CAP_SUSPEND 0x100 /* USBDEVFS_DISCONNECT_CLAIM flags & struct */ @@ -223,5 +224,8 @@ struct usbdevfs_streams { * extending size of the data returned. */ #define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len) +#define USBDEVFS_FORBID_SUSPEND _IO('U', 33) +#define USBDEVFS_ALLOW_SUSPEND _IO('U', 34) +#define USBDEVFS_WAIT_FOR_RESUME _IO('U', 35) #endif /* _UAPI_LINUX_USBDEVICE_FS_H */ $ tools/perf/trace/beauty/usbdevfs_ioctl.sh > after $ diff -u before after --- before 2019-09-27 11:41:50.634867620 -0300 +++ after 2019-09-27 11:42:07.453102978 -0300 @@ -24,6 +24,9 @@ [30] = "DROP_PRIVILEGES", [31] = "GET_SPEED", [32] = "CONNINFO_EX", + [33] = "FORBID_SUSPEND", + [34] = "ALLOW_SUSPEND", + [35] = "WAIT_FOR_RESUME", [3] = "RESETEP", [4] = "SETINTERFACE", [5] = "SETCONFIGURATION", $ This addresses the following perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/usbdevice_fs.h' differs from latest version at 'include/uapi/linux/usbdevice_fs.h' diff -u tools/include/uapi/linux/usbdevice_fs.h include/uapi/linux/usbdevice_fs.h Cc: Alan Stern Cc: Adrian Hunter Cc: Brendan Gregg Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-x1rb109b9nfi7pukota82xhj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/usbdevice_fs.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h index 78efe870c2b7..cf525cddeb94 100644 --- a/tools/include/uapi/linux/usbdevice_fs.h +++ b/tools/include/uapi/linux/usbdevice_fs.h @@ -158,6 +158,7 @@ struct usbdevfs_hub_portinfo { #define USBDEVFS_CAP_MMAP 0x20 #define USBDEVFS_CAP_DROP_PRIVILEGES 0x40 #define USBDEVFS_CAP_CONNINFO_EX 0x80 +#define USBDEVFS_CAP_SUSPEND 0x100 /* USBDEVFS_DISCONNECT_CLAIM flags & struct */ @@ -223,5 +224,8 @@ struct usbdevfs_streams { * extending size of the data returned. */ #define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len) +#define USBDEVFS_FORBID_SUSPEND _IO('U', 33) +#define USBDEVFS_ALLOW_SUSPEND _IO('U', 34) +#define USBDEVFS_WAIT_FOR_RESUME _IO('U', 35) #endif /* _UAPI_LINUX_USBDEVICE_FS_H */ From 0ae4061223a3d097222cbec6599370e54db17731 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2019 12:01:26 -0300 Subject: [PATCH 067/591] tools headers uapi: Sync linux/fs.h with the kernel sources To pick the changes from: 78a1b96bcf7a ("fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS ioctl") 23c688b54016 ("fscrypt: allow unprivileged users to add/remove keys for v2 policies") 5dae460c2292 ("fscrypt: v2 encryption policy support") 5a7e29924dac ("fscrypt: add FS_IOC_GET_ENCRYPTION_KEY_STATUS ioctl") b1c0ec3599f4 ("fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl") 22d94f493bfb ("fscrypt: add FS_IOC_ADD_ENCRYPTION_KEY ioctl") 3b6df59bc4d2 ("fscrypt: use FSCRYPT_* definitions, not FS_*") 2336d0deb2d4 ("fscrypt: use FSCRYPT_ prefix for uapi constants") 7af0ab0d3aab ("fs, fscrypt: move uapi definitions to new header ") That don't trigger any changes in tooling, as it so far is used only for: $ grep -l 'fs\.h' tools/perf/trace/beauty/*.sh | xargs grep regex= tools/perf/trace/beauty/rename_flags.sh:regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+RENAME_([[:alnum:]_]+)[[:space:]]+\(1[[:space:]]*<<[[:space:]]*([[:xdigit:]]+)[[:space:]]*\)[[:space:]]*.*' tools/perf/trace/beauty/sync_file_range.sh:regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+SYNC_FILE_RANGE_([[:alnum:]_]+)[[:space:]]+([[:xdigit:]]+)[[:space:]]*.*' tools/perf/trace/beauty/usbdevfs_ioctl.sh:regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)(\(\w+\))?[[:space:]]+_IO[CWR]{0,2}\([[:space:]]*(_IOC_\w+,[[:space:]]*)?'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*" tools/perf/trace/beauty/usbdevfs_ioctl.sh:regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)[[:space:]]+_IO[WR]{0,2}\([[:space:]]*'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*" $ This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fs.h' differs from latest version at 'include/uapi/linux/fs.h' diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h Cc: Adrian Hunter Cc: Eric Biggers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-44g48exl9br9ba0t64chqb4i@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fs.h | 55 +-------- tools/include/uapi/linux/fscrypt.h | 181 +++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 3 files changed, 186 insertions(+), 51 deletions(-) create mode 100644 tools/include/uapi/linux/fscrypt.h diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 2a616aa3f686..379a612f8f1d 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -13,6 +13,9 @@ #include #include #include +#ifndef __KERNEL__ +#include +#endif /* Use of MS_* flags within the kernel is restricted to core mount(2) code. */ #if !defined(__KERNEL__) @@ -212,57 +215,6 @@ struct fsxattr { #define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX]) #define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX]) -/* - * File system encryption support - */ -/* Policy provided via an ioctl on the topmost directory */ -#define FS_KEY_DESCRIPTOR_SIZE 8 - -#define FS_POLICY_FLAGS_PAD_4 0x00 -#define FS_POLICY_FLAGS_PAD_8 0x01 -#define FS_POLICY_FLAGS_PAD_16 0x02 -#define FS_POLICY_FLAGS_PAD_32 0x03 -#define FS_POLICY_FLAGS_PAD_MASK 0x03 -#define FS_POLICY_FLAG_DIRECT_KEY 0x04 /* use master key directly */ -#define FS_POLICY_FLAGS_VALID 0x07 - -/* Encryption algorithms */ -#define FS_ENCRYPTION_MODE_INVALID 0 -#define FS_ENCRYPTION_MODE_AES_256_XTS 1 -#define FS_ENCRYPTION_MODE_AES_256_GCM 2 -#define FS_ENCRYPTION_MODE_AES_256_CBC 3 -#define FS_ENCRYPTION_MODE_AES_256_CTS 4 -#define FS_ENCRYPTION_MODE_AES_128_CBC 5 -#define FS_ENCRYPTION_MODE_AES_128_CTS 6 -#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */ -#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */ -#define FS_ENCRYPTION_MODE_ADIANTUM 9 - -struct fscrypt_policy { - __u8 version; - __u8 contents_encryption_mode; - __u8 filenames_encryption_mode; - __u8 flags; - __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; -}; - -#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) -#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) - -/* Parameters for passing an encryption key into the kernel keyring */ -#define FS_KEY_DESC_PREFIX "fscrypt:" -#define FS_KEY_DESC_PREFIX_SIZE 8 - -/* Structure that userspace passes to the kernel keyring */ -#define FS_MAX_KEY_SIZE 64 - -struct fscrypt_key { - __u32 mode; - __u8 raw[FS_MAX_KEY_SIZE]; - __u32 size; -}; - /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) * @@ -306,6 +258,7 @@ struct fscrypt_key { #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */ #define FS_EXTENT_FL 0x00080000 /* Extents */ +#define FS_VERITY_FL 0x00100000 /* Verity protected inode */ #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h new file mode 100644 index 000000000000..39ccfe9311c3 --- /dev/null +++ b/tools/include/uapi/linux/fscrypt.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * fscrypt user API + * + * These ioctls can be used on filesystems that support fscrypt. See the + * "User API" section of Documentation/filesystems/fscrypt.rst. + */ +#ifndef _UAPI_LINUX_FSCRYPT_H +#define _UAPI_LINUX_FSCRYPT_H + +#include + +/* Encryption policy flags */ +#define FSCRYPT_POLICY_FLAGS_PAD_4 0x00 +#define FSCRYPT_POLICY_FLAGS_PAD_8 0x01 +#define FSCRYPT_POLICY_FLAGS_PAD_16 0x02 +#define FSCRYPT_POLICY_FLAGS_PAD_32 0x03 +#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03 +#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 +#define FSCRYPT_POLICY_FLAGS_VALID 0x07 + +/* Encryption algorithms */ +#define FSCRYPT_MODE_AES_256_XTS 1 +#define FSCRYPT_MODE_AES_256_CTS 4 +#define FSCRYPT_MODE_AES_128_CBC 5 +#define FSCRYPT_MODE_AES_128_CTS 6 +#define FSCRYPT_MODE_ADIANTUM 9 +#define __FSCRYPT_MODE_MAX 9 + +/* + * Legacy policy version; ad-hoc KDF and no key verification. + * For new encrypted directories, use fscrypt_policy_v2 instead. + * + * Careful: the .version field for this is actually 0, not 1. + */ +#define FSCRYPT_POLICY_V1 0 +#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8 +struct fscrypt_policy_v1 { + __u8 version; + __u8 contents_encryption_mode; + __u8 filenames_encryption_mode; + __u8 flags; + __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; +}; +#define fscrypt_policy fscrypt_policy_v1 + +/* + * Process-subscribed "logon" key description prefix and payload format. + * Deprecated; prefer FS_IOC_ADD_ENCRYPTION_KEY instead. + */ +#define FSCRYPT_KEY_DESC_PREFIX "fscrypt:" +#define FSCRYPT_KEY_DESC_PREFIX_SIZE 8 +#define FSCRYPT_MAX_KEY_SIZE 64 +struct fscrypt_key { + __u32 mode; + __u8 raw[FSCRYPT_MAX_KEY_SIZE]; + __u32 size; +}; + +/* + * New policy version with HKDF and key verification (recommended). + */ +#define FSCRYPT_POLICY_V2 2 +#define FSCRYPT_KEY_IDENTIFIER_SIZE 16 +struct fscrypt_policy_v2 { + __u8 version; + __u8 contents_encryption_mode; + __u8 filenames_encryption_mode; + __u8 flags; + __u8 __reserved[4]; + __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; +}; + +/* Struct passed to FS_IOC_GET_ENCRYPTION_POLICY_EX */ +struct fscrypt_get_policy_ex_arg { + __u64 policy_size; /* input/output */ + union { + __u8 version; + struct fscrypt_policy_v1 v1; + struct fscrypt_policy_v2 v2; + } policy; /* output */ +}; + +/* + * v1 policy keys are specified by an arbitrary 8-byte key "descriptor", + * matching fscrypt_policy_v1::master_key_descriptor. + */ +#define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR 1 + +/* + * v2 policy keys are specified by a 16-byte key "identifier" which the kernel + * calculates as a cryptographic hash of the key itself, + * matching fscrypt_policy_v2::master_key_identifier. + */ +#define FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER 2 + +/* + * Specifies a key, either for v1 or v2 policies. This doesn't contain the + * actual key itself; this is just the "name" of the key. + */ +struct fscrypt_key_specifier { + __u32 type; /* one of FSCRYPT_KEY_SPEC_TYPE_* */ + __u32 __reserved; + union { + __u8 __reserved[32]; /* reserve some extra space */ + __u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; + __u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; + } u; +}; + +/* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */ +struct fscrypt_add_key_arg { + struct fscrypt_key_specifier key_spec; + __u32 raw_size; + __u32 __reserved[9]; + __u8 raw[]; +}; + +/* Struct passed to FS_IOC_REMOVE_ENCRYPTION_KEY */ +struct fscrypt_remove_key_arg { + struct fscrypt_key_specifier key_spec; +#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY 0x00000001 +#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS 0x00000002 + __u32 removal_status_flags; /* output */ + __u32 __reserved[5]; +}; + +/* Struct passed to FS_IOC_GET_ENCRYPTION_KEY_STATUS */ +struct fscrypt_get_key_status_arg { + /* input */ + struct fscrypt_key_specifier key_spec; + __u32 __reserved[6]; + + /* output */ +#define FSCRYPT_KEY_STATUS_ABSENT 1 +#define FSCRYPT_KEY_STATUS_PRESENT 2 +#define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED 3 + __u32 status; +#define FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF 0x00000001 + __u32 status_flags; + __u32 user_count; + __u32 __out_reserved[13]; +}; + +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */ +#define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg) +#define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) +#define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg) +#define FS_IOC_GET_ENCRYPTION_KEY_STATUS _IOWR('f', 26, struct fscrypt_get_key_status_arg) + +/**********************************************************************/ + +/* old names; don't add anything new here! */ +#ifndef __KERNEL__ +#define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE +#define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4 +#define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8 +#define FS_POLICY_FLAGS_PAD_16 FSCRYPT_POLICY_FLAGS_PAD_16 +#define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32 +#define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK +#define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY +#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID +#define FS_ENCRYPTION_MODE_INVALID 0 /* never used */ +#define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS +#define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */ +#define FS_ENCRYPTION_MODE_AES_256_CBC 3 /* never used */ +#define FS_ENCRYPTION_MODE_AES_256_CTS FSCRYPT_MODE_AES_256_CTS +#define FS_ENCRYPTION_MODE_AES_128_CBC FSCRYPT_MODE_AES_128_CBC +#define FS_ENCRYPTION_MODE_AES_128_CTS FSCRYPT_MODE_AES_128_CTS +#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* removed */ +#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* removed */ +#define FS_ENCRYPTION_MODE_ADIANTUM FSCRYPT_MODE_ADIANTUM +#define FS_KEY_DESC_PREFIX FSCRYPT_KEY_DESC_PREFIX +#define FS_KEY_DESC_PREFIX_SIZE FSCRYPT_KEY_DESC_PREFIX_SIZE +#define FS_MAX_KEY_SIZE FSCRYPT_MAX_KEY_SIZE +#endif /* !__KERNEL__ */ + +#endif /* _UAPI_LINUX_FSCRYPT_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index e2e0f06c97d0..cea13cb987d0 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -8,6 +8,7 @@ include/uapi/drm/i915_drm.h include/uapi/linux/fadvise.h include/uapi/linux/fcntl.h include/uapi/linux/fs.h +include/uapi/linux/fscrypt.h include/uapi/linux/kcmp.h include/uapi/linux/kvm.h include/uapi/linux/in.h From b7ad6108484221f431372b94763b74e550d16c93 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2019 12:18:21 -0300 Subject: [PATCH 068/591] tools headers kvm: Sync kvm headers with the kernel sources To pick the changes in: 200824f55eef ("KVM: s390: Disallow invalid bits in kvm_valid_regs and kvm_dirty_regs") 4a53d99dd0c2 ("KVM: VMX: Introduce exit reason for receiving INIT signal on guest-mode") 7396d337cfad ("KVM: x86: Return to userspace with internal error on unexpected exit reason") 92f35b751c71 ("KVM: arm/arm64: vgic: Allow more than 256 vcpus for KVM_IRQ_LINE") None of them trigger any changes in tooling, this time this is just to silence these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/kvm.h' differs from latest version at 'arch/s390/include/uapi/asm/kvm.h' diff -u tools/arch/s390/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/arm/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm/include/uapi/asm/kvm.h' diff -u tools/arch/arm/include/uapi/asm/kvm.h arch/arm/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Janosch Frank Cc: Jiri Olsa Cc: Liran Alon Cc: Marc Zyngier Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Thomas Huth Link: https://lkml.kernel.org/n/tip-akuugvvjxte26kzv23zp5d2z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm/include/uapi/asm/kvm.h | 4 +++- tools/arch/arm64/include/uapi/asm/kvm.h | 4 +++- tools/arch/s390/include/uapi/asm/kvm.h | 6 ++++++ tools/arch/x86/include/uapi/asm/vmx.h | 2 ++ tools/include/uapi/linux/kvm.h | 3 +++ 5 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index a4217c1a5d01..2769360f195c 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -266,8 +266,10 @@ struct kvm_vcpu_events { #define KVM_DEV_ARM_ITS_CTRL_RESET 4 /* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_VCPU2_SHIFT 28 +#define KVM_ARM_IRQ_VCPU2_MASK 0xf #define KVM_ARM_IRQ_TYPE_SHIFT 24 -#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_TYPE_MASK 0xf #define KVM_ARM_IRQ_VCPU_SHIFT 16 #define KVM_ARM_IRQ_VCPU_MASK 0xff #define KVM_ARM_IRQ_NUM_SHIFT 0 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 9a507716ae2f..67c21f9bdbad 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -325,8 +325,10 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 /* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_VCPU2_SHIFT 28 +#define KVM_ARM_IRQ_VCPU2_MASK 0xf #define KVM_ARM_IRQ_TYPE_SHIFT 24 -#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_TYPE_MASK 0xf #define KVM_ARM_IRQ_VCPU_SHIFT 16 #define KVM_ARM_IRQ_VCPU_MASK 0xff #define KVM_ARM_IRQ_NUM_SHIFT 0 diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 47104e5b47fd..436ec7636927 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -231,6 +231,12 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_BPBC (1UL << 10) #define KVM_SYNC_ETOKEN (1UL << 11) + +#define KVM_SYNC_S390_VALID_FIELDS \ + (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \ + KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \ + KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN) + /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd398..f01950aa7fae 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -31,6 +31,7 @@ #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_INIT_SIGNAL 3 #define EXIT_REASON_PENDING_INTERRUPT 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -90,6 +91,7 @@ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 5e3f12d5359e..233efbb1c81c 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -243,6 +243,8 @@ struct kvm_hyperv_exit { #define KVM_INTERNAL_ERROR_SIMUL_EX 2 /* Encounter unexpected vm-exit due to delivery event. */ #define KVM_INTERNAL_ERROR_DELIVERY_EV 3 +/* Encounter unexpected vm-exit reason */ +#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { @@ -996,6 +998,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 #define KVM_CAP_PMU_EVENT_FILTER 173 +#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 #ifdef KVM_CAP_IRQ_ROUTING From 7d4c85b7035eb2f9ab217ce649dcd1bfaf0cacd3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Sep 2019 15:00:18 -0700 Subject: [PATCH 069/591] perf llvm: Don't access out-of-scope array The 'test_dir' variable is assigned to the 'release' array which is out-of-scope 3 lines later. Extend the scope of the 'release' array so that an out-of-scope array isn't accessed. Bug detected by clang's address sanitizer. Fixes: 07bc5c699a3d ("perf tools: Make fetch_kernel_version() publicly available") Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lore.kernel.org/lkml/20190926220018.25402-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/llvm-utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 8d04e3d070b1..8b14e4a7f1dc 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -233,14 +233,14 @@ static int detect_kbuild_dir(char **kbuild_dir) const char *prefix_dir = ""; const char *suffix_dir = ""; + /* _UTSNAME_LENGTH is 65 */ + char release[128]; + char *autoconf_path; int err; if (!test_dir) { - /* _UTSNAME_LENGTH is 65 */ - char release[128]; - err = fetch_kernel_version(NULL, release, sizeof(release)); if (err) From 02d084792273e8a5f1813dcad988229a45be96ea Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 27 Sep 2019 10:11:46 +0200 Subject: [PATCH 070/591] perf vendor events s390: Add JSON transaction for machine type 8561 Add s390 transaction counter definition for machine 8561. This is the same file as for the predecessor machine. Fixes: 6e67d77d673d ("perf vendor events s390: Add JSON files for machine type 8561") Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20190927081147.18345-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/s390/cf_m8561/transaction.json | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 tools/perf/pmu-events/arch/s390/cf_m8561/transaction.json diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/transaction.json b/tools/perf/pmu-events/arch/s390/cf_m8561/transaction.json new file mode 100644 index 000000000000..1a0034f79f73 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_m8561/transaction.json @@ -0,0 +1,7 @@ +[ + { + "BriefDescription": "Transaction count", + "MetricName": "transaction", + "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL" + } +] From 0d0e5ecec6116db6031829299e74cc71240c9ff3 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 27 Sep 2019 10:11:47 +0200 Subject: [PATCH 071/591] perf vendor events s390: Use s390 machine name instead of type 8561 In the pmu-events directory for JSON file definitions use the official machine name IBM z15 instead of machine type number 8561. This is consistent with previous machines. Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20190927081147.18345-2-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/s390/{cf_m8561 => cf_z15}/basic.json | 0 .../perf/pmu-events/arch/s390/{cf_m8561 => cf_z15}/crypto.json | 0 .../perf/pmu-events/arch/s390/{cf_m8561 => cf_z15}/crypto6.json | 0 .../pmu-events/arch/s390/{cf_m8561 => cf_z15}/extended.json | 0 .../pmu-events/arch/s390/{cf_m8561 => cf_z15}/transaction.json | 0 tools/perf/pmu-events/arch/s390/mapfile.csv | 2 +- 6 files changed, 1 insertion(+), 1 deletion(-) rename tools/perf/pmu-events/arch/s390/{cf_m8561 => cf_z15}/basic.json (100%) rename tools/perf/pmu-events/arch/s390/{cf_m8561 => cf_z15}/crypto.json (100%) rename tools/perf/pmu-events/arch/s390/{cf_m8561 => cf_z15}/crypto6.json (100%) rename tools/perf/pmu-events/arch/s390/{cf_m8561 => cf_z15}/extended.json (100%) rename tools/perf/pmu-events/arch/s390/{cf_m8561 => cf_z15}/transaction.json (100%) diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json similarity index 100% rename from tools/perf/pmu-events/arch/s390/cf_m8561/basic.json rename to tools/perf/pmu-events/arch/s390/cf_z15/basic.json diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json similarity index 100% rename from tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json rename to tools/perf/pmu-events/arch/s390/cf_z15/crypto.json diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json similarity index 100% rename from tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json rename to tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json similarity index 100% rename from tools/perf/pmu-events/arch/s390/cf_m8561/extended.json rename to tools/perf/pmu-events/arch/s390/cf_z15/extended.json diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z15/transaction.json similarity index 100% rename from tools/perf/pmu-events/arch/s390/cf_m8561/transaction.json rename to tools/perf/pmu-events/arch/s390/cf_z15/transaction.json diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv index bd3fc577139c..61641a3480e0 100644 --- a/tools/perf/pmu-events/arch/s390/mapfile.csv +++ b/tools/perf/pmu-events/arch/s390/mapfile.csv @@ -4,4 +4,4 @@ Family-model,Version,Filename,EventType ^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core ^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core ^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core -^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_m8561,core +^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_z15,core From ee212d6ea20887c0ef352be8563ca13dbf965906 Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Sat, 28 Sep 2019 01:39:00 +0000 Subject: [PATCH 072/591] perf map: Fix overlapped map handling Whenever an mmap/mmap2 event occurs, the map tree must be updated to add a new entry. If a new map overlaps a previous map, the overlapped section of the previous map is effectively unmapped, but the non-overlapping sections are still valid. maps__fixup_overlappings() is responsible for creating any new map entries from the previously overlapped map. It optionally creates a before and an after map. When creating the after map the existing code failed to adjust the map.pgoff. This meant the new after map would incorrectly calculate the file offset for the ip. This results in incorrect symbol name resolution for any ip in the after region. Make maps__fixup_overlappings() correctly populate map.pgoff. Add an assert that new mapping matches old mapping at the beginning of the after map. Committer-testing: Validated correct parsing of libcoreclr.so symbols from .NET Core 3.0 preview9 (which didn't strip symbols). Preparation: ~/dotnet3.0-preview9/dotnet new webapi -o perfSymbol cd perfSymbol ~/dotnet3.0-preview9/dotnet publish perf record ~/dotnet3.0-preview9/dotnet \ bin/Debug/netcoreapp3.0/publish/perfSymbol.dll ^C Before: perf script --show-mmap-events 2>&1 | grep -e MMAP -e unknown |\ grep libcoreclr.so | head -n 4 dotnet 1907 373352.698780: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615726000(0x768000) @ 0 08:02 5510620 765057155]: \ r-xp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.701091: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615974000(0x1000) @ 0x24e000 08:02 5510620 765057155]: \ rwxp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.701241: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615c42000(0x1000) @ 0x51c000 08:02 5510620 765057155]: \ rwxp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.705249: 250000 cpu-clock: \ 7fe6159a1f99 [unknown] \ (.../3.0.0-preview9-19423-09/libcoreclr.so) After: perf script --show-mmap-events 2>&1 | grep -e MMAP -e unknown |\ grep libcoreclr.so | head -n 4 dotnet 1907 373352.698780: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615726000(0x768000) @ 0 08:02 5510620 765057155]: \ r-xp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.701091: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615974000(0x1000) @ 0x24e000 08:02 5510620 765057155]: \ rwxp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.701241: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615c42000(0x1000) @ 0x51c000 08:02 5510620 765057155]: \ rwxp .../3.0.0-preview9-19423-09/libcoreclr.so All the [unknown] symbols were resolved. Signed-off-by: Steve MacLean Tested-by: Brian Robbins Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Davidlohr Bueso Cc: Eric Saint-Etienne Cc: John Keeping Cc: John Salem Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Tom McDonald Link: http://lore.kernel.org/lkml/BN8PR21MB136270949F22A6A02335C238F7800@BN8PR21MB1362.namprd21.prod.outlook.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 5b83ed1ebbd6..eec9b282c047 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "symbol.h" +#include #include #include #include @@ -850,6 +851,8 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp } after->start = map->end; + after->pgoff += map->end - pos->start; + assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end)); __map_groups__insert(pos->groups, after); if (verbose >= 2 && !use_browser) map__fprintf(after, fp); From b59711e9b0d22fd47abfa00602fd8c365cdd3ab7 Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Sat, 28 Sep 2019 01:41:18 +0000 Subject: [PATCH 073/591] perf inject jit: Fix JIT_CODE_MOVE filename During perf inject --jit, JIT_CODE_MOVE records were injecting MMAP records with an incorrect filename. Specifically it was missing the ".so" suffix. Further the JIT_CODE_LOAD record were silently truncating the jr->load.code_index field to 32 bits before generating the filename. Make both records emit the same filename based on the full 64 bit code_index field. Fixes: 9b07e27f88b9 ("perf inject: Add jitdump mmap injection support") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Steve MacLean Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Brian Robbins Cc: Davidlohr Bueso Cc: Eric Saint-Etienne Cc: John Keeping Cc: John Salem Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Tom McDonald Link: http://lore.kernel.org/lkml/BN8PR21MB1362FF8F127B31DBF4121528F7800@BN8PR21MB1362.namprd21.prod.outlook.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/jitdump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 1bdf4c6ea3e5..e3ccb0ce1938 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -395,7 +395,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) size_t size; u16 idr_size; const char *sym; - uint32_t count; + uint64_t count; int ret, csize, usize; pid_t pid, tid; struct { @@ -418,7 +418,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) return -1; filename = event->mmap2.filename; - size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so", + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", jd->dir, pid, count); @@ -529,7 +529,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) return -1; filename = event->mmap2.filename; - size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64, + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", jd->dir, pid, jr->move.code_index); From 2657983b4c0d81632c6a73bae469951b0d341251 Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Sat, 28 Sep 2019 01:53:08 +0000 Subject: [PATCH 074/591] perf docs: Correct and clarify jitdump spec Specification claims latest version of jitdump file format is 2. Current jit dump reading code treats 1 as the latest version. Correct spec to match code. The original language made it unclear the value to be written in the magic field. Revise language that the writer always writes the same value. Specify that the reader uses the value to detect endian mismatches. Signed-off-by: Steve MacLean Acked-by: Stephane Eranian Cc: Alexander Shishkin Cc: Andi Kleen Cc: Brian Robbins Cc: Davidlohr Bueso Cc: Eric Saint-Etienne Cc: Jiri Olsa Cc: John Keeping Cc: John Salem Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Tom McDonald Link: http://lore.kernel.org/lkml/BN8PR21MB1362F63CDE7AC69736FC7F9EF7800@BN8PR21MB1362.namprd21.prod.outlook.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/jitdump-specification.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/jitdump-specification.txt b/tools/perf/Documentation/jitdump-specification.txt index 4c62b0713651..52152d156ad9 100644 --- a/tools/perf/Documentation/jitdump-specification.txt +++ b/tools/perf/Documentation/jitdump-specification.txt @@ -36,8 +36,8 @@ III/ Jitdump file header format Each jitdump file starts with a fixed size header containing the following fields in order: -* uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can be used to detect the endianness of the file -* uint32_t version : a 4-byte value representing the format version. It is currently set to 2 +* uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It written is as 0x4A695444. The reader will detect an endian mismatch when it reads 0x4454694a. +* uint32_t version : a 4-byte value representing the format version. It is currently set to 1 * uint32_t total_size: size in bytes of file header * uint32_t elf_mach : ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h) * uint32_t pad1 : padding. Reserved for future use From e98df280bc2a499fd41d7f9e2d6733884de69902 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 27 Sep 2019 16:35:44 -0700 Subject: [PATCH 075/591] perf script brstackinsn: Fix recovery from LBR/binary mismatch When the LBR data and the instructions in a binary do not match the loop printing instructions could get confused and print a long stream of bogus instructions. The problem was that if the instruction decoder cannot decode an instruction it ilen wasn't initialized, so the loop going through the basic block would continue with the previous value. Harden the code to avoid such problems: - Make sure ilen is always freshly initialized and is 0 for bad instructions. - Do not overrun the code buffer while printing instructions - Print a warning message if the final jump is not on an instruction boundary. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20190927233546.11533-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 286fc70d7402..67be8d31afab 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1063,7 +1063,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, continue; insn = 0; - for (off = 0;; off += ilen) { + for (off = 0; off < (unsigned)len; off += ilen) { uint64_t ip = start + off; printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); @@ -1074,6 +1074,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += print_srccode(thread, x.cpumode, ip); break; } else { + ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip, dump_insn(&x, ip, buffer + off, len - off, &ilen)); if (ilen == 0) @@ -1083,6 +1084,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, insn++; } } + if (off != (unsigned)len) + printed += fprintf(fp, "\tmismatch of LBR data and executable\n"); } /* @@ -1123,6 +1126,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, goto out; } for (off = 0; off <= end - start; off += ilen) { + ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off, dump_insn(&x, start + off, buffer + off, len - off, &ilen)); if (ilen == 0) From 6bdfd9f118bd59cf0f85d3bf4b72b586adea17c1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 27 Sep 2019 16:35:45 -0700 Subject: [PATCH 076/591] perf jevents: Fix period for Intel fixed counters The Intel fixed counters use a special table to override the JSON information. During this override the period information from the JSON file got dropped, which results in inst_retired.any and similar running with frequency mode instead of a period. Just specify the expected period in the table. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20190927233546.11533-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 9e37287da924..e2837260ca4d 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -450,12 +450,12 @@ static struct fixed { const char *name; const char *event; } fixed[] = { - { "inst_retired.any", "event=0xc0" }, - { "inst_retired.any_p", "event=0xc0" }, - { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" }, - { "cpu_clk_unhalted.thread", "event=0x3c" }, - { "cpu_clk_unhalted.core", "event=0x3c" }, - { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" }, + { "inst_retired.any", "event=0xc0,period=2000003" }, + { "inst_retired.any_p", "event=0xc0,period=2000003" }, + { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03,period=2000003" }, + { "cpu_clk_unhalted.thread", "event=0x3c,period=2000003" }, + { "cpu_clk_unhalted.core", "event=0x3c,period=2000003" }, + { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1,period=2000003" }, { NULL, NULL}, }; From f67001a4a08eb124197ed4376941e1da9cf94b42 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 10:55:34 -0300 Subject: [PATCH 077/591] perf tools: Propagate get_cpuid() error For consistency, propagate the exact cause for get_cpuid() to have failed. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-9ig269f7ktnhh99g4l15vpu2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/header.c | 3 ++- tools/perf/arch/s390/util/header.c | 9 +++++---- tools/perf/arch/x86/util/header.c | 3 ++- tools/perf/builtin-kvm.c | 7 ++++--- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index b6b7bc7e31a1..3b4cdfc5efd6 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -30,7 +31,7 @@ get_cpuid(char *buffer, size_t sz) buffer[nb-1] = '\0'; return 0; } - return -1; + return ENOBUFS; } char * diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index 8b0b018d896a..7933f6871c81 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -54,7 +55,7 @@ int get_cpuid(char *buffer, size_t sz) sysinfo = fopen(SYSINFO, "r"); if (sysinfo == NULL) - return -1; + return errno; while ((read = getline(&line, &line_sz, sysinfo)) != -1) { if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) { @@ -89,7 +90,7 @@ int get_cpuid(char *buffer, size_t sz) /* Missing manufacturer, type or model information should not happen */ if (!manufacturer[0] || !type[0] || !model[0]) - return -1; + return EINVAL; /* * Scan /proc/service_levels and return the CPU-MF counter facility @@ -133,14 +134,14 @@ skip_sysinfo: else nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type, model); - return (nbytes >= sz) ? -1 : 0; + return (nbytes >= sz) ? ENOBUFS : 0; } char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { char *buf = malloc(128); - if (buf && get_cpuid(buf, 128) < 0) + if (buf && get_cpuid(buf, 128)) zfree(&buf); return buf; } diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 662ecf84a421..aa6deb463bf3 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -58,7 +59,7 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt) buffer[nb-1] = '\0'; return 0; } - return -1; + return ENOBUFS; } int diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 2227e2f42c09..58a9e0989491 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -705,14 +705,15 @@ static int process_sample_event(struct perf_tool *tool, static int cpu_isa_config(struct perf_kvm_stat *kvm) { - char buf[64], *cpuid; + char buf[128], *cpuid; int err; if (kvm->live) { err = get_cpuid(buf, sizeof(buf)); if (err != 0) { - pr_err("Failed to look up CPU type\n"); - return err; + pr_err("Failed to look up CPU type: %s\n", + str_error_r(err, buf, sizeof(buf))); + return -err; } cpuid = buf; } else From 9db0e3635fb35b6695275774ab909c51221b66ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 11:48:32 -0300 Subject: [PATCH 078/591] perf evsel: Fall back to global 'perf_env' in perf_evsel__env() I.e. if evsel->evlist or evsel->evlist->env isn't set, return the environment for the running machine, as that would be set if reading from a perf.data file. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-uqq4grmhbi12rwb0lfpo6lfu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 ++- tools/perf/util/python.c | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5591af81a070..abc7fda4a0fe 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -30,6 +30,7 @@ #include "counts.h" #include "event.h" #include "evsel.h" +#include "util/env.h" #include "util/evsel_config.h" #include "util/evsel_fprintf.h" #include "evlist.h" @@ -2512,7 +2513,7 @@ struct perf_env *perf_evsel__env(struct evsel *evsel) { if (evsel && evsel->evlist) return evsel->evlist->env; - return NULL; + return &perf_env; } static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 53f31053a27a..02460362256d 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -14,6 +14,7 @@ #include "thread_map.h" #include "trace-event.h" #include "mmap.h" +#include "util/env.h" #include #include "../perf-sys.h" @@ -53,6 +54,11 @@ int parse_callchain_record(const char *arg __maybe_unused, return 0; } +/* + * Add this one here not to drag util/env.c + */ +struct perf_env perf_env; + /* * Support debug printing even though util/debug.c is not linked. That means * implementing 'verbose' and 'eprintf'. From a66fa0619a0ae3585ef09e9c33ecfb5c7c6cb72b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 15:06:01 -0300 Subject: [PATCH 079/591] perf annotate: Propagate perf_env__arch() error The callers of symbol__annotate2() use symbol__strerror_disassemble() to convert its failure returns into a human readable string, so propagate error values from functions it calls, starting with perf_env__arch() that when fails the right thing to do is to look at 'errno' to see why its possible call to uname() failed. Reported-by: Russell King - ARM Linux admin Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra , Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-it5d83kyusfhb1q1b0l4pxzs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e830eadfca2a..9b7b9176e713 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2071,7 +2071,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, int err; if (!arch_name) - return -1; + return errno; args.arch = arch = arch__find(arch_name); if (arch == NULL) From 28f4417c3333940b242af03d90214f713bbef232 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 15:11:47 -0300 Subject: [PATCH 080/591] perf annotate: Fix the signedness of failure returns Callers of symbol__annotate() expect a errno value or some other extended error value range in symbol__strerror_disassemble() to convert to a proper error string, fix it when propagating a failure to find the arch specific annotation routines via arch__find(arch_name). Reported-by: Russell King - ARM Linux admin Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra , Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-o0k6dw7cas0vvmjjvgsyvu1i@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 9b7b9176e713..95109acf4808 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2075,7 +2075,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, args.arch = arch = arch__find(arch_name); if (arch == NULL) - return -ENOTSUP; + return ENOTSUP; if (parch) *parch = arch; From 211f493b611eef012841f795166c38ec7528738d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 15:44:13 -0300 Subject: [PATCH 081/591] perf annotate: Propagate the symbol__annotate() error return We were just returning -1 in symbol__annotate() when symbol__annotate() failed, propagate its error as it is used later to pass to symbol__strerror_disassemble() to present a error message to the user, that in some cases were getting: "Invalid -1 error code" Fix it to propagate the error. Reported-by: Russell King - ARM Linux admin Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra , Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-0tj89rs9g7nbcyd5skadlvuu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 95109acf4808..1de1a7091c48 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2997,7 +2997,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct evsel *evsel, out_free_offsets: zfree(¬es->offsets); - return -1; + return err; } #define ANNOTATION__CFG(n) \ From 42d7a9107d83223a5fcecc6732d626a6c074cbc2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 15:48:12 -0300 Subject: [PATCH 082/591] perf annotate: Fix arch specific ->init() failure errors They are called from symbol__annotate() and to propagate errors that can help understand the problem make them return what symbol__strerror_disassemble() known, i.e. errno codes and other annotation specific errors in a special, out of errnos, range. Reported-by: Russell King - ARM Linux admin Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra , Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-pqx7srcv7tixgid251aeboj6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/annotate/instructions.c | 4 ++-- tools/perf/arch/arm64/annotate/instructions.c | 4 ++-- tools/perf/arch/s390/annotate/instructions.c | 6 ++++-- tools/perf/arch/x86/annotate/instructions.c | 6 ++++-- tools/perf/util/annotate.c | 6 ++++++ tools/perf/util/annotate.h | 2 ++ 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c index e1d4b484cc4b..2ff6cedeb9c5 100644 --- a/tools/perf/arch/arm/annotate/instructions.c +++ b/tools/perf/arch/arm/annotate/instructions.c @@ -37,7 +37,7 @@ static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arm = zalloc(sizeof(*arm)); if (!arm) - return -1; + return ENOMEM; #define ARM_CONDS "(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl|vc|vs)" err = regcomp(&arm->call_insn, "^blx?" ARM_CONDS "?$", REG_EXTENDED); @@ -59,5 +59,5 @@ out_free_call: regfree(&arm->call_insn); out_free_arm: free(arm); - return -1; + return SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP; } diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c index 43aa93ed8414..037e292ecd8e 100644 --- a/tools/perf/arch/arm64/annotate/instructions.c +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -95,7 +95,7 @@ static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arm = zalloc(sizeof(*arm)); if (!arm) - return -1; + return ENOMEM; /* bl, blr */ err = regcomp(&arm->call_insn, "^blr?$", REG_EXTENDED); @@ -118,5 +118,5 @@ out_free_call: regfree(&arm->call_insn); out_free_arm: free(arm); - return -1; + return SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP; } diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index 89bb8f2c54ce..a50e70baf918 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -164,8 +164,10 @@ static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused) if (!arch->initialized) { arch->initialized = true; arch->associate_instruction_ops = s390__associate_ins_ops; - if (cpuid) - err = s390__cpuid_parse(arch, cpuid); + if (cpuid) { + if (s390__cpuid_parse(arch, cpuid)) + err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; + } } return err; diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 44f5aba78210..7eb5621c021d 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -196,8 +196,10 @@ static int x86__annotate_init(struct arch *arch, char *cpuid) if (arch->initialized) return 0; - if (cpuid) - err = x86__cpuid_parse(arch, cpuid); + if (cpuid) { + if (x86__cpuid_parse(arch, cpuid)) + err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; + } arch->initialized = true; return err; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1de1a7091c48..dc15352924f9 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1631,6 +1631,12 @@ int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map * case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); break; + case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: + scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); + break; + case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: + scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); + break; default: scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); break; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index d94be9140e31..116e21f49da6 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -370,6 +370,8 @@ enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX = __SYMBOL_ANNOTATE_ERRNO__START, SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF, + SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING, + SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP, __SYMBOL_ANNOTATE_ERRNO__END, }; From 16ed3c1e91159e28b02f11f71ff4ce4cbc6f99e4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 15:53:33 -0300 Subject: [PATCH 083/591] perf annotate: Return appropriate error code for allocation failures We should return errno or the annotation extra range understood by symbol__strerror_disassemble() instead of -1, fix it, returning ENOMEM instead. Reported-by: Russell King - ARM Linux admin Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra , Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-8of1cmj3rz0mppfcshc9bbqq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index dc15352924f9..b49ecdd51188 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1668,7 +1668,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil build_id_path = strdup(filename); if (!build_id_path) - return -1; + return ENOMEM; /* * old style build-id cache has name of XX/XXXXXXX.. while @@ -2977,7 +2977,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct evsel *evsel, notes->offsets = zalloc(size * sizeof(struct annotation_line *)); if (notes->offsets == NULL) - return -1; + return ENOMEM; if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; From 11aad897f6d1a28eae3b7e5b293647c522d65819 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 16:04:21 -0300 Subject: [PATCH 084/591] perf annotate: Don't return -1 for error when doing BPF disassembly Return errno when open_memstream() fails and add two new speciall error codes for when an invalid, non BPF file or one without BTF is passed to symbol__disassemble_bpf(), so that its callers can rely on symbol__strerror_disassemble() to convert that to a human readable error message that can help figure out what is wrong, with hints even. Cc: Russell King - ARM Linux admin Cc: Song Liu Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra , Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-usevw9r2gcipfcrbpaueurw0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 19 +++++++++++++++---- tools/perf/util/annotate.h | 2 ++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b49ecdd51188..4036c7f7b0fb 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1637,6 +1637,13 @@ int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map * case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); break; + case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: + scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name); + break; + case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: + scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", + dso->long_name); + break; default: scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); break; @@ -1719,13 +1726,13 @@ static int symbol__disassemble_bpf(struct symbol *sym, char tpath[PATH_MAX]; size_t buf_size; int nr_skip = 0; - int ret = -1; char *buf; bfd *bfdf; + int ret; FILE *s; if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO) - return -1; + return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE; pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__, sym->name, sym->start, sym->end - sym->start); @@ -1738,8 +1745,10 @@ static int symbol__disassemble_bpf(struct symbol *sym, assert(bfd_check_format(bfdf, bfd_object)); s = open_memstream(&buf, &buf_size); - if (!s) + if (!s) { + ret = errno; goto out; + } init_disassemble_info(&info, s, (fprintf_ftype) fprintf); @@ -1748,8 +1757,10 @@ static int symbol__disassemble_bpf(struct symbol *sym, info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id); - if (!info_node) + if (!info_node) { + return SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; goto out; + } info_linear = info_node->info_linear; sub_id = dso->bpf_prog.sub_id; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 116e21f49da6..d76fd0e81f46 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -372,6 +372,8 @@ enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF, SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING, SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP, + SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE, + SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF, __SYMBOL_ANNOTATE_ERRNO__END, }; From 61129dd29f7962f278b618a2a3e8fdb986a66dc8 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 17 Sep 2019 09:18:53 +0200 Subject: [PATCH 085/591] sched: Add __ASSEMBLY__ guards around struct clone_args The addition of struct clone_args to uapi/linux/sched.h is not protected by __ASSEMBLY__ guards, causing a failure to build from source for glibc on RISC-V. Add the guards to fix this. Fixes: 7f192e3cd316 ("fork: add clone3") Signed-off-by: Seth Forshee Cc: Acked-by: Ingo Molnar Link: https://lore.kernel.org/r/20190917071853.12385-1-seth.forshee@canonical.com Signed-off-by: Christian Brauner --- include/uapi/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index b3105ac1381a..851ff1feadd5 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -33,6 +33,7 @@ #define CLONE_NEWNET 0x40000000 /* New network namespace */ #define CLONE_IO 0x80000000 /* Clone io context */ +#ifndef __ASSEMBLY__ /* * Arguments for the clone3 syscall */ @@ -46,6 +47,7 @@ struct clone_args { __aligned_u64 stack_size; __aligned_u64 tls; }; +#endif /* * Scheduling policies From 3969e76909d3aa06715997896184ee684f68d164 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 24 Sep 2019 13:52:37 -0600 Subject: [PATCH 086/591] selftests: pidfd: Fix undefined reference to pthread_create() Fix build failure: undefined reference to `pthread_create' collect2: error: ld returned 1 exit status Fix CFLAGS to include pthread correctly. Fixes: 740378dc7834 ("pidfd: add polling selftests") Signed-off-by: Shuah Khan Reviewed-by: Christian Brauner Cc: Link: https://lore.kernel.org/r/20190924195237.30519-1-skhan@linuxfoundation.org Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 464c9b76148f..7550f08822a3 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ -lpthread +CFLAGS += -g -I../../../../usr/include/ -pthread TEST_GEN_PROGS := pidfd_test pidfd_open_test pidfd_poll_test pidfd_wait From 517d6b9c6f71bee15c729155445c42cc4ef77dda Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 18 Sep 2019 08:30:33 +0000 Subject: [PATCH 087/591] erofs: fix return value check in erofs_read_superblock() In case of error, the function read_mapping_page() returns ERR_PTR() not NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: fe7c2423570d ("erofs: use read_mapping_page instead of sb_bread") Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20190918083033.47780-1-weiyongjun1@huawei.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index caf9a95173b0..0e369494f2f2 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -105,9 +105,9 @@ static int erofs_read_superblock(struct super_block *sb) int ret; page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); - if (!page) { + if (IS_ERR(page)) { erofs_err(sb, "cannot read erofs superblock"); - return -EIO; + return PTR_ERR(page); } sbi = EROFS_SB(sb); From 6927cc05c2b067d4ca8ab6aeaa3b4544bea26ef8 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 19 Sep 2019 14:28:38 +0800 Subject: [PATCH 088/591] MAINTAINERS: erofs: complete sub-entries for erofs Add a formal git tree and missing files for erofs after moving out of staging for everyone to blame in order for better improvement. Acked-by: Chao Yu Link: https://lore.kernel.org/r/20190919062838.106423-1-gaoxiang25@huawei.com Signed-off-by: Gao Xiang --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 296de2b51c83..30a5b4028d2f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6112,7 +6112,10 @@ M: Gao Xiang M: Chao Yu L: linux-erofs@lists.ozlabs.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git +F: Documentation/filesystems/erofs.txt F: fs/erofs/ +F: include/trace/events/erofs.h ERRSEQ ERROR TRACKING INFRASTRUCTURE M: Jeff Layton From 55252ab72b774119afedfdc6d1f142ffa2a9b818 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sun, 22 Sep 2019 02:43:55 +0800 Subject: [PATCH 089/591] erofs: fix erofs_get_meta_page locking due to a cleanup After doing more drop_caches stress test on our products, I found the mistake introduced by a very recent cleanup [1]. The current rule is that "erofs_get_meta_page" should be returned with page locked (although it's mostly unnecessary for read-only fs after pages are PG_uptodate), but a fix should be done for this. [1] https://lore.kernel.org/r/20190904020912.63925-26-gaoxiang25@huawei.com Fixes: 618f40ea026b ("erofs: use read_cache_page_gfp for erofs_get_meta_page") Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20190921184355.149928-1-gaoxiang25@huawei.com Signed-off-by: Gao Xiang --- fs/erofs/data.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 8a9fcbd0e8ac..fc3a8d8064f8 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -34,11 +34,15 @@ static void erofs_readendio(struct bio *bio) struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr) { - struct inode *const bd_inode = sb->s_bdev->bd_inode; - struct address_space *const mapping = bd_inode->i_mapping; + struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; + struct page *page; - return read_cache_page_gfp(mapping, blkaddr, + page = read_cache_page_gfp(mapping, blkaddr, mapping_gfp_constraint(mapping, ~__GFP_FS)); + /* should already be PageUptodate */ + if (!IS_ERR(page)) + lock_page(page); + return page; } static int erofs_map_blocks_flatmode(struct inode *inode, From dc76ea8c1087b5c44235566ed4be2202d21a8504 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sun, 22 Sep 2019 18:04:34 +0800 Subject: [PATCH 090/591] erofs: fix mis-inplace determination related with noio chain Fix a recent cleanup patch. noio (bypass) chain is handled asynchronously against submit chain, therefore inplace I/O or pagevec cannot be applied to such pages. Add detailed comment for this as well. Fixes: 97e86a858bc3 ("staging: erofs: tidy up decompression frontend") Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20190922100434.229340-1-gaoxiang25@huawei.com Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 96e34c90f814..fad80c97d247 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -575,7 +575,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, struct erofs_map_blocks *const map = &fe->map; struct z_erofs_collector *const clt = &fe->clt; const loff_t offset = page_offset(page); - bool tight = (clt->mode >= COLLECT_PRIMARY_HOOKED); + bool tight = true; enum z_erofs_cache_alloctype cache_strategy; enum z_erofs_page_type page_type; @@ -628,8 +628,16 @@ restart_now: preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy, pagepool); - tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED); hitted: + /* + * Ensure the current partial page belongs to this submit chain rather + * than other concurrent submit chains or the noio(bypass) chain since + * those chains are handled asynchronously thus the page cannot be used + * for inplace I/O or pagevec (should be processed in strict order.) + */ + tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && + clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); + cur = end - min_t(unsigned int, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { zero_user_segment(page, cur, end); From 2b6509f42dc6c96ef0b625888d4aa56d2e140330 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 27 Sep 2019 18:27:42 +0800 Subject: [PATCH 091/591] MIPS: Loongson64: Fix boot failure after dropping boot_mem_map From commit a94e4f24ec83 ("MIPS: init: Drop boot_mem_map") onwards, add_memory_region() is handled by memblock_add()/memblock_reserve() directly and all bootmem API should be converted to memblock API. Otherwise it will lead to boot failure, especially in the NUMA case because add_memory_region lose the node_id information. Fixes: a94e4f24ec836c8984f83959 ("MIPS: init: Drop boot_mem_map") Signed-off-by: Huacai Chen Signed-off-by: Jiaxun Yang [paul.burton@mips.com: - Invert node_id check to de-indent the switch statement & avoid lines over 80 characters. - Fixup commit reference in commit message.] Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen --- arch/mips/loongson64/common/mem.c | 35 +++++++++++++------------- arch/mips/loongson64/loongson-3/numa.c | 11 +------- 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/arch/mips/loongson64/common/mem.c b/arch/mips/loongson64/common/mem.c index 4abb92e0fc39..4254ac4ec616 100644 --- a/arch/mips/loongson64/common/mem.c +++ b/arch/mips/loongson64/common/mem.c @@ -3,6 +3,7 @@ */ #include #include +#include #include #include @@ -64,24 +65,22 @@ void __init prom_init_memory(void) node_id = loongson_memmap->map[i].node_id; mem_type = loongson_memmap->map[i].mem_type; - if (node_id == 0) { - switch (mem_type) { - case SYSTEM_RAM_LOW: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RAM); - break; - case SYSTEM_RAM_HIGH: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RAM); - break; - case SYSTEM_RAM_RESERVED: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RESERVED); - break; - } + if (node_id != 0) + continue; + + switch (mem_type) { + case SYSTEM_RAM_LOW: + memblock_add(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; + case SYSTEM_RAM_HIGH: + memblock_add(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; + case SYSTEM_RAM_RESERVED: + memblock_reserve(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; } } } diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 414e97de5dc0..8f20d2cb3767 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -142,8 +142,6 @@ static void __init szmem(unsigned int node) (u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RAM); memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), node); break; @@ -156,16 +154,12 @@ static void __init szmem(unsigned int node) (u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RAM); memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), node); break; case SYSTEM_RAM_RESERVED: pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", (u32)node_id, mem_type, mem_start, mem_size); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RESERVED); memblock_reserve(((node_id << 44) + mem_start), mem_size << 20); break; @@ -191,8 +185,6 @@ static void __init node_mem_init(unsigned int node) NODE_DATA(node)->node_start_pfn = start_pfn; NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; - free_bootmem_with_active_regions(node, end_pfn); - if (node == 0) { /* kernel end address */ unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end)); @@ -209,8 +201,6 @@ static void __init node_mem_init(unsigned int node) memblock_reserve((node_addrspace_offset | 0xfe000000), 32 << 20); } - - sparse_memory_present_with_active_regions(node); } static __init void prom_meminit(void) @@ -227,6 +217,7 @@ static __init void prom_meminit(void) cpumask_clear(&__node_data[(node)]->cpumask); } } + memblocks_present(); max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) { From 0889d07f3e4b171c453b2aaf2b257f9074cdf624 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 30 Sep 2019 11:39:52 +0200 Subject: [PATCH 092/591] MIPS: dts: ar9331: fix interrupt-controller size It is two registers each of 4 byte. Signed-off-by: Oleksij Rempel Signed-off-by: Paul Burton Cc: Rob Herring Cc: Mark Rutland Cc: Pengutronix Kernel Team Cc: Ralf Baechle Cc: James Hogan Cc: devicetree@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/boot/dts/qca/ar9331.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 63a9f33aa43e..5cfc9d347826 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -99,7 +99,7 @@ miscintc: interrupt-controller@18060010 { compatible = "qca,ar7240-misc-intc"; - reg = <0x18060010 0x4>; + reg = <0x18060010 0x8>; interrupt-parent = <&cpuintc>; interrupts = <6>; From 8c7138b33e5c690c308b2a7085f6313fdcb3f616 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 27 Sep 2019 16:00:31 -0700 Subject: [PATCH 093/591] net: Unpublish sk from sk_reuseport_cb before call_rcu The "reuse->sock[]" array is shared by multiple sockets. The going away sk must unpublish itself from "reuse->sock[]" before making call_rcu() call. However, this unpublish-action is currently done after a grace period and it may cause use-after-free. The fix is to move reuseport_detach_sock() to sk_destruct(). Due to the above reason, any socket with sk_reuseport_cb has to go through the rcu grace period before freeing it. It is a rather old bug (~3 yrs). The Fixes tag is not necessary the right commit but it is the one that introduced the SOCK_RCU_FREE logic and this fix is depending on it. Fixes: a4298e4522d6 ("net: add SOCK_RCU_FREE socket flag") Cc: Eric Dumazet Suggested-by: Eric Dumazet Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/core/sock.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 07863edbe6fc..e23ec80a67bf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1700,8 +1700,6 @@ static void __sk_destruct(struct rcu_head *head) sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); } - if (rcu_access_pointer(sk->sk_reuseport_cb)) - reuseport_detach_sock(sk); sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); @@ -1728,7 +1726,14 @@ static void __sk_destruct(struct rcu_head *head) void sk_destruct(struct sock *sk) { - if (sock_flag(sk, SOCK_RCU_FREE)) + bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); + + if (rcu_access_pointer(sk->sk_reuseport_cb)) { + reuseport_detach_sock(sk); + use_call_rcu = true; + } + + if (use_call_rcu) call_rcu(&sk->sk_rcu, __sk_destruct); else __sk_destruct(&sk->sk_rcu); From 13dc8c029cabf52ba95f60c56eb104d4d95d5889 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 21 Sep 2019 15:49:54 +0900 Subject: [PATCH 094/591] kbuild: remove ar-option and KBUILD_ARFLAGS Commit 40df759e2b9e ("kbuild: Fix build with binutils <= 2.19") introduced ar-option and KBUILD_ARFLAGS to deal with old binutils. According to Documentation/process/changes.rst, the current minimal supported version of binutils is 2.21 so you can assume the 'D' option is always supported. Not only GNU ar but also llvm-ar supports it. With the 'D' option hard-coded, there is no more user of ar-option or KBUILD_ARFLAGS. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers --- Documentation/kbuild/makefiles.rst | 5 ----- Makefile | 4 ---- arch/powerpc/boot/Makefile | 2 +- scripts/Kbuild.include | 5 ----- scripts/Makefile.build | 2 +- scripts/Makefile.lib | 2 +- 6 files changed, 3 insertions(+), 17 deletions(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 6ba9d5365ff3..b89c88168d6a 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -954,11 +954,6 @@ When kbuild executes, the following steps are followed (roughly): From commandline LDFLAGS_MODULE shall be used (see kbuild.txt). - KBUILD_ARFLAGS Options for $(AR) when creating archives - - $(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic - mode) if this option is supported by $(AR). - KBUILD_LDS The linker script with full path. Assigned by the top-level Makefile. diff --git a/Makefile b/Makefile index 6f54f2f95743..7452174f5b21 100644 --- a/Makefile +++ b/Makefile @@ -498,7 +498,6 @@ export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL -export KBUILD_ARFLAGS # Files to ignore in find ... statements @@ -914,9 +913,6 @@ ifdef CONFIG_RETPOLINE KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif -# use the deterministic mode of AR if available -KBUILD_ARFLAGS := $(call ar-option,D) - include scripts/Makefile.kasan include scripts/Makefile.extrawarn include scripts/Makefile.ubsan diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6841bd52738b..dfbd7f22eef5 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -50,7 +50,7 @@ endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc -BOOTARFLAGS := -cr$(KBUILD_ARFLAGS) +BOOTARFLAGS := -crD ifdef CONFIG_CC_IS_CLANG BOOTCFLAGS += $(CLANG_FLAGS) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 4b0432e095ae..10ba926ae292 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -143,11 +143,6 @@ cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || e # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) -# ar-option -# Usage: KBUILD_ARFLAGS := $(call ar-option,D) -# Important: no spaces around options -ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2)) - # ld-version # Note this is mainly for HJ Lu's 3 number binutil versions ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index f72aba64d611..a9e47953ca53 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -389,7 +389,7 @@ $(sort $(subdir-obj-y)): $(subdir-ym) ; ifdef builtin-target quiet_cmd_ar_builtin = AR $@ - cmd_ar_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(real-prereqs) + cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs) $(builtin-target): $(real-obj-y) FORCE $(call if_changed,ar_builtin) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 4a0cdd6f5909..179d55af5852 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -232,7 +232,7 @@ quiet_cmd_ld = LD $@ # --------------------------------------------------------------------------- quiet_cmd_ar = AR $@ - cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(real-prereqs) + cmd_ar = rm -f $@; $(AR) cDPrsT $@ $(real-prereqs) # Objcopy # --------------------------------------------------------------------------- From b6f2494d311a19b33b19708543e7ef6dea1de459 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 29 Sep 2019 01:08:17 +0300 Subject: [PATCH 095/591] net: dsa: sja1105: Ensure PTP time for rxtstamp reconstruction is not in the past Sometimes the PTP synchronization on the switch 'jumps': ptp4l[11241.155]: rms 8 max 16 freq -21732 +/- 11 delay 742 +/- 0 ptp4l[11243.157]: rms 7 max 17 freq -21731 +/- 10 delay 744 +/- 0 ptp4l[11245.160]: rms 33592410 max 134217731 freq +192422 +/- 8530253 delay 743 +/- 0 ptp4l[11247.163]: rms 811631 max 964131 freq +10326 +/- 557785 delay 743 +/- 0 ptp4l[11249.166]: rms 261936 max 533876 freq -304323 +/- 126371 delay 744 +/- 0 ptp4l[11251.169]: rms 48700 max 57740 freq -20218 +/- 30532 delay 744 +/- 0 ptp4l[11253.171]: rms 14570 max 30163 freq -5568 +/- 7563 delay 742 +/- 0 ptp4l[11255.174]: rms 2914 max 3440 freq -22001 +/- 1667 delay 744 +/- 1 ptp4l[11257.177]: rms 811 max 1710 freq -22653 +/- 451 delay 744 +/- 1 ptp4l[11259.180]: rms 177 max 218 freq -21695 +/- 89 delay 741 +/- 0 ptp4l[11261.182]: rms 45 max 92 freq -21677 +/- 32 delay 742 +/- 0 ptp4l[11263.186]: rms 14 max 32 freq -21733 +/- 11 delay 742 +/- 0 ptp4l[11265.188]: rms 9 max 14 freq -21725 +/- 12 delay 742 +/- 0 ptp4l[11267.191]: rms 9 max 16 freq -21727 +/- 13 delay 742 +/- 0 ptp4l[11269.194]: rms 6 max 15 freq -21726 +/- 9 delay 743 +/- 0 ptp4l[11271.197]: rms 8 max 15 freq -21728 +/- 11 delay 743 +/- 0 ptp4l[11273.200]: rms 6 max 12 freq -21727 +/- 8 delay 743 +/- 0 ptp4l[11275.202]: rms 9 max 17 freq -21720 +/- 11 delay 742 +/- 0 ptp4l[11277.205]: rms 9 max 18 freq -21725 +/- 12 delay 742 +/- 0 Background: the switch only offers partial RX timestamps (24 bits) and it is up to the driver to read the PTP clock to fill those timestamps up to 64 bits. But the PTP clock readout needs to happen quickly enough (in 0.135 seconds, in fact), otherwise the PTP clock will wrap around 24 bits, condition which cannot be detected. Looking at the 'max 134217731' value on output line 3, one can see that in hex it is 0x8000003. Because the PTP clock resolution is 8 ns, that means 0x1000000 in ticks, which is exactly 2^24. So indeed this is a PTP clock wraparound, but the reason might be surprising. What is going on is that sja1105_tstamp_reconstruct(priv, now, ts) expects a "now" time that is later than the "ts" was snapshotted at. This, of course, is obvious: we read the PTP time _after_ the partial RX timestamp was received. However, the workqueue is processing frames from a skb queue and reuses the same PTP time, read once at the beginning. Normally the skb queue only contains one frame and all goes well. But when the skb queue contains two frames, the second frame that gets dequeued might have been partially timestamped by the RX MAC _after_ we had read our PTP time initially. The code was originally like that due to concerns that SPI access for PTP time readout is a slow process, and we are time-constrained anyway (aka: premature optimization). But some timing analysis reveals that the time spent until the RX timestamp is completely reconstructed is 1 order of magnitude lower than the 0.135 s deadline even under worst-case conditions. So we can afford to read the PTP time for each frame in the RX timestamping queue, which of course ensures that the full PTP time is in the partial timestamp's future. Fixes: f3097be21bf1 ("net: dsa: sja1105: Add a state machine for RX timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index b9def744bcb3..f3d38ff144c4 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2005,12 +2005,12 @@ static void sja1105_rxtstamp_work(struct work_struct *work) mutex_lock(&priv->ptp_lock); - now = priv->tstamp_cc.read(&priv->tstamp_cc); - while ((skb = skb_dequeue(&data->skb_rxtstamp_queue)) != NULL) { struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb); u64 ts; + now = priv->tstamp_cc.read(&priv->tstamp_cc); + *shwt = (struct skb_shared_hwtstamps) {0}; ts = SJA1105_SKB_CB(skb)->meta_tstamp; From 7e35b42591c058b91282f95ce3b2cf0c05ffe93d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 21 Sep 2019 16:05:41 +0900 Subject: [PATCH 096/591] kbuild: remove SUBDIRS support Linux 5.3 is out. Remove the SUBDIRS support. Signed-off-by: Masahiro Yamada --- Makefile | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 7452174f5b21..779c9c9b9820 100644 --- a/Makefile +++ b/Makefile @@ -206,24 +206,8 @@ ifndef KBUILD_CHECKSRC KBUILD_CHECKSRC = 0 endif -# Use make M=dir to specify directory of external module to build -# Old syntax make ... SUBDIRS=$PWD is still supported -# Setting the environment variable KBUILD_EXTMOD take precedence -ifdef SUBDIRS - $(warning ================= WARNING ================) - $(warning 'SUBDIRS' will be removed after Linux 5.3) - $(warning ) - $(warning If you are building an individual subdirectory) - $(warning in the kernel tree, you can do like this:) - $(warning $$ make path/to/dir/you/want/to/build/) - $(warning (Do not forget the trailing slash)) - $(warning ) - $(warning If you are building an external module,) - $(warning Please use 'M=' or 'KBUILD_EXTMOD' instead) - $(warning ==========================================) - KBUILD_EXTMOD ?= $(SUBDIRS) -endif - +# Use make M=dir or set the environment variable KBUILD_EXTMOD to specify the +# directory of external module to build. Setting M= takes precedence. ifeq ("$(origin M)", "command line") KBUILD_EXTMOD := $(M) endif From 807f2105b87af20c4a582fb62111b16689f83bb8 Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sat, 21 Sep 2019 15:23:04 -0700 Subject: [PATCH 097/591] kbuild: correct formatting of header in kbuild module docs Minor formatting fixup. Fixes: cd238effefa2 ("docs: kbuild: convert docs to ReST and rename to *.rst") Signed-off-by: Alex Gaynor Signed-off-by: Matthew Garrett Signed-off-by: Masahiro Yamada --- Documentation/kbuild/modules.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index d2ae799237fd..33a17121d11d 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -498,7 +498,8 @@ build. will be written containing all exported symbols that were not defined in the kernel. ---- 6.3 Symbols From Another External Module +6.3 Symbols From Another External Module +---------------------------------------- Sometimes, an external module uses exported symbols from another external module. kbuild needs to have full knowledge of From 47346e96f004eca07720e1e2b24fc7f0b0df4092 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 24 Sep 2019 21:07:40 +0900 Subject: [PATCH 098/591] modpost: fix static EXPORT_SYMBOL warnings for UML build Johannes Berg reports lots of modpost warnings on ARCH=um builds: WARNING: "rename" [vmlinux] is a static EXPORT_SYMBOL WARNING: "lseek" [vmlinux] is a static EXPORT_SYMBOL WARNING: "ftruncate64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "getuid" [vmlinux] is a static EXPORT_SYMBOL WARNING: "lseek64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "unlink" [vmlinux] is a static EXPORT_SYMBOL WARNING: "pwrite64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "close" [vmlinux] is a static EXPORT_SYMBOL WARNING: "opendir" [vmlinux] is a static EXPORT_SYMBOL WARNING: "pread64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "syscall" [vmlinux] is a static EXPORT_SYMBOL WARNING: "readdir" [vmlinux] is a static EXPORT_SYMBOL WARNING: "readdir64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "futimes" [vmlinux] is a static EXPORT_SYMBOL WARNING: "__lxstat" [vmlinux] is a static EXPORT_SYMBOL WARNING: "write" [vmlinux] is a static EXPORT_SYMBOL WARNING: "closedir" [vmlinux] is a static EXPORT_SYMBOL WARNING: "__xstat" [vmlinux] is a static EXPORT_SYMBOL WARNING: "fsync" [vmlinux] is a static EXPORT_SYMBOL WARNING: "__lxstat64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "__fxstat64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "telldir" [vmlinux] is a static EXPORT_SYMBOL WARNING: "printf" [vmlinux] is a static EXPORT_SYMBOL WARNING: "readlink" [vmlinux] is a static EXPORT_SYMBOL WARNING: "__sprintf_chk" [vmlinux] is a static EXPORT_SYMBOL WARNING: "link" [vmlinux] is a static EXPORT_SYMBOL WARNING: "rmdir" [vmlinux] is a static EXPORT_SYMBOL WARNING: "fdatasync" [vmlinux] is a static EXPORT_SYMBOL WARNING: "truncate" [vmlinux] is a static EXPORT_SYMBOL WARNING: "statfs" [vmlinux] is a static EXPORT_SYMBOL WARNING: "__errno_location" [vmlinux] is a static EXPORT_SYMBOL WARNING: "__xmknod" [vmlinux] is a static EXPORT_SYMBOL WARNING: "open64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "truncate64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "open" [vmlinux] is a static EXPORT_SYMBOL WARNING: "read" [vmlinux] is a static EXPORT_SYMBOL WARNING: "chown" [vmlinux] is a static EXPORT_SYMBOL WARNING: "chmod" [vmlinux] is a static EXPORT_SYMBOL WARNING: "utime" [vmlinux] is a static EXPORT_SYMBOL WARNING: "fchmod" [vmlinux] is a static EXPORT_SYMBOL WARNING: "seekdir" [vmlinux] is a static EXPORT_SYMBOL WARNING: "ioctl" [vmlinux] is a static EXPORT_SYMBOL WARNING: "dup2" [vmlinux] is a static EXPORT_SYMBOL WARNING: "statfs64" [vmlinux] is a static EXPORT_SYMBOL WARNING: "utimes" [vmlinux] is a static EXPORT_SYMBOL WARNING: "mkdir" [vmlinux] is a static EXPORT_SYMBOL WARNING: "fchown" [vmlinux] is a static EXPORT_SYMBOL WARNING: "__guard" [vmlinux] is a static EXPORT_SYMBOL WARNING: "symlink" [vmlinux] is a static EXPORT_SYMBOL WARNING: "access" [vmlinux] is a static EXPORT_SYMBOL WARNING: "__stack_smash_handler" [vmlinux] is a static EXPORT_SYMBOL When you run "make", the modpost is run twice; before linking vmlinux, and before building modules. All the warnings above are from the second modpost. The offending symbols are defined not in vmlinux, but in the C library. The first modpost is run against the relocatable vmlinux.o, and those warnings are nicely suppressed because the SH_UNDEF entries from the symbol table clear the ->is_static flag. The second modpost is run against the executable vmlinux (+ modules), where those symbols have been resolved, but the definitions do not exist. This commit fixes it in a straightforward way; suppress the static EXPORT_SYMBOL warnings from "vmlinux". Without this commit, we see valid warnings twice anyway. For example, ARCH=arm64 defconfig shows the following warning twice: WARNING: "HYPERVISOR_platform_op" [vmlinux] is a static EXPORT_SYMBOL_GPL So, it is reasonable to suppress the second one. Fixes: 15bfc2348d54 ("modpost: check for static EXPORT_SYMBOL* functions") Reported-by: Johannes Berg Signed-off-by: Masahiro Yamada Tested-by: Johannes Berg Tested-by: Denis Efremov --- scripts/mod/modpost.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 3961941e8e7a..442d5e2ad688 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2652,15 +2652,20 @@ int main(int argc, char **argv) fatal("modpost: Section mismatches detected.\n" "Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them.\n"); for (n = 0; n < SYMBOL_HASH_SIZE; n++) { - struct symbol *s = symbolhash[n]; + struct symbol *s; + + for (s = symbolhash[n]; s; s = s->next) { + /* + * Do not check "vmlinux". This avoids the same warnings + * shown twice, and false-positives for ARCH=um. + */ + if (is_vmlinux(s->module->name) && !s->module->is_dot_o) + continue; - while (s) { if (s->is_static) warn("\"%s\" [%s] is a static %s\n", s->name, s->module->name, export_str(s->export)); - - s = s->next; } } From 68501df92d116b760777a2cfda314789f926476f Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sun, 29 Sep 2019 01:43:39 +0300 Subject: [PATCH 099/591] net: dsa: sja1105: Prevent leaking memory In sja1105_static_config_upload, in two cases memory is leaked: when static_config_buf_prepare_for_upload fails and when sja1105_inhibit_tx fails. In both cases config_buf should be released. Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port L2 switch") Fixes: 1a4c69406cc1 ("net: dsa: sja1105: Prevent PHY jabbering during switch reset") Signed-off-by: Navid Emamdoost Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_spi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index 84dc603138cf..58dd37ecde17 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -409,7 +409,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv) rc = static_config_buf_prepare_for_upload(priv, config_buf, buf_len); if (rc < 0) { dev_err(dev, "Invalid config, cannot upload\n"); - return -EINVAL; + rc = -EINVAL; + goto out; } /* Prevent PHY jabbering during switch reset by inhibiting * Tx on all ports and waiting for current packet to drain. @@ -418,7 +419,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv) rc = sja1105_inhibit_tx(priv, port_bitmap, true); if (rc < 0) { dev_err(dev, "Failed to inhibit Tx on ports\n"); - return -ENXIO; + rc = -ENXIO; + goto out; } /* Wait for an eventual egress packet to finish transmission * (reach IFG). It is guaranteed that a second one will not From 68ce6688a5baefde30914fc07fc27292dbbe8320 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 29 Sep 2019 02:01:39 +0300 Subject: [PATCH 100/591] net: sched: taprio: Fix potential integer overflow in taprio_set_picos_per_byte The speed divisor is used in a context expecting an s64, but it is evaluated using 32-bit arithmetic. To avoid that happening, instead of multiplying by 1,000,000 in the first place, simplify the fraction and do a standard 32 bit division instead. Fixes: f04b514c0ce2 ("taprio: Set default link speed to 10 Mbps in taprio_set_picos_per_byte") Reported-by: Gustavo A. R. Silva Suggested-by: Eric Dumazet Signed-off-by: Vladimir Oltean Acked-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 2f7b34205c82..2aab46ada94f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1048,8 +1048,7 @@ static void taprio_set_picos_per_byte(struct net_device *dev, speed = ecmd.base.speed; skip: - picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, - speed * 1000 * 1000); + picos_per_byte = (USEC_PER_SEC * 8) / speed; atomic64_set(&q->picos_per_byte, picos_per_byte); netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", From ec1db1be106101c24f6f24efbb5eb3176f66cb50 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Tue, 17 Sep 2019 21:07:55 +0200 Subject: [PATCH 101/591] staging: exfat: add missing SPDX line to Kconfig Add SPDX identifier to Kconfig. Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20190917190755.21723-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/exfat/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/exfat/Kconfig b/drivers/staging/exfat/Kconfig index 290dbfc7ace1..59e07afe249c 100644 --- a/drivers/staging/exfat/Kconfig +++ b/drivers/staging/exfat/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 config EXFAT_FS tristate "exFAT fs support" depends on BLOCK From a358eea07c78d9d0c246738ed1c6349d72d41920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= Date: Wed, 18 Sep 2019 20:38:08 -0400 Subject: [PATCH 102/591] staging: exfat - fix SPDX tags.. The copyright notices as I got them said "GPLv2 or later", which I screwed up when putting in the SPDX tags. Fix them to match the license I got the code under. Signed-off-by: Valdis Kletnieks Link: https://lore.kernel.org/r/122590.1568853488@turing-police Signed-off-by: Greg Kroah-Hartman --- drivers/staging/exfat/Makefile | 2 +- drivers/staging/exfat/exfat.h | 2 +- drivers/staging/exfat/exfat_blkdev.c | 2 +- drivers/staging/exfat/exfat_cache.c | 2 +- drivers/staging/exfat/exfat_core.c | 2 +- drivers/staging/exfat/exfat_nls.c | 2 +- drivers/staging/exfat/exfat_super.c | 2 +- drivers/staging/exfat/exfat_upcase.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/staging/exfat/Makefile b/drivers/staging/exfat/Makefile index 84944dfbae28..6c90aec83feb 100644 --- a/drivers/staging/exfat/Makefile +++ b/drivers/staging/exfat/Makefile @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0-or-later obj-$(CONFIG_EXFAT_FS) += exfat.o diff --git a/drivers/staging/exfat/exfat.h b/drivers/staging/exfat/exfat.h index 6c12f2d79f4d..3abab33e932c 100644 --- a/drivers/staging/exfat/exfat.h +++ b/drivers/staging/exfat/exfat.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_blkdev.c b/drivers/staging/exfat/exfat_blkdev.c index f086c75e7076..81d20e6241c6 100644 --- a/drivers/staging/exfat/exfat_blkdev.c +++ b/drivers/staging/exfat/exfat_blkdev.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_cache.c b/drivers/staging/exfat/exfat_cache.c index 1565ce65d39f..e1b001718709 100644 --- a/drivers/staging/exfat/exfat_cache.c +++ b/drivers/staging/exfat/exfat_cache.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_core.c b/drivers/staging/exfat/exfat_core.c index b3e9cf725cf5..79174e5c4145 100644 --- a/drivers/staging/exfat/exfat_core.c +++ b/drivers/staging/exfat/exfat_core.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_nls.c b/drivers/staging/exfat/exfat_nls.c index 03cb8290b5d2..a5c4b68925fb 100644 --- a/drivers/staging/exfat/exfat_nls.c +++ b/drivers/staging/exfat/exfat_nls.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_super.c b/drivers/staging/exfat/exfat_super.c index 5f6caee819a6..229ecabe7a93 100644 --- a/drivers/staging/exfat/exfat_super.c +++ b/drivers/staging/exfat/exfat_super.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_upcase.c b/drivers/staging/exfat/exfat_upcase.c index 366082fb3dab..b91a1faa0e50 100644 --- a/drivers/staging/exfat/exfat_upcase.c +++ b/drivers/staging/exfat/exfat_upcase.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ From 89d5f78fab48844d750f6f3a419e26f7f828bac8 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 30 Sep 2019 22:05:04 +0900 Subject: [PATCH 103/591] staging: exfat: Fix a typo in Kconfig This patch fix a spelling typo in Kconfig. Signed-off-by: Masanari Iida Link: https://lore.kernel.org/r/20190930130504.21994-1-standby24x7@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/exfat/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/exfat/Kconfig b/drivers/staging/exfat/Kconfig index 59e07afe249c..ce32dfe33bec 100644 --- a/drivers/staging/exfat/Kconfig +++ b/drivers/staging/exfat/Kconfig @@ -7,7 +7,7 @@ config EXFAT_FS This adds support for the exFAT file system. config EXFAT_DONT_MOUNT_VFAT - bool "Prohibit mounting of fat/vfat filesysems by exFAT" + bool "Prohibit mounting of fat/vfat filesystems by exFAT" depends on EXFAT_FS default y help From 7d4dea95f8281fc7f14766a6040e3504d3f658fc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 19 Sep 2019 11:50:22 +0200 Subject: [PATCH 104/591] staging: octeon: Use "(uintptr_t)" to cast from pointer to int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit: In file included from drivers/staging/octeon/octeon-ethernet.h:41, from drivers/staging/octeon/ethernet-tx.c:25: drivers/staging/octeon/octeon-stubs.h: In function ‘cvmx_phys_to_ptr’: drivers/staging/octeon/octeon-stubs.h:1205:9: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] return (void *)(physical_address); ^ drivers/staging/octeon/ethernet-tx.c: In function ‘cvm_oct_xmit’: drivers/staging/octeon/ethernet-tx.c:264:37: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data); ^ drivers/staging/octeon/octeon-stubs.h:2:30: note: in definition of macro ‘XKPHYS_TO_PHYS’ #define XKPHYS_TO_PHYS(p) (p) ^ drivers/staging/octeon/ethernet-tx.c:268:37: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data); ^ drivers/staging/octeon/octeon-stubs.h:2:30: note: in definition of macro ‘XKPHYS_TO_PHYS’ #define XKPHYS_TO_PHYS(p) (p) ^ drivers/staging/octeon/ethernet-tx.c:276:20: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] XKPHYS_TO_PHYS((u64)skb_frag_address(fs)); ^ drivers/staging/octeon/octeon-stubs.h:2:30: note: in definition of macro ‘XKPHYS_TO_PHYS’ #define XKPHYS_TO_PHYS(p) (p) ^ drivers/staging/octeon/ethernet-tx.c:280:37: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)CVM_OCT_SKB_CB(skb)); ^ drivers/staging/octeon/octeon-stubs.h:2:30: note: in definition of macro ‘XKPHYS_TO_PHYS’ #define XKPHYS_TO_PHYS(p) (p) ^ Fix this by replacing casts to "u64" by casts to "uintptr_t", which is either 32-bit or 64-bit, and adding an intermediate cast to "uintptr_t" where needed. Exposed by commit 171a9bae68c72f2d ("staging/octeon: Allow test build on !MIPS"). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20190919095022.29099-1-geert@linux-m68k.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-tx.c | 9 +++++---- drivers/staging/octeon/octeon-stubs.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index c64728fc21f2..7021ff07ba2a 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -261,11 +261,11 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) /* Build the PKO buffer pointer */ hw_buffer.u64 = 0; if (skb_shinfo(skb)->nr_frags == 0) { - hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data); + hw_buffer.s.addr = XKPHYS_TO_PHYS((uintptr_t)skb->data); hw_buffer.s.pool = 0; hw_buffer.s.size = skb->len; } else { - hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data); + hw_buffer.s.addr = XKPHYS_TO_PHYS((uintptr_t)skb->data); hw_buffer.s.pool = 0; hw_buffer.s.size = skb_headlen(skb); CVM_OCT_SKB_CB(skb)[0] = hw_buffer.u64; @@ -273,11 +273,12 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) skb_frag_t *fs = skb_shinfo(skb)->frags + i; hw_buffer.s.addr = - XKPHYS_TO_PHYS((u64)skb_frag_address(fs)); + XKPHYS_TO_PHYS((uintptr_t)skb_frag_address(fs)); hw_buffer.s.size = skb_frag_size(fs); CVM_OCT_SKB_CB(skb)[i + 1] = hw_buffer.u64; } - hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)CVM_OCT_SKB_CB(skb)); + hw_buffer.s.addr = + XKPHYS_TO_PHYS((uintptr_t)CVM_OCT_SKB_CB(skb)); hw_buffer.s.size = skb_shinfo(skb)->nr_frags + 1; pko_command.s.segs = skb_shinfo(skb)->nr_frags + 1; pko_command.s.gather = 1; diff --git a/drivers/staging/octeon/octeon-stubs.h b/drivers/staging/octeon/octeon-stubs.h index a4ac3bfb62a8..b78ce9eaab85 100644 --- a/drivers/staging/octeon/octeon-stubs.h +++ b/drivers/staging/octeon/octeon-stubs.h @@ -1202,7 +1202,7 @@ static inline int cvmx_wqe_get_grp(cvmx_wqe_t *work) static inline void *cvmx_phys_to_ptr(uint64_t physical_address) { - return (void *)(physical_address); + return (void *)(uintptr_t)(physical_address); } static inline uint64_t cvmx_ptr_to_phys(void *ptr) From 63f2b1677fba11c5bd02089f25c13421948905f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Tue, 17 Sep 2019 19:18:41 +0200 Subject: [PATCH 105/591] staging/fbtft: Depend on OF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c440eee1a7a1 ("Staging: fbtft: Switch to the gpio descriptor interface") removed setting gpios via platform data. This means that fbtft will now only work with Device Tree so set the dependency. This also prevents a NULL pointer deref on non-DT platform because fbtftops.request_gpios is not set in that case anymore. Fixes: c440eee1a7a1 ("Staging: fbtft: Switch to the gpio descriptor interface") Cc: stable Signed-off-by: Noralf Trønnes Link: https://lore.kernel.org/r/20190917171843.10334-1-noralf@tronnes.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fbtft/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/fbtft/Kconfig b/drivers/staging/fbtft/Kconfig index 8ec524a95ec8..4e5d860fd788 100644 --- a/drivers/staging/fbtft/Kconfig +++ b/drivers/staging/fbtft/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 menuconfig FB_TFT tristate "Support for small TFT LCD display modules" - depends on FB && SPI + depends on FB && SPI && OF depends on GPIOLIB || COMPILE_TEST select FB_SYS_FILLRECT select FB_SYS_COPYAREA From 2962db71c7030868b6859d09b2138b55297df95e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Tue, 17 Sep 2019 19:18:42 +0200 Subject: [PATCH 106/591] staging/fbtft: Remove fbtft_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c440eee1a7a1 ("Staging: fbtft: Switch to the gpio descriptor interface") removed the gpio code from fbtft_device rendering it useless. fbtft_device is a module that was used on the Raspberry Pi to dynamically add fbtft devices when the Pi didn't have Device Tree support. Just remove the module since it's the responsibility of Device Tree, ACPI or platform code to add devices. Fixes: c440eee1a7a1 ("Staging: fbtft: Switch to the gpio descriptor interface") Signed-off-by: Noralf Trønnes Link: https://lore.kernel.org/r/20190917171843.10334-2-noralf@tronnes.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fbtft/Kconfig | 4 - drivers/staging/fbtft/Makefile | 3 - drivers/staging/fbtft/fbtft_device.c | 1261 -------------------------- 3 files changed, 1268 deletions(-) delete mode 100644 drivers/staging/fbtft/fbtft_device.c diff --git a/drivers/staging/fbtft/Kconfig b/drivers/staging/fbtft/Kconfig index 4e5d860fd788..4c37d087e25c 100644 --- a/drivers/staging/fbtft/Kconfig +++ b/drivers/staging/fbtft/Kconfig @@ -205,7 +205,3 @@ config FB_FLEX depends on FB_TFT help Generic Framebuffer support for TFT LCD displays. - -config FB_TFT_FBTFT_DEVICE - tristate "Module to for adding FBTFT devices" - depends on FB_TFT diff --git a/drivers/staging/fbtft/Makefile b/drivers/staging/fbtft/Makefile index 6bc03311c9c7..925ec17e318d 100644 --- a/drivers/staging/fbtft/Makefile +++ b/drivers/staging/fbtft/Makefile @@ -37,6 +37,3 @@ obj-$(CONFIG_FB_TFT_UC1701) += fb_uc1701.o obj-$(CONFIG_FB_TFT_UPD161704) += fb_upd161704.o obj-$(CONFIG_FB_TFT_WATTEROTT) += fb_watterott.o obj-$(CONFIG_FB_FLEX) += flexfb.o - -# Device modules -obj-$(CONFIG_FB_TFT_FBTFT_DEVICE) += fbtft_device.o diff --git a/drivers/staging/fbtft/fbtft_device.c b/drivers/staging/fbtft/fbtft_device.c deleted file mode 100644 index 44e1410eb3fe..000000000000 --- a/drivers/staging/fbtft/fbtft_device.c +++ /dev/null @@ -1,1261 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * - * Copyright (C) 2013, Noralf Tronnes - */ - -#define pr_fmt(fmt) "fbtft_device: " fmt -#include -#include -#include -#include -#include -#include

; if - * found, dentry is grabbed and passed to caller via *. - * If no such element exists, the anchor of list is returned - * and * is set to NULL. + * found, dentry is grabbed and returned to caller. + * If no such element exists, NULL is returned. */ -static struct list_head *scan_positives(struct dentry *cursor, +static struct dentry *scan_positives(struct dentry *cursor, struct list_head *p, loff_t count, - struct dentry **res) + struct dentry *last) { struct dentry *dentry = cursor->d_parent, *found = NULL; @@ -127,9 +126,8 @@ static struct list_head *scan_positives(struct dentry *cursor, } } spin_unlock(&dentry->d_lock); - dput(*res); - *res = found; - return p; + dput(last); + return found; } loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) @@ -149,25 +147,22 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { struct dentry *cursor = file->private_data; struct dentry *to = NULL; - struct list_head *p; - file->f_pos = offset; inode_lock_shared(dentry->d_inode); - if (file->f_pos > 2) { - p = scan_positives(cursor, &dentry->d_subdirs, - file->f_pos - 2, &to); - spin_lock(&dentry->d_lock); - list_move(&cursor->d_child, p); - spin_unlock(&dentry->d_lock); - } else { - spin_lock(&dentry->d_lock); + if (offset > 2) + to = scan_positives(cursor, &dentry->d_subdirs, + offset - 2, NULL); + spin_lock(&dentry->d_lock); + if (to) + list_move(&cursor->d_child, &to->d_child); + else list_del_init(&cursor->d_child); - spin_unlock(&dentry->d_lock); - } - + spin_unlock(&dentry->d_lock); dput(to); + file->f_pos = offset; + inode_unlock_shared(dentry->d_inode); } return offset; @@ -199,17 +194,23 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) if (ctx->pos == 2) p = anchor; - else + else if (!list_empty(&cursor->d_child)) p = &cursor->d_child; + else + return 0; - while ((p = scan_positives(cursor, p, 1, &next)) != anchor) { + while ((next = scan_positives(cursor, p, 1, next)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, dt_type(d_inode(next)))) break; ctx->pos++; + p = &next->d_child; } spin_lock(&dentry->d_lock); - list_move_tail(&cursor->d_child, p); + if (next) + list_move_tail(&cursor->d_child, &next->d_child); + else + list_del_init(&cursor->d_child); spin_unlock(&dentry->d_lock); dput(next); From 79a85e214d62da9a750cc63ef49483e62abbda81 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Oct 2019 00:38:13 +0900 Subject: [PATCH 528/591] null_blk: Fix zoned command return code The return code from null_handle_zoned() sets the cmd->error value. Returning OK status when an error occured overwrites the intended cmd->error. Return the appropriate error code instead of setting the error in the cmd. Fixes: fceb5d1b19cbe626 ("null_blk: create a helper for zoned devices") Cc: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/null_blk_zoned.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index eabc116832a7..3d7fdea872f8 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -142,8 +142,7 @@ static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector) zone->wp = zone->start; break; default: - cmd->error = BLK_STS_NOTSUPP; - break; + return BLK_STS_NOTSUPP; } return BLK_STS_OK; } From 05668e1d74b84c53fbe0f28565e4c9502a6b8a67 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 30 Sep 2019 17:38:02 +0200 Subject: [PATCH 529/591] s390/cio: fix virtio-ccw DMA without PV Commit 37db8985b211 ("s390/cio: add basic protected virtualization support") breaks virtio-ccw devices with VIRTIO_F_IOMMU_PLATFORM for non Protected Virtualization (PV) guests. The problem is that the dma_mask of the ccw device, which is used by virtio core, gets changed from 64 to 31 bit, because some of the DMA allocations do require 31 bit addressable memory. For PV the only drawback is that some of the virtio structures must end up in ZONE_DMA because we have the bounce the buffers mapped via DMA API anyway. But for non PV guests we have a problem: because of the 31 bit mask guests bigger than 2G are likely to try bouncing buffers. The swiotlb however is only initialized for PV guests, because we don't want to bounce anything for non PV guests. The first such map kills the guest. Since the DMA API won't allow us to specify for each allocation whether we need memory from ZONE_DMA (31 bit addressable) or any DMA capable memory will do, let us use coherent_dma_mask (which is used for allocations) to force allocating form ZONE_DMA while changing dma_mask to DMA_BIT_MASK(64) so that at least the streaming API will regard the whole memory DMA capable. Signed-off-by: Halil Pasic Reported-by: Marc Hartmayer Suggested-by: Robin Murphy Fixes: 37db8985b211 ("s390/cio: add basic protected virtualization support") Link: https://lore.kernel.org/lkml/20190930153803.7958-1-pasic@linux.ibm.com Reviewed-by: Christoph Hellwig Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- drivers/s390/cio/cio.h | 1 + drivers/s390/cio/css.c | 7 ++++++- drivers/s390/cio/device.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index ba7d2480613b..dcdaba689b20 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -113,6 +113,7 @@ struct subchannel { enum sch_todo todo; struct work_struct todo_work; struct schib_config config; + u64 dma_mask; char *driver_override; /* Driver name to force a match */ } __attribute__ ((aligned(8))); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 1fbfb0a93f5f..831850435c23 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -232,7 +232,12 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid, * belong to a subchannel need to fit 31 bit width (e.g. ccw). */ sch->dev.coherent_dma_mask = DMA_BIT_MASK(31); - sch->dev.dma_mask = &sch->dev.coherent_dma_mask; + /* + * But we don't have such restrictions imposed on the stuff that + * is handled by the streaming API. + */ + sch->dma_mask = DMA_BIT_MASK(64); + sch->dev.dma_mask = &sch->dma_mask; return sch; err: diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 131430bd48d9..0c6245fc7706 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -710,7 +710,7 @@ static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch) if (!cdev->private) goto err_priv; cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask; - cdev->dev.dma_mask = &cdev->dev.coherent_dma_mask; + cdev->dev.dma_mask = sch->dev.dma_mask; dma_pool = cio_gp_dma_create(&cdev->dev, 1); if (!dma_pool) goto err_dma_pool; From fd70c7755bf0172ddd33b558aef69c322de3b5cf Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 24 Sep 2019 16:17:02 +0300 Subject: [PATCH 530/591] drm/bridge: tc358767: fix max_tu_symbol value max_tu_symbol was programmed to TU_SIZE_RECOMMENDED - 1, which is not what the spec says. The spec says: roundup ((input active video bandwidth in bytes/output active video bandwidth in bytes) * tu_size) It is not quite clear what the above means, but calculating max_tu_symbol = (input Bps / output Bps) * tu_size seems to work and fixes the issues seen. This fixes artifacts in some videomodes (e.g. 1024x768@60 on 2-lanes & 1.62Gbps was pretty bad for me). Signed-off-by: Tomi Valkeinen Tested-by: Jyri Sarha Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190924131702.9988-1-tomi.valkeinen@ti.com --- drivers/gpu/drm/bridge/tc358767.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index cebc8e620820..8a8d605021f0 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -728,6 +728,8 @@ static int tc_set_video_mode(struct tc_data *tc, int lower_margin = mode->vsync_start - mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; u32 dp0_syncval; + u32 bits_per_pixel = 24; + u32 in_bw, out_bw; /* * Recommended maximum number of symbols transferred in a transfer unit: @@ -735,7 +737,10 @@ static int tc_set_video_mode(struct tc_data *tc, * (output active video bandwidth in bytes)) * Must be less than tu_size. */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + + in_bw = mode->clock * bits_per_pixel / 8; + out_bw = tc->link.base.num_lanes * tc->link.base.rate; + max_tu_symbol = DIV_ROUND_UP(in_bw * TU_SIZE_RECOMMENDED, out_bw); dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); From bed5ef230943863b9abf5eae226a20fad9a8ff71 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 19:09:42 +0200 Subject: [PATCH 531/591] USB: usb-skeleton: fix NULL-deref on disconnect The driver was using its struct usb_interface pointer as an inverted disconnected flag and was setting it to NULL before making sure all completion handlers had run. This could lead to NULL-pointer dereferences in the dev_err() statements in the completion handlers which relies on said pointer. Fix this by using a dedicated disconnected flag. Note that this is also addresses a NULL-pointer dereference at release() and a struct usb_interface reference leak introduced by a recent runtime PM fix, which depends on and should have been submitted together with this patch. Fixes: 4212cd74ca6f ("USB: usb-skeleton.c: remove err() usage") Fixes: 5c290a5e42c3 ("USB: usb-skeleton: fix runtime PM after driver unbind") Cc: stable Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009170944.30057-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usb-skeleton.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c index 8001d6384c73..c2843fcfa52d 100644 --- a/drivers/usb/usb-skeleton.c +++ b/drivers/usb/usb-skeleton.c @@ -61,6 +61,7 @@ struct usb_skel { spinlock_t err_lock; /* lock for errors */ struct kref kref; struct mutex io_mutex; /* synchronize I/O with disconnect */ + unsigned long disconnected:1; wait_queue_head_t bulk_in_wait; /* to wait for an ongoing read */ }; #define to_skel_dev(d) container_of(d, struct usb_skel, kref) @@ -238,7 +239,7 @@ static ssize_t skel_read(struct file *file, char *buffer, size_t count, if (rv < 0) return rv; - if (!dev->interface) { /* disconnect() was called */ + if (dev->disconnected) { /* disconnect() was called */ rv = -ENODEV; goto exit; } @@ -420,7 +421,7 @@ static ssize_t skel_write(struct file *file, const char *user_buffer, /* this lock makes sure we don't submit URBs to gone devices */ mutex_lock(&dev->io_mutex); - if (!dev->interface) { /* disconnect() was called */ + if (dev->disconnected) { /* disconnect() was called */ mutex_unlock(&dev->io_mutex); retval = -ENODEV; goto error; @@ -571,7 +572,7 @@ static void skel_disconnect(struct usb_interface *interface) /* prevent more I/O from starting */ mutex_lock(&dev->io_mutex); - dev->interface = NULL; + dev->disconnected = 1; mutex_unlock(&dev->io_mutex); usb_kill_anchored_urbs(&dev->submitted); From 6353001852776e7eeaab4da78922d4c6f2b076af Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 19:09:43 +0200 Subject: [PATCH 532/591] USB: usb-skeleton: fix use-after-free after driver unbind The driver failed to stop its read URB on disconnect, something which could lead to a use-after-free in the completion handler after driver unbind in case the character device has been closed. Fixes: e7389cc9a7ff ("USB: skel_read really sucks royally") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009170944.30057-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usb-skeleton.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c index c2843fcfa52d..be311787403e 100644 --- a/drivers/usb/usb-skeleton.c +++ b/drivers/usb/usb-skeleton.c @@ -575,6 +575,7 @@ static void skel_disconnect(struct usb_interface *interface) dev->disconnected = 1; mutex_unlock(&dev->io_mutex); + usb_kill_urb(dev->bulk_in_urb); usb_kill_anchored_urbs(&dev->submitted); /* decrement our usage count */ From 369dca424a3f9ef80bc4e5a5ccdd41a17ce306c1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 19:09:44 +0200 Subject: [PATCH 533/591] USB: usb-skeleton: drop redundant in-urb check The driver bails out at probe if we can't find a bulk-in endpoint or if we fail to allocate the URB, so drop the check in read(). Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009170944.30057-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usb-skeleton.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c index be311787403e..2dc58766273a 100644 --- a/drivers/usb/usb-skeleton.c +++ b/drivers/usb/usb-skeleton.c @@ -230,8 +230,7 @@ static ssize_t skel_read(struct file *file, char *buffer, size_t count, dev = file->private_data; - /* if we cannot read at all, return EOF */ - if (!dev->bulk_in_urb || !count) + if (!count) return 0; /* no concurrent readers */ From ac9099e10a603f02f52f583fba78dd54c52b5542 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 7 Oct 2019 15:16:01 +0300 Subject: [PATCH 534/591] usb: cdns3: gadget: Fix full-speed mode We need to disable USB3 PHY for full-speed mode else gadget mode is broken. Signed-off-by: Roger Quadros Signed-off-by: Sekhar Nori Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20191007121601.25996-3-rogerq@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 228cdc4ab886..157536753b8c 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -2571,6 +2571,7 @@ static int cdns3_gadget_start(struct cdns3 *cdns) switch (max_speed) { case USB_SPEED_FULL: writel(USB_CONF_SFORCE_FS, &priv_dev->regs->usb_conf); + writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf); break; case USB_SPEED_HIGH: writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf); From 02ffc26df96b487f476b8945eaef13c7f170e79a Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 7 Oct 2019 15:16:00 +0300 Subject: [PATCH 535/591] usb: cdns3: fix cdns3_core_init_role() At startup we should trigger the HW state machine only if it is OTG mode. Otherwise we should just start the respective role. Initialize idle role by default. If we don't do this then cdns3_idle_role_stop() is not called when switching to host/device role and so lane switch mechanism doesn't work. This results to super-speed device not working in one orientation if it was plugged before driver probe. Signed-off-by: Roger Quadros Signed-off-by: Sekhar Nori Link: https://lore.kernel.org/r/20191007121601.25996-2-rogerq@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/core.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 06f1e105be4e..1109dc5a4c39 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -160,10 +160,28 @@ static int cdns3_core_init_role(struct cdns3 *cdns) if (ret) goto err; - if (cdns->dr_mode != USB_DR_MODE_OTG) { + /* Initialize idle role to start with */ + ret = cdns3_role_start(cdns, USB_ROLE_NONE); + if (ret) + goto err; + + switch (cdns->dr_mode) { + case USB_DR_MODE_UNKNOWN: + case USB_DR_MODE_OTG: ret = cdns3_hw_role_switch(cdns); if (ret) goto err; + break; + case USB_DR_MODE_PERIPHERAL: + ret = cdns3_role_start(cdns, USB_ROLE_DEVICE); + if (ret) + goto err; + break; + case USB_DR_MODE_HOST: + ret = cdns3_role_start(cdns, USB_ROLE_HOST); + if (ret) + goto err; + break; } return ret; From eb21a74adaa11846737f503dd618bd35337e2c37 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Mon, 7 Oct 2019 13:03:23 +0100 Subject: [PATCH 536/591] usb: cdns3: Fix for incorrect DMA mask. This patch restores the correct DMA mask after switching back to device mode. The issue occurred because Device part of controller use 32 bits DMA and Host side use 64 bits DMA. During loading XHCI driver the DMA mask used by driver is overwritten by XHCI driver so it must be restored to 32 bits. Reported-by: Pawel Laszczak Signed-off-by: Roger Quadros Signed-off-by: Pawel Laszczak Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Reviewed-by: Peter Chen Tested-by: Roger Quadros Link: https://lore.kernel.org/r/1570449803-15299-1-git-send-email-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/gadget.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 157536753b8c..2ca280f4c054 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -2663,6 +2663,13 @@ static int __cdns3_gadget_init(struct cdns3 *cdns) { int ret = 0; + /* Ensure 32-bit DMA Mask in case we switched back from Host mode */ + ret = dma_set_mask_and_coherent(cdns->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(cdns->dev, "Failed to set dma mask: %d\n", ret); + return ret; + } + cdns3_drd_switch_gadget(cdns, 1); pm_runtime_get_sync(cdns->dev); From 726b55d0e22ca72c69c947af87785c830289ddbc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 17:38:47 +0200 Subject: [PATCH 537/591] USB: legousbtower: fix use-after-free on release The driver was accessing its struct usb_device in its release() callback without holding a reference. This would lead to a use-after-free whenever the device was disconnected while the character device was still open. Fixes: fef526cae700 ("USB: legousbtower: remove custom debug macro") Cc: stable # 3.12 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009153848.8664-5-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/legousbtower.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 44d6a3381804..9d4c52a7ebe0 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -296,6 +296,7 @@ static inline void tower_delete (struct lego_usb_tower *dev) kfree (dev->read_buffer); kfree (dev->interrupt_in_buffer); kfree (dev->interrupt_out_buffer); + usb_put_dev(dev->udev); kfree (dev); } @@ -810,7 +811,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device mutex_init(&dev->lock); - dev->udev = udev; + dev->udev = usb_get_dev(udev); dev->open_count = 0; dev->disconnected = 0; From 58ecf131e74620305175a7aa103f81350bb37570 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 17:38:46 +0200 Subject: [PATCH 538/591] USB: ldusb: fix NULL-derefs on driver unbind The driver was using its struct usb_interface pointer as an inverted disconnected flag, but was setting it to NULL before making sure all completion handlers had run. This could lead to a NULL-pointer dereference in a number of dev_dbg, dev_warn and dev_err statements in the completion handlers which relies on said pointer. Fix this by unconditionally stopping all I/O and preventing resubmissions by poisoning the interrupt URBs at disconnect and using a dedicated disconnected flag. This also makes sure that all I/O has completed by the time the disconnect callback returns. Fixes: 2824bd250f0b ("[PATCH] USB: add ldusb driver") Cc: stable # 2.6.13 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009153848.8664-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 6581774bdfa4..f3108d85e768 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -153,6 +153,7 @@ MODULE_PARM_DESC(min_interrupt_out_interval, "Minimum interrupt out interval in struct ld_usb { struct mutex mutex; /* locks this structure */ struct usb_interface *intf; /* save off the usb interface pointer */ + unsigned long disconnected:1; int open_count; /* number of times this port has been opened */ @@ -192,12 +193,10 @@ static void ld_usb_abort_transfers(struct ld_usb *dev) /* shutdown transfer */ if (dev->interrupt_in_running) { dev->interrupt_in_running = 0; - if (dev->intf) - usb_kill_urb(dev->interrupt_in_urb); + usb_kill_urb(dev->interrupt_in_urb); } if (dev->interrupt_out_busy) - if (dev->intf) - usb_kill_urb(dev->interrupt_out_urb); + usb_kill_urb(dev->interrupt_out_urb); } /** @@ -205,8 +204,6 @@ static void ld_usb_abort_transfers(struct ld_usb *dev) */ static void ld_usb_delete(struct ld_usb *dev) { - ld_usb_abort_transfers(dev); - /* free data structures */ usb_free_urb(dev->interrupt_in_urb); usb_free_urb(dev->interrupt_out_urb); @@ -263,7 +260,7 @@ static void ld_usb_interrupt_in_callback(struct urb *urb) resubmit: /* resubmit if we're still running */ - if (dev->interrupt_in_running && !dev->buffer_overflow && dev->intf) { + if (dev->interrupt_in_running && !dev->buffer_overflow) { retval = usb_submit_urb(dev->interrupt_in_urb, GFP_ATOMIC); if (retval) { dev_err(&dev->intf->dev, @@ -392,7 +389,7 @@ static int ld_usb_release(struct inode *inode, struct file *file) retval = -ENODEV; goto unlock_exit; } - if (dev->intf == NULL) { + if (dev->disconnected) { /* the device was unplugged before the file was released */ mutex_unlock(&dev->mutex); /* unlock here as ld_usb_delete frees dev */ @@ -423,7 +420,7 @@ static __poll_t ld_usb_poll(struct file *file, poll_table *wait) dev = file->private_data; - if (!dev->intf) + if (dev->disconnected) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); @@ -462,7 +459,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, } /* verify that the device wasn't unplugged */ - if (dev->intf == NULL) { + if (dev->disconnected) { retval = -ENODEV; printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval); goto unlock_exit; @@ -542,7 +539,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, } /* verify that the device wasn't unplugged */ - if (dev->intf == NULL) { + if (dev->disconnected) { retval = -ENODEV; printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval); goto unlock_exit; @@ -764,6 +761,9 @@ static void ld_usb_disconnect(struct usb_interface *intf) /* give back our minor */ usb_deregister_dev(intf, &ld_usb_class); + usb_poison_urb(dev->interrupt_in_urb); + usb_poison_urb(dev->interrupt_out_urb); + mutex_lock(&dev->mutex); /* if the device is not opened, then we clean up right now */ @@ -771,7 +771,7 @@ static void ld_usb_disconnect(struct usb_interface *intf) mutex_unlock(&dev->mutex); ld_usb_delete(dev); } else { - dev->intf = NULL; + dev->disconnected = 1; /* wake up pollers */ wake_up_interruptible_all(&dev->read_wait); wake_up_interruptible_all(&dev->write_wait); From 123a0f125fa3d2104043697baa62899d9e549272 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 17:38:44 +0200 Subject: [PATCH 539/591] USB: adutux: fix use-after-free on release The driver was accessing its struct usb_device in its release() callback without holding a reference. This would lead to a use-after-free whenever the device was disconnected while the character device was still open. Fixes: 66d4bc30d128 ("USB: adutux: remove custom debug macro") Cc: stable # 3.12 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009153848.8664-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/adutux.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index f9efec719359..6f5edb9fc61e 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -149,6 +149,7 @@ static void adu_delete(struct adu_device *dev) kfree(dev->read_buffer_secondary); kfree(dev->interrupt_in_buffer); kfree(dev->interrupt_out_buffer); + usb_put_dev(dev->udev); kfree(dev); } @@ -664,7 +665,7 @@ static int adu_probe(struct usb_interface *interface, mutex_init(&dev->mtx); spin_lock_init(&dev->buflock); - dev->udev = udev; + dev->udev = usb_get_dev(udev); init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); From 93ddb1f56ae102f14f9e46a9a9c8017faa970003 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 17:38:45 +0200 Subject: [PATCH 540/591] USB: chaoskey: fix use-after-free on release The driver was accessing its struct usb_interface in its release() callback without holding a reference. This would lead to a use-after-free whenever the device was disconnected while the character device was still open. Fixes: 66e3e591891d ("usb: Add driver for Altus Metrum ChaosKey device (v2)") Cc: stable # 4.1 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009153848.8664-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/chaoskey.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index cf5828ce927a..34e6cd6f40d3 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -98,6 +98,7 @@ static void chaoskey_free(struct chaoskey *dev) usb_free_urb(dev->urb); kfree(dev->name); kfree(dev->buf); + usb_put_intf(dev->interface); kfree(dev); } } @@ -145,6 +146,8 @@ static int chaoskey_probe(struct usb_interface *interface, if (dev == NULL) goto out; + dev->interface = usb_get_intf(interface); + dev->buf = kmalloc(size, GFP_KERNEL); if (dev->buf == NULL) @@ -174,8 +177,6 @@ static int chaoskey_probe(struct usb_interface *interface, goto out; } - dev->interface = interface; - dev->in_ep = in_ep; if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID) From edc4746f253d907d048de680a621e121517f484b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 12:48:41 +0200 Subject: [PATCH 541/591] USB: iowarrior: fix use-after-free on disconnect A recent fix addressing a deadlock on disconnect introduced a new bug by moving the present flag out of the critical section protected by the driver-data mutex. This could lead to a racing release() freeing the driver data before disconnect() is done with it. Due to insufficient locking a related use-after-free could be triggered also before the above mentioned commit. Specifically, the driver needs to hold the driver-data mutex also while checking the opened flag at disconnect(). Fixes: c468a8aa790e ("usb: iowarrior: fix deadlock on disconnect") Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Cc: stable # 2.6.21 Reported-by: syzbot+0761012cebf7bdb38137@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009104846.5925-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index f5bed9f29e56..4fe1d3267b3c 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -866,8 +866,6 @@ static void iowarrior_disconnect(struct usb_interface *interface) dev = usb_get_intfdata(interface); mutex_lock(&iowarrior_open_disc_lock); usb_set_intfdata(interface, NULL); - /* prevent device read, write and ioctl */ - dev->present = 0; minor = dev->minor; mutex_unlock(&iowarrior_open_disc_lock); @@ -878,8 +876,7 @@ static void iowarrior_disconnect(struct usb_interface *interface) mutex_lock(&dev->mutex); /* prevent device read, write and ioctl */ - - mutex_unlock(&dev->mutex); + dev->present = 0; if (dev->opened) { /* There is a process that holds a filedescriptor to the device , @@ -889,8 +886,10 @@ static void iowarrior_disconnect(struct usb_interface *interface) usb_kill_urb(dev->int_in_urb); wake_up_interruptible(&dev->read_wait); wake_up_interruptible(&dev->write_wait); + mutex_unlock(&dev->mutex); } else { /* no process is using the device, cleanup now */ + mutex_unlock(&dev->mutex); iowarrior_delete(dev); } From 80cd5479b525093a56ef768553045741af61b250 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 12:48:42 +0200 Subject: [PATCH 542/591] USB: iowarrior: fix use-after-free on release The driver was accessing its struct usb_interface from its release() callback without holding a reference. This would lead to a use-after-free whenever debugging was enabled and the device was disconnected while its character device was open. Fixes: 549e83500b80 ("USB: iowarrior: Convert local dbg macro to dev_dbg") Cc: stable # 3.16 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009104846.5925-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 4fe1d3267b3c..6841267820c6 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -243,6 +243,7 @@ static inline void iowarrior_delete(struct iowarrior *dev) kfree(dev->int_in_buffer); usb_free_urb(dev->int_in_urb); kfree(dev->read_queue); + usb_put_intf(dev->interface); kfree(dev); } @@ -764,7 +765,7 @@ static int iowarrior_probe(struct usb_interface *interface, init_waitqueue_head(&dev->write_wait); dev->udev = udev; - dev->interface = interface; + dev->interface = usb_get_intf(interface); iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); From b5f8d46867ca233d773408ffbe691a8062ed718f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 12:48:43 +0200 Subject: [PATCH 543/591] USB: iowarrior: fix use-after-free after driver unbind Make sure to stop also the asynchronous write URBs on disconnect() to avoid use-after-free in the completion handler after driver unbind. Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Cc: stable # 2.6.21: 51a2f077c44e ("USB: introduce usb_anchor") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009104846.5925-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 6841267820c6..f405fa734bcc 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -87,6 +87,7 @@ struct iowarrior { char chip_serial[9]; /* the serial number string of the chip connected */ int report_size; /* number of bytes in a report */ u16 product_id; + struct usb_anchor submitted; }; /*--------------*/ @@ -425,11 +426,13 @@ static ssize_t iowarrior_write(struct file *file, retval = -EFAULT; goto error; } + usb_anchor_urb(int_out_urb, &dev->submitted); retval = usb_submit_urb(int_out_urb, GFP_KERNEL); if (retval) { dev_dbg(&dev->interface->dev, "submit error %d for urb nr.%d\n", retval, atomic_read(&dev->write_busy)); + usb_unanchor_urb(int_out_urb); goto error; } /* submit was ok */ @@ -770,6 +773,8 @@ static int iowarrior_probe(struct usb_interface *interface, iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); + init_usb_anchor(&dev->submitted); + res = usb_find_last_int_in_endpoint(iface_desc, &dev->int_in_endpoint); if (res) { dev_err(&interface->dev, "no interrupt-in endpoint found\n"); @@ -885,6 +890,7 @@ static void iowarrior_disconnect(struct usb_interface *interface) Deleting the device is postponed until close() was called. */ usb_kill_urb(dev->int_in_urb); + usb_kill_anchored_urbs(&dev->submitted); wake_up_interruptible(&dev->read_wait); wake_up_interruptible(&dev->write_wait); mutex_unlock(&dev->mutex); From 7c5b971d623fdb40c03205e99f9ef68002b34726 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 12:48:44 +0200 Subject: [PATCH 544/591] USB: iowarrior: drop redundant disconnect mutex Drop the redundant disconnect mutex which was introduced after the open-disconnect race had been addressed generally in USB core by commit d4ead16f50f9 ("USB: prevent char device open/deregister race"). Specifically, the rw-semaphore in core guarantees that all calls to open() will have completed and that no new calls to open() will occur after usb_deregister_dev() returns. Hence there is no need use the driver data as an inverted disconnected flag. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009104846.5925-5-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index f405fa734bcc..d844c2098e42 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -58,7 +58,6 @@ MODULE_LICENSE("GPL"); static DEFINE_MUTEX(iowarrior_mutex); static struct usb_driver iowarrior_driver; -static DEFINE_MUTEX(iowarrior_open_disc_lock); /*--------------*/ /* data */ @@ -601,16 +600,13 @@ static int iowarrior_open(struct inode *inode, struct file *file) return -ENODEV; } - mutex_lock(&iowarrior_open_disc_lock); dev = usb_get_intfdata(interface); if (!dev) { - mutex_unlock(&iowarrior_open_disc_lock); mutex_unlock(&iowarrior_mutex); return -ENODEV; } mutex_lock(&dev->mutex); - mutex_unlock(&iowarrior_open_disc_lock); /* Only one process can open each device, no sharing. */ if (dev->opened) { @@ -842,7 +838,6 @@ static int iowarrior_probe(struct usb_interface *interface, if (retval) { /* something prevented us from registering this driver */ dev_err(&interface->dev, "Not able to get a minor for this device.\n"); - usb_set_intfdata(interface, NULL); goto error; } @@ -866,16 +861,8 @@ error: */ static void iowarrior_disconnect(struct usb_interface *interface) { - struct iowarrior *dev; - int minor; - - dev = usb_get_intfdata(interface); - mutex_lock(&iowarrior_open_disc_lock); - usb_set_intfdata(interface, NULL); - - minor = dev->minor; - mutex_unlock(&iowarrior_open_disc_lock); - /* give back our minor - this will call close() locks need to be dropped at this point*/ + struct iowarrior *dev = usb_get_intfdata(interface); + int minor = dev->minor; usb_deregister_dev(interface, &iowarrior_class); From 8d33e828f72c216ae264ad39088a595d9a6cc95c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 12:48:45 +0200 Subject: [PATCH 545/591] USB: iowarrior: drop redundant iowarrior mutex Drop the redundant iowarrior mutex introduced by commit 925ce689bb31 ("USB: autoconvert trivial BKL users to private mutex") which replaced an earlier BKL use. The lock serialised calls to open() against other open() and ioctl(), but neither is needed. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009104846.5925-6-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index d844c2098e42..ad29ef51e53f 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -54,9 +54,6 @@ MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -/* Module parameters */ -static DEFINE_MUTEX(iowarrior_mutex); - static struct usb_driver iowarrior_driver; /*--------------*/ @@ -480,8 +477,6 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, if (!buffer) return -ENOMEM; - /* lock this object */ - mutex_lock(&iowarrior_mutex); mutex_lock(&dev->mutex); /* verify that the device wasn't unplugged */ @@ -574,7 +569,6 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, error_out: /* unlock the device */ mutex_unlock(&dev->mutex); - mutex_unlock(&iowarrior_mutex); kfree(buffer); return retval; } @@ -589,22 +583,18 @@ static int iowarrior_open(struct inode *inode, struct file *file) int subminor; int retval = 0; - mutex_lock(&iowarrior_mutex); subminor = iminor(inode); interface = usb_find_interface(&iowarrior_driver, subminor); if (!interface) { - mutex_unlock(&iowarrior_mutex); printk(KERN_ERR "%s - error, can't find device for minor %d\n", __func__, subminor); return -ENODEV; } dev = usb_get_intfdata(interface); - if (!dev) { - mutex_unlock(&iowarrior_mutex); + if (!dev) return -ENODEV; - } mutex_lock(&dev->mutex); @@ -628,7 +618,6 @@ static int iowarrior_open(struct inode *inode, struct file *file) out: mutex_unlock(&dev->mutex); - mutex_unlock(&iowarrior_mutex); return retval; } From ebb2fe57a51c630e0f852becbbdd295ad5d60514 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 12:48:46 +0200 Subject: [PATCH 546/591] USB: iowarrior: use pr_err() Replace the one remaining printk with pr_err(). Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009104846.5925-7-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index ad29ef51e53f..dce44fbf031f 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -587,7 +587,7 @@ static int iowarrior_open(struct inode *inode, struct file *file) interface = usb_find_interface(&iowarrior_driver, subminor); if (!interface) { - printk(KERN_ERR "%s - error, can't find device for minor %d\n", + pr_err("%s - error, can't find device for minor %d\n", __func__, subminor); return -ENODEV; } From ff30283a8de4f978dad120936c1af507d01a6a98 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 8 Oct 2019 13:46:53 -0700 Subject: [PATCH 547/591] serial: fix kernel-doc warning in comments Fix Sphinx warning in serial_core.c: ../drivers/tty/serial/serial_core.c:1969: WARNING: Definition list ends without a blank line; unexpected unindent. Fixes: 73abaf87f01b ("serial: earlycon: Refactor parse_options into serial core") Signed-off-by: Randy Dunlap Cc: Peter Hurley Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/e989641c-224a-1090-e596-e7cc800bed44@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 6e713be1d4e9..c4a414a46c7f 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1964,8 +1964,10 @@ uart_get_console(struct uart_port *ports, int nr, struct console *co) * console=,io|mmio|mmio16|mmio32|mmio32be|mmio32native,, * * The optional form + * * earlycon=,0x, * console=,0x, + * * is also accepted; the returned @iotype will be UPIO_MEM. * * Returns 0 on success or -EINVAL on failure From 31a8d8fa84c51d3ab00bf059158d5de6178cf890 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 9 Oct 2019 17:49:19 +0800 Subject: [PATCH 548/591] tty: serial: imx: Use platform_get_irq_optional() for optional IRQs All i.MX SoCs except i.MX1 have ONLY one necessary IRQ, use platform_get_irq_optional() to get second/third IRQ which are optional to avoid below error message during probe: [ 0.726219] imx-uart 30860000.serial: IRQ index 1 not found [ 0.731329] imx-uart 30860000.serial: IRQ index 2 not found Fixes: 7723f4c5ecdb8d83 ("driver core: platform: Add an error message to platform_get_irq*()") Signed-off-by: Anson Huang Link: https://lore.kernel.org/r/1570614559-11900-1-git-send-email-Anson.Huang@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 87c58f9f6390..5e08f2657b90 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2222,8 +2222,8 @@ static int imx_uart_probe(struct platform_device *pdev) return PTR_ERR(base); rxirq = platform_get_irq(pdev, 0); - txirq = platform_get_irq(pdev, 1); - rtsirq = platform_get_irq(pdev, 2); + txirq = platform_get_irq_optional(pdev, 1); + rtsirq = platform_get_irq_optional(pdev, 2); sport->port.dev = &pdev->dev; sport->port.mapbase = res->start; From aafb00a977cf7d81821f7c9d12e04c558c22dc3c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2019 17:38:48 +0200 Subject: [PATCH 549/591] USB: yurex: fix NULL-derefs on disconnect The driver was using its struct usb_interface pointer as an inverted disconnected flag, but was setting it to NULL without making sure all code paths that used it were done with it. Before commit ef61eb43ada6 ("USB: yurex: Fix protection fault after device removal") this included the interrupt-in completion handler, but there are further accesses in dev_err and dev_dbg statements in yurex_write() and the driver-data destructor (sic!). Fix this by unconditionally stopping also the control URB at disconnect and by using a dedicated disconnected flag. Note that we need to take a reference to the struct usb_interface to avoid a use-after-free in the destructor whenever the device was disconnected while the character device was still open. Fixes: aadd6472d904 ("USB: yurex.c: remove dbg() usage") Fixes: 45714104b9e8 ("USB: yurex.c: remove err() usage") Cc: stable # 3.5: ef61eb43ada6 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191009153848.8664-6-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 8d52d4336c29..be0505b8b5d4 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -60,6 +60,7 @@ struct usb_yurex { struct kref kref; struct mutex io_mutex; + unsigned long disconnected:1; struct fasync_struct *async_queue; wait_queue_head_t waitq; @@ -107,6 +108,7 @@ static void yurex_delete(struct kref *kref) dev->int_buffer, dev->urb->transfer_dma); usb_free_urb(dev->urb); } + usb_put_intf(dev->interface); usb_put_dev(dev->udev); kfree(dev); } @@ -205,7 +207,7 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_ init_waitqueue_head(&dev->waitq); dev->udev = usb_get_dev(interface_to_usbdev(interface)); - dev->interface = interface; + dev->interface = usb_get_intf(interface); /* set up the endpoint information */ iface_desc = interface->cur_altsetting; @@ -316,8 +318,9 @@ static void yurex_disconnect(struct usb_interface *interface) /* prevent more I/O from starting */ usb_poison_urb(dev->urb); + usb_poison_urb(dev->cntl_urb); mutex_lock(&dev->io_mutex); - dev->interface = NULL; + dev->disconnected = 1; mutex_unlock(&dev->io_mutex); /* wakeup waiters */ @@ -405,7 +408,7 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, dev = file->private_data; mutex_lock(&dev->io_mutex); - if (!dev->interface) { /* already disconnected */ + if (dev->disconnected) { /* already disconnected */ mutex_unlock(&dev->io_mutex); return -ENODEV; } @@ -440,7 +443,7 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer, goto error; mutex_lock(&dev->io_mutex); - if (!dev->interface) { /* already disconnected */ + if (dev->disconnected) { /* already disconnected */ mutex_unlock(&dev->io_mutex); retval = -ENODEV; goto error; From 51d8a7eca67784b155a07aeab4bfb9f63ebaaf9e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 8 Oct 2019 15:01:59 +0200 Subject: [PATCH 550/591] binder: prevent UAF read in print_binder_transaction_log_entry() When a binder transaction is initiated on a binder device coming from a binderfs instance, a pointer to the name of the binder device is stashed in the binder_transaction_log_entry's context_name member. Later on it is used to print the name in print_binder_transaction_log_entry(). By the time print_binder_transaction_log_entry() accesses context_name binderfs_evict_inode() might have already freed the associated memory thereby causing a UAF. Do the simple thing and prevent this by copying the name of the binder device instead of stashing a pointer to it. Reported-by: Jann Horn Fixes: 03e2e07e3814 ("binder: Make transaction_log available in binderfs") Link: https://lore.kernel.org/r/CAG48ez14Q0-F8LqsvcNbyR2o6gPW8SHXsm4u5jmD9MpsteM2Tw@mail.gmail.com Signed-off-by: Christian Brauner Reviewed-by: Joel Fernandes (Google) Acked-by: Todd Kjos Reviewed-by: Hridya Valsaraju Link: https://lore.kernel.org/r/20191008130159.10161-1-christian.brauner@ubuntu.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 4 +++- drivers/android/binder_internal.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c0a491277aca..5b9ac2122e89 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,7 @@ #include #include +#include #include @@ -2876,7 +2878,7 @@ static void binder_transaction(struct binder_proc *proc, e->target_handle = tr->target.handle; e->data_size = tr->data_size; e->offsets_size = tr->offsets_size; - e->context_name = proc->context->name; + strscpy(e->context_name, proc->context->name, BINDERFS_MAX_NAME); if (reply) { binder_inner_proc_lock(proc); diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index bd47f7f72075..ae991097d14d 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -130,7 +130,7 @@ struct binder_transaction_log_entry { int return_error_line; uint32_t return_error; uint32_t return_error_param; - const char *context_name; + char context_name[BINDERFS_MAX_NAME + 1]; }; struct binder_transaction_log { From 5dc54a06f6e575f146492661129d24f3b50d17bb Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 30 Sep 2019 16:12:50 -0400 Subject: [PATCH 551/591] binder: Fix comment headers on binder_alloc_prepare_to_free() binder_alloc_buffer_lookup() doesn't exist and is named "binder_alloc_prepare_to_free()". Correct the code comments to reflect this. Signed-off-by: Joel Fernandes (Google) Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20190930201250.139554-1-joel@joelfernandes.org Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 6d79a1b0d446..d42a8b2f636a 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -156,7 +156,7 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked( } /** - * binder_alloc_buffer_lookup() - get buffer given user ptr + * binder_alloc_prepare_to_free() - get buffer given user ptr * @alloc: binder_alloc for this proc * @user_ptr: User pointer to buffer data * From e0b0cb9388642c104838fac100a4af32745621e2 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Mon, 30 Sep 2019 15:42:22 -0500 Subject: [PATCH 552/591] virt: vbox: fix memory leak in hgcm_call_preprocess_linaddr In hgcm_call_preprocess_linaddr memory is allocated for bounce_buf but is not released if copy_form_user fails. In order to prevent memory leak in case of failure, the assignment to bounce_buf_ret is moved before the error check. This way the allocated bounce_buf will be released by the caller. Fixes: 579db9d45cb4 ("virt: Add vboxguest VMMDEV communication code") Signed-off-by: Navid Emamdoost Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20190930204223.3660-1-navid.emamdoost@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/vboxguest_utils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index 75fd140b02ff..43c391626a00 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -220,6 +220,8 @@ static int hgcm_call_preprocess_linaddr( if (!bounce_buf) return -ENOMEM; + *bounce_buf_ret = bounce_buf; + if (copy_in) { ret = copy_from_user(bounce_buf, (void __user *)buf, len); if (ret) @@ -228,7 +230,6 @@ static int hgcm_call_preprocess_linaddr( memset(bounce_buf, 0, len); } - *bounce_buf_ret = bounce_buf; hgcm_call_add_pagelist_size(bounce_buf, len, extra); return 0; } From b058b2552edb4a6ce62922bf904149bbafd5fd9d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 20 Sep 2019 14:03:18 +0800 Subject: [PATCH 553/591] w1: ds250x: Fix build error without CRC16 If CRC16 is not set, building will fails: drivers/w1/slaves/w1_ds250x.o: In function `w1_ds2505_read_page': w1_ds250x.c:(.text+0x82f): undefined reference to `crc16' w1_ds250x.c:(.text+0x90a): undefined reference to `crc16' w1_ds250x.c:(.text+0x91a): undefined reference to `crc16' Reported-by: Hulk Robot Fixes: 25ec8710d9c2 ("w1: add DS2501, DS2502, DS2505 EPROM device driver") Signed-off-by: YueHaibing Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20190920060318.35020-1-yuehaibing@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/w1/slaves/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/w1/slaves/Kconfig b/drivers/w1/slaves/Kconfig index ebed495b9e69..b7847636501d 100644 --- a/drivers/w1/slaves/Kconfig +++ b/drivers/w1/slaves/Kconfig @@ -103,6 +103,7 @@ config W1_SLAVE_DS2438 config W1_SLAVE_DS250X tristate "512b/1kb/16kb EPROM family support" + select CRC16 help Say Y here if you want to use a 1-wire 512b/1kb/16kb EPROM family device (DS250x). From 574878f98c05aaae7e046cd18be9195f8da38cbd Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Thu, 10 Oct 2019 16:32:09 +0800 Subject: [PATCH 554/591] xen/grant-table: remove unnecessary printing xen_auto_xlat_grant_frames.vaddr is definitely NULL in this case. So the address printing is unnecessary. Signed-off-by: Fuqian Huang Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/grant-table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 7ea6fb6a2e5d..49b381e104ef 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -1363,8 +1363,7 @@ static int gnttab_setup(void) if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) { gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr; if (gnttab_shared.addr == NULL) { - pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n", - (unsigned long)xen_auto_xlat_grant_frames.vaddr); + pr_warn("gnttab share frames is not mapped!\n"); return -ENOMEM; } } From 5e48e55fb57a9026e6b3b6961def6d2aeb210659 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Oct 2019 14:30:46 +0200 Subject: [PATCH 555/591] MAINTAINERS: Remove Simon as Renesas SoC Co-Maintainer At the end of the v5.3 upstream kernel development cycle, Simon stepped down from his role as Renesas SoC maintainer. Remove his maintainership, git repository, and branch from the MAINTAINERS file, and add an entry to the CREDITS file to honor his work. Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- CREDITS | 4 ++++ MAINTAINERS | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CREDITS b/CREDITS index 8b67a85844b5..031605d46b4d 100644 --- a/CREDITS +++ b/CREDITS @@ -1637,6 +1637,10 @@ S: Panoramastrasse 18 S: D-69126 Heidelberg S: Germany +N: Simon Horman +M: horms@verge.net.au +D: Renesas ARM/ARM64 SoC maintainer + N: Christopher Horn E: chorn@warwick.net D: Miscellaneous sysctl hacks diff --git a/MAINTAINERS b/MAINTAINERS index 94ce075907a0..d44d6732510d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2165,12 +2165,10 @@ F: arch/arm64/boot/dts/realtek/ F: Documentation/devicetree/bindings/arm/realtek.yaml ARM/RENESAS ARM64 ARCHITECTURE -M: Simon Horman M: Geert Uytterhoeven M: Magnus Damm L: linux-renesas-soc@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next S: Supported F: arch/arm64/boot/dts/renesas/ @@ -2282,12 +2280,10 @@ S: Maintained F: drivers/media/platform/s5p-mfc/ ARM/SHMOBILE ARM ARCHITECTURE -M: Simon Horman M: Geert Uytterhoeven M: Magnus Damm L: linux-renesas-soc@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next S: Supported F: arch/arm/boot/dts/emev2* From ee7f5225dc3cc7c19df1603597532ff34571f895 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 8 Oct 2019 14:41:55 -0500 Subject: [PATCH 556/591] xen: Stop abusing DT of_dma_configure API As the removed comments say, these aren't DT based devices. of_dma_configure() is going to stop allowing a NULL DT node and calling it will no longer work. The comment is also now out of date as of commit 9ab91e7c5c51 ("arm64: default to the direct mapping in get_arch_dma_ops"). Direct mapping is now the default rather than dma_dummy_ops. According to Stefano and Oleksandr, the only other part needed is setting the DMA masks and there's no reason to restrict the masks to 32-bits. So set the masks to 64 bits. Cc: Robin Murphy Cc: Julien Grall Cc: Nicolas Saenz Julienne Cc: Oleksandr Andrushchenko Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Cc: Christoph Hellwig Cc: xen-devel@lists.xenproject.org Signed-off-by: Rob Herring Acked-by: Oleksandr Andrushchenko Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/gpu/drm/xen/xen_drm_front.c | 12 ++---------- drivers/xen/gntdev.c | 13 ++----------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 84aa4d61dc42..3406dfc9a605 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -716,17 +716,9 @@ static int xen_drv_probe(struct xenbus_device *xb_dev, struct device *dev = &xb_dev->dev; int ret; - /* - * The device is not spawn from a device tree, so arch_setup_dma_ops - * is not called, thus leaving the device with dummy DMA ops. - * This makes the device return error on PRIME buffer import, which - * is not correct: to fix this call of_dma_configure() with a NULL - * node to set default DMA ops. - */ - dev->coherent_dma_mask = DMA_BIT_MASK(32); - ret = of_dma_configure(dev, NULL, true); + ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret < 0) { - DRM_ERROR("Cannot setup DMA ops, ret %d", ret); + DRM_ERROR("Cannot setup DMA mask, ret %d", ret); return ret; } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index a446a7221e13..81401f386c9c 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -34,9 +35,6 @@ #include #include #include -#ifdef CONFIG_XEN_GRANT_DMA_ALLOC -#include -#endif #include #include @@ -625,14 +623,7 @@ static int gntdev_open(struct inode *inode, struct file *flip) flip->private_data = priv; #ifdef CONFIG_XEN_GRANT_DMA_ALLOC priv->dma_dev = gntdev_miscdev.this_device; - - /* - * The device is not spawn from a device tree, so arch_setup_dma_ops - * is not called, thus leaving the device with dummy DMA ops. - * Fix this by calling of_dma_configure() with a NULL node to set - * default DMA ops. - */ - of_dma_configure(priv->dma_dev, NULL, true); + dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64)); #endif pr_debug("priv %p\n", priv); From 862488105b84ca744b3d8ff131e0fcfe10644be1 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 19 Sep 2019 11:44:27 +0530 Subject: [PATCH 557/591] nbd: fix possible sysfs duplicate warning 1. nbd_put takes the mutex and drops nbd->ref to 0. It then does idr_remove and drops the mutex. 2. nbd_genl_connect takes the mutex. idr_find/idr_for_each fails to find an existing device, so it does nbd_dev_add. 3. just before the nbd_put could call nbd_dev_remove or not finished totally, but if nbd_dev_add try to add_disk, we can hit: debugfs: Directory 'nbd1' with parent 'block' already present! This patch will make sure all the disk add/remove stuff are done by holding the nbd_index_mutex lock. Reported-by: Mike Christie Reviewed-by: Josef Bacik Signed-off-by: Xiubo Li Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ac07e8c94c79..478aa86fc1f2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -248,8 +248,8 @@ static void nbd_put(struct nbd_device *nbd) if (refcount_dec_and_mutex_lock(&nbd->refs, &nbd_index_mutex)) { idr_remove(&nbd_index_idr, nbd->index); - mutex_unlock(&nbd_index_mutex); nbd_dev_remove(nbd); + mutex_unlock(&nbd_index_mutex); } } From 38dffe1e4dde1d3174fdce09d67370412843ebb5 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 10 Oct 2019 23:01:57 +0800 Subject: [PATCH 558/591] MIPS: elf_hwcap: Export userspace ASEs A Golang developer reported MIPS hwcap isn't reflecting instructions that the processor actually supported so programs can't apply optimized code at runtime. Thus we export the ASEs that can be used in userspace programs. Reported-by: Meng Zhuo Signed-off-by: Jiaxun Yang Cc: linux-mips@vger.kernel.org Cc: Paul Burton Cc: # 4.14+ Signed-off-by: Paul Burton --- arch/mips/include/uapi/asm/hwcap.h | 11 ++++++++++ arch/mips/kernel/cpu-probe.c | 33 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h index a2aba4b059e6..1ade1daa4921 100644 --- a/arch/mips/include/uapi/asm/hwcap.h +++ b/arch/mips/include/uapi/asm/hwcap.h @@ -6,5 +6,16 @@ #define HWCAP_MIPS_R6 (1 << 0) #define HWCAP_MIPS_MSA (1 << 1) #define HWCAP_MIPS_CRC32 (1 << 2) +#define HWCAP_MIPS_MIPS16 (1 << 3) +#define HWCAP_MIPS_MDMX (1 << 4) +#define HWCAP_MIPS_MIPS3D (1 << 5) +#define HWCAP_MIPS_SMARTMIPS (1 << 6) +#define HWCAP_MIPS_DSP (1 << 7) +#define HWCAP_MIPS_DSP2 (1 << 8) +#define HWCAP_MIPS_DSP3 (1 << 9) +#define HWCAP_MIPS_MIPS16E2 (1 << 10) +#define HWCAP_LOONGSON_MMI (1 << 11) +#define HWCAP_LOONGSON_EXT (1 << 12) +#define HWCAP_LOONGSON_EXT2 (1 << 13) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index c2eb392597bf..f521cbf934e7 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -2180,6 +2180,39 @@ void cpu_probe(void) elf_hwcap |= HWCAP_MIPS_MSA; } + if (cpu_has_mips16) + elf_hwcap |= HWCAP_MIPS_MIPS16; + + if (cpu_has_mdmx) + elf_hwcap |= HWCAP_MIPS_MDMX; + + if (cpu_has_mips3d) + elf_hwcap |= HWCAP_MIPS_MIPS3D; + + if (cpu_has_smartmips) + elf_hwcap |= HWCAP_MIPS_SMARTMIPS; + + if (cpu_has_dsp) + elf_hwcap |= HWCAP_MIPS_DSP; + + if (cpu_has_dsp2) + elf_hwcap |= HWCAP_MIPS_DSP2; + + if (cpu_has_dsp3) + elf_hwcap |= HWCAP_MIPS_DSP3; + + if (cpu_has_mips16e2) + elf_hwcap |= HWCAP_MIPS_MIPS16E2; + + if (cpu_has_loongson_mmi) + elf_hwcap |= HWCAP_LOONGSON_MMI; + + if (cpu_has_loongson_ext) + elf_hwcap |= HWCAP_LOONGSON_EXT; + + if (cpu_has_loongson_ext2) + elf_hwcap |= HWCAP_LOONGSON_EXT2; + if (cpu_has_vz) cpu_probe_vz(c); From 2f2b4fd674cadd8c6b40eb629e140a14db4068fd Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 10 Oct 2019 18:54:03 +0000 Subject: [PATCH 559/591] MIPS: Disable Loongson MMI instructions for kernel build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 9.x automatically enables support for Loongson MMI instructions when using some -march= flags, and then errors out when -msoft-float is specified with: cc1: error: ‘-mloongson-mmi’ must be used with ‘-mhard-float’ The kernel shouldn't be using these MMI instructions anyway, just as it doesn't use floating point instructions. Explicitly disable them in order to fix the build with GCC 9.x. Signed-off-by: Paul Burton Fixes: 3702bba5eb4f ("MIPS: Loongson: Add GCC 4.4 support for Loongson2E") Fixes: 6f7a251a259e ("MIPS: Loongson: Add basic Loongson 2F support") Fixes: 5188129b8c9f ("MIPS: Loongson-3: Improve -march option and move it to Platform") Cc: Huacai Chen Cc: Jiaxun Yang Cc: stable@vger.kernel.org # v2.6.32+ Cc: linux-mips@vger.kernel.org --- arch/mips/loongson64/Platform | 4 ++++ arch/mips/vdso/Makefile | 1 + 2 files changed, 5 insertions(+) diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index c1a4d4dc4665..9f79908f5063 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -66,6 +66,10 @@ else $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) endif +# Some -march= flags enable MMI instructions, and GCC complains about that +# support being enabled alongside -msoft-float. Thus explicitly disable MMI. +cflags-y += $(call cc-option,-mno-loongson-mmi) + # # Loongson Machines' Support # diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 807f0f782f75..996a934ece7d 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -15,6 +15,7 @@ ccflags-vdso := \ $(filter -mmicromips,$(KBUILD_CFLAGS)) \ $(filter -march=%,$(KBUILD_CFLAGS)) \ $(filter -m%-float,$(KBUILD_CFLAGS)) \ + $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \ -D__VDSO__ ifdef CONFIG_CC_IS_CLANG From 1047ec868332034d1fbcb2fae19fe6d4cb869ff2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 Oct 2019 09:58:54 -0400 Subject: [PATCH 560/591] NFSv4: Fix leak of clp->cl_acceptor string Our client can issue multiple SETCLIENTID operations to the same server in some circumstances. Ensure that calls to nfs4_proc_setclientid() after the first one do not overwrite the previously allocated cl_acceptor string. unreferenced object 0xffff888461031800 (size 32): comm "mount.nfs", pid 2227, jiffies 4294822467 (age 1407.749s) hex dump (first 32 bytes): 6e 66 73 40 6b 6c 69 6d 74 2e 69 62 2e 31 30 31 nfs@klimt.ib.101 35 67 72 61 6e 67 65 72 2e 6e 65 74 00 00 00 00 5granger.net.... backtrace: [<00000000ab820188>] __kmalloc+0x128/0x176 [<00000000eeaf4ec8>] gss_stringify_acceptor+0xbd/0x1a7 [auth_rpcgss] [<00000000e85e3382>] nfs4_proc_setclientid+0x34e/0x46c [nfsv4] [<000000003d9cf1fa>] nfs40_discover_server_trunking+0x7a/0xed [nfsv4] [<00000000b81c3787>] nfs4_discover_server_trunking+0x81/0x244 [nfsv4] [<000000000801b55f>] nfs4_init_client+0x1b0/0x238 [nfsv4] [<00000000977daf7f>] nfs4_set_client+0xfe/0x14d [nfsv4] [<0000000053a68a2a>] nfs4_create_server+0x107/0x1db [nfsv4] [<0000000088262019>] nfs4_remote_mount+0x2c/0x59 [nfsv4] [<00000000e84a2fd0>] legacy_get_tree+0x2d/0x4c [<00000000797e947c>] vfs_get_tree+0x20/0xc7 [<00000000ecabaaa8>] fc_mount+0xe/0x36 [<00000000f15fafc2>] vfs_kern_mount+0x74/0x8d [<00000000a3ff4e26>] nfs_do_root_mount+0x8a/0xa3 [nfsv4] [<00000000d1c2b337>] nfs4_try_mount+0x58/0xad [nfsv4] [<000000004c9bddee>] nfs_fs_mount+0x820/0x869 [nfs] Fixes: f11b2a1cfbf5 ("nfs4: copy acceptor name from context ... ") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 11eafcfc490b..ab8ca20fd579 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6106,6 +6106,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, status = nfs4_call_sync_custom(&task_setup_data); if (setclientid.sc_cred) { + kfree(clp->cl_acceptor); clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); put_rpccred(setclientid.sc_cred); } From af84537dbd1b39505d1f3d8023029b4a59666513 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 2 Oct 2019 10:40:55 -0400 Subject: [PATCH 561/591] SUNRPC: fix race to sk_err after xs_error_report Since commit 4f8943f80883 ("SUNRPC: Replace direct task wakeups from softirq context") there has been a race to the value of the sk_err if both XPRT_SOCK_WAKE_ERROR and XPRT_SOCK_WAKE_DISCONNECT are set. In that case, we may end up losing the sk_err value that existed when xs_error_report was called. Fix this by reverting to the previous behavior: instead of using SO_ERROR to retrieve the value at a later time (which might also return sk_err_soft), copy the sk_err value onto struct sock_xprt, and use that value to wake pending tasks. Signed-off-by: Benjamin Coddington Fixes: 4f8943f80883 ("SUNRPC: Replace direct task wakeups from softirq context") Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtsock.h | 1 + net/sunrpc/xprtsock.c | 17 ++++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 7638dbe7bc50..a940de03808d 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -61,6 +61,7 @@ struct sock_xprt { struct mutex recv_mutex; struct sockaddr_storage srcaddr; unsigned short srcport; + int xprt_err; /* * UDP socket buffer size parameters diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9ac88722fa83..70e52f567b2a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1249,19 +1249,21 @@ static void xs_error_report(struct sock *sk) { struct sock_xprt *transport; struct rpc_xprt *xprt; - int err; read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; transport = container_of(xprt, struct sock_xprt, xprt); - err = -sk->sk_err; - if (err == 0) + transport->xprt_err = -sk->sk_err; + if (transport->xprt_err == 0) goto out; dprintk("RPC: xs_error_report client %p, error=%d...\n", - xprt, -err); - trace_rpc_socket_error(xprt, sk->sk_socket, err); + xprt, -transport->xprt_err); + trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err); + + /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */ + smp_mb__before_atomic(); xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); out: read_unlock_bh(&sk->sk_callback_lock); @@ -2476,7 +2478,6 @@ static void xs_wake_write(struct sock_xprt *transport) static void xs_wake_error(struct sock_xprt *transport) { int sockerr; - int sockerr_len = sizeof(sockerr); if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) return; @@ -2485,9 +2486,7 @@ static void xs_wake_error(struct sock_xprt *transport) goto out; if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) goto out; - if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR, - (char *)&sockerr, &sockerr_len) != 0) - goto out; + sockerr = xchg(&transport->xprt_err, 0); if (sockerr < 0) xprt_wake_pending_tasks(&transport->xprt, sockerr); out: From 7adf4eaf60f3d8c3584bed51fe7066d4dfc2cbe1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Oct 2019 21:42:58 -0600 Subject: [PATCH 562/591] io_uring: fix sequence logic for timeout requests We have two ways a request can be deferred: 1) It's a regular request that depends on another one 2) It's a timeout that tracks completions We have a shared helper to determine whether to defer, and that attempts to make the right decision based on the request. But we only have some of this information in the caller. Un-share the two timeout/defer helpers so the caller can use the right one. Fixes: 5262f567987d ("io_uring: IORING_OP_TIMEOUT support") Reported-by: yangerkun Reviewed-by: Jackie Liu Signed-off-by: Jens Axboe --- fs/io_uring.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2c44648217bd..38d274fc0f25 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -415,27 +415,27 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) return ctx; } -static inline bool io_sequence_defer(struct io_ring_ctx *ctx, - struct io_kiocb *req) +static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, + struct io_kiocb *req) { - /* timeout requests always honor sequence */ - if (!(req->flags & REQ_F_TIMEOUT) && - (req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) - return false; - return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped; } -static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx, - struct list_head *list) +static inline bool io_sequence_defer(struct io_ring_ctx *ctx, + struct io_kiocb *req) +{ + if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) + return false; + + return __io_sequence_defer(ctx, req); +} + +static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) { struct io_kiocb *req; - if (list_empty(list)) - return NULL; - - req = list_first_entry(list, struct io_kiocb, list); - if (!io_sequence_defer(ctx, req)) { + req = list_first_entry_or_null(&ctx->defer_list, struct io_kiocb, list); + if (req && !io_sequence_defer(ctx, req)) { list_del_init(&req->list); return req; } @@ -443,14 +443,17 @@ static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx, return NULL; } -static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) -{ - return __io_get_deferred_req(ctx, &ctx->defer_list); -} - static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx) { - return __io_get_deferred_req(ctx, &ctx->timeout_list); + struct io_kiocb *req; + + req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list); + if (req && !__io_sequence_defer(ctx, req)) { + list_del_init(&req->list); + return req; + } + + return NULL; } static void __io_commit_cqring(struct io_ring_ctx *ctx) From 2272905a4580f26630f7d652cc33935b59f96d4c Mon Sep 17 00:00:00 2001 From: Emmanuel Nicolet Date: Tue, 8 Oct 2019 16:13:42 +0200 Subject: [PATCH 563/591] spufs: fix a crash in spufs_create_root() The spu_fs_context was not set in fc->fs_private, this caused a crash when accessing ctx->mode in spufs_create_root(). Fixes: d2e0981c3b9a ("vfs: Convert spufs to use the new mount API") Signed-off-by: Emmanuel Nicolet Signed-off-by: Michael Ellerman Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20191008141342.GA266797@gmail.com --- arch/powerpc/platforms/cell/spufs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1d93e55a2de1..2dd452a047cd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -761,6 +761,7 @@ static int spufs_init_fs_context(struct fs_context *fc) ctx->gid = current_gid(); ctx->mode = 0755; + fc->fs_private = ctx; fc->s_fs_info = sbi; fc->ops = &spufs_context_ops; return 0; From 10deeac921647c929c67752d377f22e632d55d1c Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Sep 2019 10:44:47 -0700 Subject: [PATCH 564/591] MAINTAINERS: kgdb: Add myself as a reviewer for kgdb/kdb I'm interested in kdb / kgdb and have sent various fixes over the years. I'd like to get CCed on patches so I can be aware of them and also help review. Signed-off-by: Douglas Anderson Acked-by: Daniel Thompson Link: https://lore.kernel.org/r/20190920104404.1.I237e68e8825e2d6ac26f8e847f521fe2fcc3705a@changeid Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 296de2b51c83..acdd996774d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9075,6 +9075,7 @@ F: security/keys/ KGDB / KDB /debug_core M: Jason Wessel M: Daniel Thompson +R: Douglas Anderson W: http://kgdb.wiki.kernel.org/ L: kgdb-bugreport@lists.sourceforge.net T: git git://git.kernel.org/pub/scm/linux/kernel/git/jwessel/kgdb.git From 442f1e746e8187b9deb1590176f6b0ff19686b11 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 30 Sep 2019 14:45:22 -0700 Subject: [PATCH 565/591] firmware: google: increment VPD key_len properly Commit 4b708b7b1a2c ("firmware: google: check if size is valid when decoding VPD data") adds length checks, but the new vpd_decode_entry() function botched the logic -- it adds the key length twice, instead of adding the key and value lengths separately. On my local system, this means vpd.c's vpd_section_create_attribs() hits an error case after the first attribute it parses, since it's no longer looking at the correct offset. With this patch, I'm back to seeing all the correct attributes in /sys/firmware/vpd/... Fixes: 4b708b7b1a2c ("firmware: google: check if size is valid when decoding VPD data") Cc: Cc: Hung-Te Lin Signed-off-by: Brian Norris Reviewed-by: Stephen Boyd Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190930214522.240680-1-briannorris@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/vpd_decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/google/vpd_decode.c b/drivers/firmware/google/vpd_decode.c index dda525c0f968..5c6f2a74f104 100644 --- a/drivers/firmware/google/vpd_decode.c +++ b/drivers/firmware/google/vpd_decode.c @@ -52,7 +52,7 @@ static int vpd_decode_entry(const u32 max_len, const u8 *input_buf, if (max_len - consumed < *entry_len) return VPD_FAIL; - consumed += decoded_len; + consumed += *entry_len; *_consumed = consumed; return VPD_OK; } From 062795fcdcb2d22822fb42644b1d76a8ad8439b3 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 8 Oct 2019 17:02:32 +0200 Subject: [PATCH 566/591] s390/uaccess: avoid (false positive) compiler warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depending on inlining decisions by the compiler, __get/put_user_fn might become out of line. Then the compiler is no longer able to tell that size can only be 1,2,4 or 8 due to the check in __get/put_user resulting in false positives like ./arch/s390/include/asm/uaccess.h: In function ‘__put_user_fn’: ./arch/s390/include/asm/uaccess.h:113:9: warning: ‘rc’ may be used uninitialized in this function [-Wmaybe-uninitialized] 113 | return rc; | ^~ ./arch/s390/include/asm/uaccess.h: In function ‘__get_user_fn’: ./arch/s390/include/asm/uaccess.h:143:9: warning: ‘rc’ may be used uninitialized in this function [-Wmaybe-uninitialized] 143 | return rc; | ^~ These functions are supposed to be always inlined. Mark it as such. Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index bd2fd9a7821d..a470f1fa9f2a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -83,7 +83,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); __rc; \ }) -static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { unsigned long spec = 0x010000UL; int rc; @@ -113,7 +113,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) return rc; } -static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { unsigned long spec = 0x01UL; int rc; From 48f9bcf91461b43249949f4e172b5c0245dde751 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 5 Oct 2019 09:46:41 -0700 Subject: [PATCH 567/591] net: sctp: Rename fallthrough label to unhandled fallthrough will become a pseudo reserved keyword so this only use of fallthrough is better renamed to allow it. Signed-off-by: Joe Perches Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Acked-by: Neil Horman Signed-off-by: Linus Torvalds --- net/sctp/sm_make_chunk.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e41ed2e0ae7d..48d63956a68c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2155,7 +2155,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net, case SCTP_PARAM_SET_PRIMARY: if (ep->asconf_enable) break; - goto fallthrough; + goto unhandled; case SCTP_PARAM_HOST_NAME_ADDRESS: /* Tell the peer, we won't support this param. */ @@ -2166,11 +2166,11 @@ static enum sctp_ierror sctp_verify_param(struct net *net, case SCTP_PARAM_FWD_TSN_SUPPORT: if (ep->prsctp_enable) break; - goto fallthrough; + goto unhandled; case SCTP_PARAM_RANDOM: if (!ep->auth_enable) - goto fallthrough; + goto unhandled; /* SCTP-AUTH: Secion 6.1 * If the random number is not 32 byte long the association @@ -2187,7 +2187,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net, case SCTP_PARAM_CHUNKS: if (!ep->auth_enable) - goto fallthrough; + goto unhandled; /* SCTP-AUTH: Section 3.2 * The CHUNKS parameter MUST be included once in the INIT or @@ -2203,7 +2203,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net, case SCTP_PARAM_HMAC_ALGO: if (!ep->auth_enable) - goto fallthrough; + goto unhandled; hmacs = (struct sctp_hmac_algo_param *)param.p; n_elt = (ntohs(param.p->length) - @@ -2226,7 +2226,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net, retval = SCTP_IERROR_ABORT; } break; -fallthrough: +unhandled: default: pr_debug("%s: unrecognized param:%d for chunk:%d\n", __func__, ntohs(param.p->type), cid); From 294f69e662d1570703e9b56e95be37a9fd3afba5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 5 Oct 2019 09:46:42 -0700 Subject: [PATCH 568/591] compiler_attributes.h: Add 'fallthrough' pseudo keyword for switch/case use Reserve the pseudo keyword 'fallthrough' for the ability to convert the various case block /* fallthrough */ style comments to appear to be an actual reserved word with the same gcc case block missing fallthrough warning capability. All switch/case blocks now should end in one of: break; fallthrough; goto