From 0b9a29add43273e64ef45472e08d38116fee1d82 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 24 Apr 2016 14:30:16 -0700 Subject: [PATCH 0001/1050] arm: Use _rcuidle tracepoint to allow use from idle Testing on ARM encountered the following pair of lockdep-RCU splats: ------------------------------------------------------------------------ =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc4-next-20160422 #1 Not tainted ------------------------------- include/trace/events/power.h:328 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc4-next-20160422 #1 Hardware name: Generic OMAP3-GP (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xa8/0xe0) [] (dump_stack) from [] (pwrdm_set_next_pwrst+0xf8/0x1cc) [] (pwrdm_set_next_pwrst) from [] (omap3_enter_idle_bm+0x1b8/0x1e8) [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x84/0x408) [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x1c8/0x3f0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc) ------------------------------------------------------------------------ [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xa8/0xe0) [] (dump_stack) from [] (_pwrdm_state_switch+0x188/0x32c) [] (_pwrdm_state_switch) from [] (_pwrdm_post_transition_cb+0xc/0x14) [] (_pwrdm_post_transition_cb) from [] (pwrdm_for_each+0x30/0x5c) [] (pwrdm_for_each) from [] (pwrdm_post_transition+0x24/0x30) [] (pwrdm_post_transition) from [] (omap_sram_idle+0xfc/0x240) [] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xf0/0x1e8) [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x84/0x408) [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x1c8/0x3f0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc) ------------------------------------------------------------------------ These are caused by event tracing from the idle loop, and they were exposed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()"), which suppressed some false negatives. The current commit therefore adds the _rcuidle suffix to make RCU aware of this implicit use of RCU by event tracing, thus preventing both splats. Reported-by: Guenter Roeck Signed-off-by: Paul E. McKenney Tested-by: Guenter Roeck Tested-by: Tony Lindgren Cc: Russell King Reviewed-by: Steven Rostedt Cc: Cc: --- arch/arm/mach-omap2/powerdomain.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 78af6d8cf2e2..daf2753de7aa 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -186,8 +186,9 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag) trace_state = (PWRDM_TRACE_STATES_FLAG | ((next & OMAP_POWERSTATE_MASK) << 8) | ((prev & OMAP_POWERSTATE_MASK) << 0)); - trace_power_domain_target(pwrdm->name, trace_state, - smp_processor_id()); + trace_power_domain_target_rcuidle(pwrdm->name, + trace_state, + smp_processor_id()); } break; default: @@ -523,8 +524,8 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst) if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) { /* Trace the pwrdm desired target state */ - trace_power_domain_target(pwrdm->name, pwrst, - smp_processor_id()); + trace_power_domain_target_rcuidle(pwrdm->name, pwrst, + smp_processor_id()); /* Program the pwrdm desired target state */ ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst); } From 5ec97ba0713d118a1cd03ccc7677a13769253bbd Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Fri, 29 Apr 2016 14:42:33 +0300 Subject: [PATCH 0002/1050] iio: bmi160: Fix output data rate for accel Format is INT_PLUS_MICRO and micro odr part of ODR should be parts of a micro. Also s/8000/800 this is obviously a typo. Fixes: 77c4ad2d6a9 ("iio: imu: Add initial support for Bosch BMI160") Signed-off-by: Daniel Baluta Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi160/bmi160_core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index 0bf92b06d7d8..cd9b75eeccd7 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c @@ -209,11 +209,11 @@ static const struct bmi160_scale_item bmi160_scale_table[] = { }; static const struct bmi160_odr bmi160_accel_odr[] = { - {0x01, 0, 78125}, - {0x02, 1, 5625}, - {0x03, 3, 125}, - {0x04, 6, 25}, - {0x05, 12, 5}, + {0x01, 0, 781250}, + {0x02, 1, 562500}, + {0x03, 3, 125000}, + {0x04, 6, 250000}, + {0x05, 12, 500000}, {0x06, 25, 0}, {0x07, 50, 0}, {0x08, 100, 0}, @@ -229,7 +229,7 @@ static const struct bmi160_odr bmi160_gyro_odr[] = { {0x08, 100, 0}, {0x09, 200, 0}, {0x0A, 400, 0}, - {0x0B, 8000, 0}, + {0x0B, 800, 0}, {0x0C, 1600, 0}, {0x0D, 3200, 0}, }; From c25d3f37be016b301f446a5257645c4845daf53c Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Fri, 29 Apr 2016 14:42:34 +0300 Subject: [PATCH 0003/1050] iio: bmi160: Fix ODR setting mask and val parameters of regmap_update_bits were reveresed. Fixes: 77c4ad2d6a9 ("iio: imu: Add initial support for Bosch BMI160") Signed-off-by: Daniel Baluta Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi160/bmi160_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index cd9b75eeccd7..b8a290ec984e 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c @@ -364,8 +364,8 @@ int bmi160_set_odr(struct bmi160_data *data, enum bmi160_sensor_type t, return regmap_update_bits(data->regmap, bmi160_regs[t].config, - bmi160_odr_table[t].tbl[i].bits, - bmi160_regs[t].config_odr_mask); + bmi160_regs[t].config_odr_mask, + bmi160_odr_table[t].tbl[i].bits); } static int bmi160_get_odr(struct bmi160_data *data, enum bmi160_sensor_type t, From 04bf02175fe9577875fb8285cc2d08169fef613a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Apr 2016 12:03:31 +0300 Subject: [PATCH 0004/1050] iio: dac: ad5592r: Off by one bug in ad5592r_alloc_channels() The > here should be >= or we go beyond the end for the array. Signed-off-by: Dan Carpenter Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5592r-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c index 948f600e7059..69bde5909854 100644 --- a/drivers/iio/dac/ad5592r-base.c +++ b/drivers/iio/dac/ad5592r-base.c @@ -525,7 +525,7 @@ static int ad5592r_alloc_channels(struct ad5592r_state *st) device_for_each_child_node(st->dev, child) { ret = fwnode_property_read_u32(child, "reg", ®); - if (ret || reg > ARRAY_SIZE(st->channel_modes)) + if (ret || reg >= ARRAY_SIZE(st->channel_modes)) continue; ret = fwnode_property_read_u32(child, "adi,mode", &tmp); From 53dfc3b9e6c54d077f578f9016b6c8ab5f891e10 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 28 Apr 2016 14:02:41 +0200 Subject: [PATCH 0005/1050] iio: light: bh1780: return after write When writing a value using direct reg access from debugfs we need to return and not fall through to reading the value, lest we'll dereference a NULL pointer. Cc: Dan Carpenter Reported-by: Dan Carpenter Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/light/bh1780.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c index 72b364e4aa72..f83595334ff1 100644 --- a/drivers/iio/light/bh1780.c +++ b/drivers/iio/light/bh1780.c @@ -84,7 +84,7 @@ static int bh1780_debugfs_reg_access(struct iio_dev *indio_dev, int ret; if (!readval) - bh1780_write(bh1780, (u8)reg, (u8)writeval); + return bh1780_write(bh1780, (u8)reg, (u8)writeval); ret = bh1780_read(bh1780, (u8)reg); if (ret < 0) From 6b4e941875ca464e0bf737d7cdc9b72b008df6eb Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 5 May 2016 15:33:37 -0700 Subject: [PATCH 0006/1050] ARM: dts: omap5-board-common: Describe the voltage supply mapping accurately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OMAP5uEVM based platforms share a similar voltage rail map. This should be properly described in device tree, without this regulator core will be unable to determine the source voltage of LDOs such as LDO9 and SMPS10 which could be configured for bypass depending on the voltage requested of them. This results in conditions such as: ldo9: bypassed regulator has no supply! ldo9: failed to get the current voltage(-517) palmas-pmic 48070000.i2c:palmas@48:palmas_pmic: failed to register 48070000.i2c:palmas@48:palmas_pmic regulator Cc: Agustí Fontquerni Cc: Eduard Gavin Cc: Enric Balletbo i Serra Cc: Peter Ujfalusi Signed-off-by: Nishanth Menon [tony@atomide.com: fixed to use palmas style in-supply] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 43 +++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index d0990606d16e..ebbaa140355b 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -14,6 +14,29 @@ display0 = &hdmi0; }; + vmain: fixedregulator-vmain { + compatible = "regulator-fixed"; + regulator-name = "vmain"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + vsys_cobra: fixedregulator-vsys_cobra { + compatible = "regulator-fixed"; + regulator-name = "vsys_cobra"; + vin-supply = <&vmain>; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + vdds_1v8_main: fixedregulator-vdds_1v8_main { + compatible = "regulator-fixed"; + regulator-name = "vdds_1v8_main"; + vin-supply = <&smps7_reg>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + vmmcsd_fixed: fixedregulator-mmcsd { compatible = "regulator-fixed"; regulator-name = "vmmcsd_fixed"; @@ -409,6 +432,26 @@ ti,ldo6-vibrator; + smps123-in-supply = <&vsys_cobra>; + smps45-in-supply = <&vsys_cobra>; + smps6-in-supply = <&vsys_cobra>; + smps7-in-supply = <&vsys_cobra>; + smps8-in-supply = <&vsys_cobra>; + smps9-in-supply = <&vsys_cobra>; + smps10_out2-in-supply = <&vsys_cobra>; + smps10_out1-in-supply = <&vsys_cobra>; + ldo1-in-supply = <&vsys_cobra>; + ldo2-in-supply = <&vsys_cobra>; + ldo3-in-supply = <&vdds_1v8_main>; + ldo4-in-supply = <&vdds_1v8_main>; + ldo5-in-supply = <&vsys_cobra>; + ldo6-in-supply = <&vdds_1v8_main>; + ldo7-in-supply = <&vsys_cobra>; + ldo8-in-supply = <&vsys_cobra>; + ldo9-in-supply = <&vmmcsd_fixed>; + ldoln-in-supply = <&vsys_cobra>; + ldousb-in-supply = <&vsys_cobra>; + regulators { smps123_reg: smps123 { /* VDD_OPP_MPU */ From a7f1884554b81bd68cd435d72f09a3527629ac43 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 May 2016 14:43:54 +0200 Subject: [PATCH 0007/1050] netfilter: nfnetlink_queue: fix timestamp attribute Since 4.4 we erronously use timestamp of the netlink skb (which is zero). Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1066 Fixes: b28b1e826f818c30ea7 ("netfilter: nfnetlink_queue: use y2038 safe timestamp") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index cb5b630a645b..e34256a0e7e9 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -499,7 +499,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; - struct timespec64 kts = ktime_to_timespec64(skb->tstamp); + struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); ts.sec = cpu_to_be64(kts.tv_sec); ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); From c0053bd50af57c4ebf032a9de1b07ca78c982452 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 6 Aug 2015 10:54:24 -0500 Subject: [PATCH 0008/1050] ARM: OMAP5 / DRA7: Introduce workaround for 801819 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add workaround for Cortex-A15 ARM erratum 801819 which says in summary that "A livelock can occur in the L2 cache arbitration that might prevent a snoop from completing. Under certain conditions this can cause the system to deadlock. " Recommended workaround is as follows: Do both of the following: 1) Do not use the write-back no-allocate memory type. 2) Do not issue write-back cacheable stores at any time when the cache is disabled (SCTLR.C=0) and the MMU is enabled (SCTLR.M=1). Because it is implementation defined whether cacheable stores update the cache when the cache is disabled it is not expected that any portable code will execute cacheable stores when the cache is disabled. For implementations of Cortex-A15 configured without the “L2 arbitration register slice” option (typically one or two core systems), you must also do the following: 3) Disable write-streaming in each CPU by setting ACTLR[28:25] = 0b1111 So, we provide an option to disable write streaming on OMAP5 and DRA7. It is a rare condition to occur and may be enabled selectively based on platform acceptance of risk. Applies to: A15 revisions r2p0, r2p1, r2p2, r2p3 or r2p4 and REVIDR[3] is set to 0. Based on ARM errata Document revision 18.0 (22 Nov 2013) Note: the configuration for the workaround needs to be done with each CPU bringup, since CPU0 bringup is done by bootloader, it is recommended to have the workaround in the bootloader, kernel also does ensure that CPU0 has the workaround and makes the workaround active when CPU1 gets active. With CONFIG_SMP disabled, it is expected to be done by the bootloader. This does show significant degradation in synthetic tests such as mbw (https://packages.qa.debian.org/m/mbw.html) mbw -n 100 100|grep AVG (on a test platform) Without enabling the erratum: AVG Method: MEMCPY Elapsed: 0.13406 MiB: 100.00000 Copy: 745.913 MiB/s AVG Method: DUMB Elapsed: 0.06746 MiB: 100.00000 Copy: 1482.357 MiB/s AVG Method: MCBLOCK Elapsed: 0.03058 MiB: 100.00000 Copy: 3270.569 MiB/s After enabling the erratum: AVG Method: MEMCPY Elapsed: 0.13757 MiB: 100.00000 Copy: 726.913 MiB/s AVG Method: DUMB Elapsed: 0.12024 MiB: 100.00000 Copy: 831.668 MiB/s AVG Method: MCBLOCK Elapsed: 0.09243 MiB: 100.00000 Copy: 1081.942 MiB/s Most benchmarks are designed for specific performance analysis, so overall usecase must be considered before making a decision to enable/disable the erratum workaround. Pending internal investigation, the erratum is kept disabled by default. Cc: Russell King Cc: Catalin Marinas Cc: Tony Lindgren Suggested-by: Richard Woodruff Suggested-by: Brad Griffis Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 8 ++++++ arch/arm/mach-omap2/omap-secure.h | 1 + arch/arm/mach-omap2/omap-smp.c | 48 +++++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 0517f0c1581a..a63d3fe2ca46 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -240,4 +240,12 @@ endmenu endif +config OMAP5_ERRATA_801819 + bool "Errata 801819: An eviction from L1 data cache might stall indefinitely" + depends on SOC_OMAP5 || SOC_DRA7XX + help + A livelock can occur in the L2 cache arbitration that might prevent + a snoop from completing. Under certain conditions this can cause the + system to deadlock. + endmenu diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index af2851fbcdf0..bae263fba640 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -46,6 +46,7 @@ #define OMAP5_DRA7_MON_SET_CNTFRQ_INDEX 0x109 #define OMAP5_MON_AMBA_IF_INDEX 0x108 +#define OMAP5_DRA7_MON_SET_ACR_INDEX 0x107 /* Secure PPA(Primary Protected Application) APIs */ #define OMAP4_PPA_L2_POR_INDEX 0x23 diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index c625cc10d9f9..8cd1de914ee4 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -50,6 +50,39 @@ void __iomem *omap4_get_scu_base(void) return scu_base; } +#ifdef CONFIG_OMAP5_ERRATA_801819 +void omap5_erratum_workaround_801819(void) +{ + u32 acr, revidr; + u32 acr_mask; + + /* REVIDR[3] indicates erratum fix available on silicon */ + asm volatile ("mrc p15, 0, %0, c0, c0, 6" : "=r" (revidr)); + if (revidr & (0x1 << 3)) + return; + + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + /* + * BIT(27) - Disables streaming. All write-allocate lines allocate in + * the L1 or L2 cache. + * BIT(25) - Disables streaming. All write-allocate lines allocate in + * the L1 cache. + */ + acr_mask = (0x3 << 25) | (0x3 << 27); + /* do we already have it done.. if yes, skip expensive smc */ + if ((acr & acr_mask) == acr_mask) + return; + + acr |= acr_mask; + omap_smc1(OMAP5_DRA7_MON_SET_ACR_INDEX, acr); + + pr_debug("%s: ARM erratum workaround 801819 applied on CPU%d\n", + __func__, smp_processor_id()); +} +#else +static inline void omap5_erratum_workaround_801819(void) { } +#endif + static void omap4_secondary_init(unsigned int cpu) { /* @@ -64,12 +97,15 @@ static void omap4_secondary_init(unsigned int cpu) omap_secure_dispatcher(OMAP4_PPA_CPU_ACTRL_SMP_INDEX, 4, 0, 0, 0, 0, 0); - /* - * Configure the CNTFRQ register for the secondary cpu's which - * indicates the frequency of the cpu local timers. - */ - if (soc_is_omap54xx() || soc_is_dra7xx()) + if (soc_is_omap54xx() || soc_is_dra7xx()) { + /* + * Configure the CNTFRQ register for the secondary cpu's which + * indicates the frequency of the cpu local timers. + */ set_cntfreq(); + /* Configure ACR to disable streaming WA for 801819 */ + omap5_erratum_workaround_801819(); + } /* * Synchronise with the boot thread. @@ -218,6 +254,8 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus) if (cpu_is_omap446x()) startup_addr = omap4460_secondary_startup; + if (soc_is_dra74x() || soc_is_omap54xx()) + omap5_erratum_workaround_801819(); /* * Write the address of secondary startup routine into the From 49111cd1c5498d2a356b0e51d74987dad0e88530 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 12 May 2016 13:29:48 -0700 Subject: [PATCH 0009/1050] ARM: dts: Fix igepv5 audiopwon-gpio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Playing audio works on omap5-uevm, but produces an "Unhandled fault: imprecise external abort (0x1406) at 0x00000000" error on igepv5. Looks like the twl6040 audpwron GPIO pin is different for these boards. Let's fix the issue by configuring the audpwron in the board specific dts file. Cc: Agustí Fontquerni Cc: Eduard Gavin Cc: Enric Balletbo i Serra Acked-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 3 ++- arch/arm/boot/dts/omap5-igep0050.dts | 10 ++++++++++ arch/arm/boot/dts/omap5-uevm.dts | 10 ++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index ebbaa140355b..c6aa65a68642 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -643,7 +643,8 @@ pinctrl-0 = <&twl6040_pins>; interrupts = ; /* IRQ_SYS_2N cascaded to gic */ - ti,audpwron-gpio = <&gpio5 13 GPIO_ACTIVE_HIGH>; /* gpio line 141 */ + + /* audpwron gpio defined in the board specific dts */ vio-supply = <&smps7_reg>; v2v1-supply = <&smps9_reg>; diff --git a/arch/arm/boot/dts/omap5-igep0050.dts b/arch/arm/boot/dts/omap5-igep0050.dts index 46ecb1dd3b5c..700966b85575 100644 --- a/arch/arm/boot/dts/omap5-igep0050.dts +++ b/arch/arm/boot/dts/omap5-igep0050.dts @@ -52,3 +52,13 @@ <&gpio7 3 0>; /* 195, SDA */ }; +&twl6040 { + ti,audpwron-gpio = <&gpio5 16 GPIO_ACTIVE_HIGH>; /* gpio line 144 */ +}; + +&twl6040_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x1c4, PIN_OUTPUT | MUX_MODE6) /* mcspi1_somi.gpio5_144 */ + OMAP5_IOPAD(0x1ca, PIN_OUTPUT | MUX_MODE6) /* perslimbus2_clock.gpio5_145 */ + >; +}; diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 60b3fbb3bf07..a51e60518eb6 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -51,3 +51,13 @@ <&gpio9 1 GPIO_ACTIVE_HIGH>, /* TCA6424A P00, LS OE */ <&gpio7 1 GPIO_ACTIVE_HIGH>; /* GPIO 193, HPD */ }; + +&twl6040 { + ti,audpwron-gpio = <&gpio5 13 GPIO_ACTIVE_HIGH>; /* gpio line 141 */ +}; + +&twl6040_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x1be, PIN_OUTPUT | MUX_MODE6) /* mcspi1_somi.gpio5_141 */ + >; +}; From e0e80d43fc0c5aace76a6c5f9462a1aec3e004f8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 12 May 2016 13:29:48 -0700 Subject: [PATCH 0010/1050] ARM: dts: Fix uart wakeirq on omap5 by removing WAKEUP_EN for omaps The padconf register WAKEUP_EN is now handled in a generic way using Linux wakeirqs where pinctrl-single toggles the WAKEUP_EN bit when a wakeirq is enabled or disabled. At least omap5 gets confused if the WAKEUP_EN bit is set and the pin is not claimed as a wakeirq. The end result is that wakeirqs don't work properly as there is nothing handling the wakeirq. So let's just remove the WAKEUP_EN usage from dts files. Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-evm-37xx.dts | 2 +- arch/arm/boot/dts/omap3-n900.dts | 4 ++-- arch/arm/boot/dts/omap3-n950-n9.dtsi | 6 +++--- arch/arm/boot/dts/omap3-zoom3.dts | 6 +++--- arch/arm/boot/dts/omap5-board-common.dtsi | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/omap3-evm-37xx.dts b/arch/arm/boot/dts/omap3-evm-37xx.dts index 76056ba92ced..ed449827c3d3 100644 --- a/arch/arm/boot/dts/omap3-evm-37xx.dts +++ b/arch/arm/boot/dts/omap3-evm-37xx.dts @@ -85,7 +85,7 @@ OMAP3_CORE1_IOPAD(0x2158, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_clk.sdmmc2_clk */ OMAP3_CORE1_IOPAD(0x215a, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_cmd.sdmmc2_cmd */ OMAP3_CORE1_IOPAD(0x215c, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat0.sdmmc2_dat0 */ - OMAP3_CORE1_IOPAD(0x215e, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat1.sdmmc2_dat1 */ + OMAP3_CORE1_IOPAD(0x215e, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat1.sdmmc2_dat1 */ OMAP3_CORE1_IOPAD(0x2160, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat2.sdmmc2_dat2 */ OMAP3_CORE1_IOPAD(0x2162, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat3.sdmmc2_dat3 */ >; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index b3c26a96a726..9c9e2ae6d6aa 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -288,7 +288,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLUP | MUX_MODE1) /* ssi1_rdy_tx */ OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE1) /* ssi1_flag_tx */ - OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */ + OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4) /* ssi1_wake_tx (cawake) */ OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE1) /* ssi1_dat_tx */ OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE1) /* ssi1_dat_rx */ OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE1) /* ssi1_flag_rx */ @@ -300,7 +300,7 @@ modem_pins: pinmux_modem { pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x20dc, PIN_OUTPUT | MUX_MODE4) /* gpio 70 => cmt_apeslpx */ - OMAP3_CORE1_IOPAD(0x20e0, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* gpio 72 => ape_rst_rq */ + OMAP3_CORE1_IOPAD(0x20e0, PIN_INPUT | MUX_MODE4) /* gpio 72 => ape_rst_rq */ OMAP3_CORE1_IOPAD(0x20e2, PIN_OUTPUT | MUX_MODE4) /* gpio 73 => cmt_rst_rq */ OMAP3_CORE1_IOPAD(0x20e4, PIN_OUTPUT | MUX_MODE4) /* gpio 74 => cmt_en */ OMAP3_CORE1_IOPAD(0x20e6, PIN_OUTPUT | MUX_MODE4) /* gpio 75 => cmt_rst */ diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index a00ca761675d..927b17fc4ed8 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -97,7 +97,7 @@ OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE1) /* ssi1_dat_tx */ OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE1) /* ssi1_flag_tx */ OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLUP | MUX_MODE1) /* ssi1_rdy_tx */ - OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */ + OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4) /* ssi1_wake_tx (cawake) */ OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE1) /* ssi1_dat_rx */ OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE1) /* ssi1_flag_rx */ OMAP3_CORE1_IOPAD(0x2188, PIN_OUTPUT | MUX_MODE1) /* ssi1_rdy_rx */ @@ -110,7 +110,7 @@ OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE7) /* ssi1_dat_tx */ OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE7) /* ssi1_flag_tx */ OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLDOWN | MUX_MODE7) /* ssi1_rdy_tx */ - OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */ + OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4) /* ssi1_wake_tx (cawake) */ OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE7) /* ssi1_dat_rx */ OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE7) /* ssi1_flag_rx */ OMAP3_CORE1_IOPAD(0x2188, PIN_OUTPUT | MUX_MODE4) /* ssi1_rdy_rx */ @@ -120,7 +120,7 @@ modem_pins1: pinmux_modem_core1_pins { pinctrl-single,pins = < - OMAP3_CORE1_IOPAD(0x207a, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* gpio_34 (ape_rst_rq) */ + OMAP3_CORE1_IOPAD(0x207a, PIN_INPUT | MUX_MODE4) /* gpio_34 (ape_rst_rq) */ OMAP3_CORE1_IOPAD(0x2100, PIN_OUTPUT | MUX_MODE4) /* gpio_88 (cmt_rst_rq) */ OMAP3_CORE1_IOPAD(0x210a, PIN_OUTPUT | MUX_MODE4) /* gpio_93 (cmt_apeslpx) */ >; diff --git a/arch/arm/boot/dts/omap3-zoom3.dts b/arch/arm/boot/dts/omap3-zoom3.dts index f19170bdcc1f..c29b41dc7b95 100644 --- a/arch/arm/boot/dts/omap3-zoom3.dts +++ b/arch/arm/boot/dts/omap3-zoom3.dts @@ -98,7 +98,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2174, PIN_INPUT_PULLUP | MUX_MODE0) /* uart2_cts.uart2_cts */ OMAP3_CORE1_IOPAD(0x2176, PIN_OUTPUT | MUX_MODE0) /* uart2_rts.uart2_rts */ - OMAP3_CORE1_IOPAD(0x217a, WAKEUP_EN | PIN_INPUT | MUX_MODE0) /* uart2_rx.uart2_rx */ + OMAP3_CORE1_IOPAD(0x217a, PIN_INPUT | MUX_MODE0) /* uart2_rx.uart2_rx */ OMAP3_CORE1_IOPAD(0x2178, PIN_OUTPUT | MUX_MODE0) /* uart2_tx.uart2_tx */ >; }; @@ -107,7 +107,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x219a, PIN_INPUT_PULLDOWN | MUX_MODE0) /* uart3_cts_rctx.uart3_cts_rctx */ OMAP3_CORE1_IOPAD(0x219c, PIN_OUTPUT | MUX_MODE0) /* uart3_rts_sd.uart3_rts_sd */ - OMAP3_CORE1_IOPAD(0x219e, WAKEUP_EN | PIN_INPUT | MUX_MODE0) /* uart3_rx_irrx.uart3_rx_irrx */ + OMAP3_CORE1_IOPAD(0x219e, PIN_INPUT | MUX_MODE0) /* uart3_rx_irrx.uart3_rx_irrx */ OMAP3_CORE1_IOPAD(0x21a0, PIN_OUTPUT | MUX_MODE0) /* uart3_tx_irtx.uart3_tx_irtx */ >; }; @@ -125,7 +125,7 @@ pinctrl-single,pins = < OMAP3630_CORE2_IOPAD(0x25d8, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_clk.sdmmc3_clk */ OMAP3630_CORE2_IOPAD(0x25e4, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d4.sdmmc3_dat0 */ - OMAP3630_CORE2_IOPAD(0x25e6, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d5.sdmmc3_dat1 */ + OMAP3630_CORE2_IOPAD(0x25e6, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d5.sdmmc3_dat1 */ OMAP3630_CORE2_IOPAD(0x25e8, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d6.sdmmc3_dat2 */ OMAP3630_CORE2_IOPAD(0x25e2, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d3.sdmmc3_dat3 */ >; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index c6aa65a68642..9851b5767ec4 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -332,7 +332,7 @@ wlcore_irq_pin: pinmux_wlcore_irq_pin { pinctrl-single,pins = < - OMAP5_IOPAD(0x40, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE6) /* llia_wakereqin.gpio1_wk14 */ + OMAP5_IOPAD(0x40, PIN_INPUT_PULLUP | MUX_MODE6) /* llia_wakereqin.gpio1_wk14 */ >; }; }; From 6c05495d6d0507a6e7886265f82339bebf25cfed Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 12 May 2016 13:29:48 -0700 Subject: [PATCH 0011/1050] ARM: dts: Fix ldo7 source for HDMI on igepv5 Fix ldo7 source for HDMI on igepv5. Suggested-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-igep0050.dts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm/boot/dts/omap5-igep0050.dts b/arch/arm/boot/dts/omap5-igep0050.dts index 700966b85575..f75ce02fb398 100644 --- a/arch/arm/boot/dts/omap5-igep0050.dts +++ b/arch/arm/boot/dts/omap5-igep0050.dts @@ -35,6 +35,22 @@ }; }; +/* LDO4 is VPP1 - ball AD9 */ +&ldo4_reg { + regulator-min-microvolt = <2000000>; + regulator-max-microvolt = <2000000>; +}; + +/* + * LDO7 is used for HDMI: VDDA_DSIPORTA - ball AA33, VDDA_DSIPORTC - ball AE33, + * VDDA_HDMI - ball AN25 + */ +&ldo7_reg { + status = "okay"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; +}; + &omap5_pmx_core { i2c4_pins: pinmux_i2c4_pins { pinctrl-single,pins = < From 54c78870e4d70420b887d712a3e73a6783d5b51d Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Tue, 10 May 2016 12:14:57 +0200 Subject: [PATCH 0012/1050] ARM: dts: Add non-removable to hsmmc on hp-t410 This will clean-up warnings at boot, since either that or cd-gpio{,s} are mandated by the dts specification of_get_named_gpiod_flags: can't parse 'cd-gpios' property of node '/ocp/mmc@47810000[0]' of_get_named_gpiod_flags: can't parse 'cd-gpio' property of node '/ocp/mmc@47810000[0]' v2: use the generic non-removable instead of ti,non-removable Signed-off-by: Nicolas Chauvet Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dm8148-t410.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 5d4313fd5a46..97000b521cea 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -53,6 +53,7 @@ dmas = <&edma_xbar 8 0 1 /* use SDTXEVT1 instead of MCASP0TX */ &edma_xbar 9 0 2>; /* use SDRXEVT1 instead of MCASP0RX */ dma-names = "tx", "rx"; + non-removable; }; &pincntl { From 1ddbef4d596ce51bf25825549bc5865de08a7e62 Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Tue, 10 May 2016 12:14:58 +0200 Subject: [PATCH 0013/1050] ARM: dts: disable mmc by default and enable when needed for dm814x This patch disable mmc nodes by default in the dm814x.dtsi and enable only when needed on a given dts v2: Disable un-used mmc nodes on the related boards dts files instead of from the included SOC dts Signed-off-by: Nicolas Chauvet Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dm8148-evm.dts | 8 ++++++++ arch/arm/boot/dts/dm8148-t410.dts | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts index cbc17b0794b1..4128fa91823c 100644 --- a/arch/arm/boot/dts/dm8148-evm.dts +++ b/arch/arm/boot/dts/dm8148-evm.dts @@ -93,6 +93,10 @@ }; }; +&mmc1 { + status = "disabled"; +}; + &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&sd1_pins>; @@ -101,6 +105,10 @@ cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; }; +&mmc3 { + status = "disabled"; +}; + &pincntl { sd1_pins: pinmux_sd1_pins { pinctrl-single,pins = < diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 97000b521cea..3f184863e0c5 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -45,6 +45,14 @@ phy-mode = "rgmii"; }; +&mmc1 { + status = "disabled"; +}; + +&mmc2 { + status = "disabled"; +}; + &mmc3 { pinctrl-names = "default"; pinctrl-0 = <&sd2_pins>; From 10ce2404ccab6828f86fb038e4888837f49f9f48 Mon Sep 17 00:00:00 2001 From: Franklin S Cooper Jr Date: Wed, 4 May 2016 12:43:55 -0500 Subject: [PATCH 0014/1050] ARM: dts: dra7: Add gpmc dma channel Add dma channel information to the gpmc. Signed-off-by: Franklin S Cooper Jr Signed-off-by: Sekhar Nori Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index e0074014385a..3a8f3976f6f9 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1451,6 +1451,8 @@ ti,hwmods = "gpmc"; reg = <0x50000000 0x37c>; /* device IO registers */ interrupts = ; + dmas = <&edma_xbar 4 0>; + dma-names = "rxtx"; gpmc,num-cs = <8>; gpmc,num-waitpins = <2>; #address-cells = <2>; From 3d9f77be066ccf8d364928154552ed6ca910793b Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 6 May 2016 08:37:57 -0500 Subject: [PATCH 0015/1050] ARM: dts: am57xx-idk-common: Fix input supply names Palmas Regulator is an exception and does not follow the standard "vin-supply" common definitions for all regulators, as a result of this, the input supplies are not reported to regulator framework, with the obvious result of not being appropriately mapped. Fix the same. Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am57xx-idk-common.dtsi | 32 +++++++++++++----------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index b01a5948cdd0..0e63b9dff6e7 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -60,10 +60,26 @@ tps659038_pmic { compatible = "ti,tps659038-pmic"; + + smps12-in-supply = <&vmain>; + smps3-in-supply = <&vmain>; + smps45-in-supply = <&vmain>; + smps6-in-supply = <&vmain>; + smps7-in-supply = <&vmain>; + smps8-in-supply = <&vmain>; + smps9-in-supply = <&vmain>; + ldo1-in-supply = <&vmain>; + ldo2-in-supply = <&vmain>; + ldo3-in-supply = <&vmain>; + ldo4-in-supply = <&vmain>; + ldo9-in-supply = <&vmain>; + ldoln-in-supply = <&vmain>; + ldousb-in-supply = <&vmain>; + ldortc-in-supply = <&vmain>; + regulators { smps12_reg: smps12 { /* VDD_MPU */ - vin-supply = <&vmain>; regulator-name = "smps12"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1250000>; @@ -73,7 +89,6 @@ smps3_reg: smps3 { /* VDD_DDR EMIF1 EMIF2 */ - vin-supply = <&vmain>; regulator-name = "smps3"; regulator-min-microvolt = <1350000>; regulator-max-microvolt = <1350000>; @@ -84,7 +99,6 @@ smps45_reg: smps45 { /* VDD_DSPEVE on AM572 */ /* VDD_IVA + VDD_DSP on AM571 */ - vin-supply = <&vmain>; regulator-name = "smps45"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1250000>; @@ -94,7 +108,6 @@ smps6_reg: smps6 { /* VDD_GPU */ - vin-supply = <&vmain>; regulator-name = "smps6"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1250000>; @@ -104,7 +117,6 @@ smps7_reg: smps7 { /* VDD_CORE */ - vin-supply = <&vmain>; regulator-name = "smps7"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1150000>; @@ -115,13 +127,11 @@ smps8_reg: smps8 { /* 5728 - VDD_IVAHD */ /* 5718 - N.C. test point */ - vin-supply = <&vmain>; regulator-name = "smps8"; }; smps9_reg: smps9 { /* VDD_3_3D */ - vin-supply = <&vmain>; regulator-name = "smps9"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; @@ -132,7 +142,6 @@ ldo1_reg: ldo1 { /* VDDSHV8 - VSDMMC */ /* NOTE: on rev 1.3a, data supply */ - vin-supply = <&vmain>; regulator-name = "ldo1"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; @@ -142,7 +151,6 @@ ldo2_reg: ldo2 { /* VDDSH18V */ - vin-supply = <&vmain>; regulator-name = "ldo2"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -152,7 +160,6 @@ ldo3_reg: ldo3 { /* R1.3a 572x V1_8PHY_LDO3: USB, SATA */ - vin-supply = <&vmain>; regulator-name = "ldo3"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -162,7 +169,6 @@ ldo4_reg: ldo4 { /* R1.3a 572x V1_8PHY_LDO4: PCIE, HDMI*/ - vin-supply = <&vmain>; regulator-name = "ldo4"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -174,7 +180,6 @@ ldo9_reg: ldo9 { /* VDD_RTC */ - vin-supply = <&vmain>; regulator-name = "ldo9"; regulator-min-microvolt = <840000>; regulator-max-microvolt = <1160000>; @@ -184,7 +189,6 @@ ldoln_reg: ldoln { /* VDDA_1V8_PLL */ - vin-supply = <&vmain>; regulator-name = "ldoln"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -194,7 +198,6 @@ ldousb_reg: ldousb { /* VDDA_3V_USB: VDDA_USBHS33 */ - vin-supply = <&vmain>; regulator-name = "ldousb"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; @@ -204,7 +207,6 @@ ldortc_reg: ldortc { /* VDDA_RTC */ - vin-supply = <&vmain>; regulator-name = "ldortc"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; From 65db875d110b8e517f81fa71c57a65fc6d0019ac Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 12 May 2016 13:20:52 -0500 Subject: [PATCH 0016/1050] ARM: OMAP2+: AM43XX: Enable fixes for Cortex-A9 errata This patch explicitly enables the fixes for the below errata applicable for AM43x Socs as was done for OMAP4. 754322: Faulty MMU translations following ASID switch 775420: A data cache maintenance operation which aborts, followed by an ISB, without any DSB in-between, might lead to deadlock Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index a63d3fe2ca46..415a0bd0cda6 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -67,6 +67,8 @@ config SOC_AM43XX select HAVE_ARM_SCU select GENERIC_CLOCKEVENTS_BROADCAST select HAVE_ARM_TWD + select ARM_ERRATA_754322 + select ARM_ERRATA_775420 config SOC_DRA7XX bool "TI DRA7XX" From b44f788cce4086da00fd2f14b6b4d5abcf85f842 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Fri, 6 May 2016 23:02:33 +0200 Subject: [PATCH 0017/1050] ARM: dts: igep00x0: Add SD card-detect. Fix SD card remove/insert detection by adding the correct card-detect pin. All IGEP OMAP3 based boards use the same card-detect pin. Signed-off-by: Enric Balletbo i Serra Signed-off-by: Ladislav Michl Reviewed-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-igep.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap3-igep.dtsi b/arch/arm/boot/dts/omap3-igep.dtsi index 41f5d386f21f..f4f2ce46d681 100644 --- a/arch/arm/boot/dts/omap3-igep.dtsi +++ b/arch/arm/boot/dts/omap3-igep.dtsi @@ -188,6 +188,7 @@ vmmc-supply = <&vmmc1>; vmmc_aux-supply = <&vsim>; bus-width = <4>; + cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_LOW>; }; &mmc3 { From a1f6ad14176d466c00e6a687f9c78ec6c7ad6bf8 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Fri, 6 May 2016 23:02:34 +0200 Subject: [PATCH 0018/1050] ARM: dts: igep0020: Add SD card write-protect pin. A host device that supports write protection should refuse to write to an SD card that is designated read-only when write-protect is set. This is an optional feature of the SD specification. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-igep0020-common.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm/boot/dts/omap3-igep0020-common.dtsi b/arch/arm/boot/dts/omap3-igep0020-common.dtsi index d6f839cab649..b6971060648a 100644 --- a/arch/arm/boot/dts/omap3-igep0020-common.dtsi +++ b/arch/arm/boot/dts/omap3-igep0020-common.dtsi @@ -194,6 +194,12 @@ OMAP3630_CORE2_IOPAD(0x25f8, PIN_OUTPUT | MUX_MODE4) /* etk_d14.gpio_28 */ >; }; + + mmc1_wp_pins: pinmux_mmc1_cd_pins { + pinctrl-single,pins = < + OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT | MUX_MODE4) /* etk_d15.gpio_29 */ + >; + }; }; &i2c3 { @@ -250,3 +256,8 @@ }; }; }; + +&mmc1 { + pinctrl-0 = <&mmc1_pins &mmc1_wp_pins>; + wp-gpios = <&gpio1 29 GPIO_ACTIVE_LOW>; /* gpio_29 */ +}; From a3e5afe491bf7462013aa9f0bb3b55e3d05b784d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 28 Apr 2016 23:39:53 +0900 Subject: [PATCH 0019/1050] iio: pressure: bmp280: fix error message for wrong chip id The bmp280 driver also supports BMP180 which has a different chip id with BMP280. The probe routine verifies that the device reports the correct chip id but the error message is confusing as if BMP280's chip id is always expected. Reported-by: Matt Ranostay Signed-off-by: Akinobu Mita Cc: Matt Ranostay Cc: Vlad Dogaru Cc: Christoph Mair Cc: Jonathan Cameron Cc: Hartmut Knaack Cc: Lars-Peter Clausen Cc: Peter Meerwald Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/pressure/bmp280.c b/drivers/iio/pressure/bmp280.c index 2f1498e12bb2..724452d61846 100644 --- a/drivers/iio/pressure/bmp280.c +++ b/drivers/iio/pressure/bmp280.c @@ -879,8 +879,8 @@ static int bmp280_probe(struct i2c_client *client, if (ret < 0) return ret; if (chip_id != id->driver_data) { - dev_err(&client->dev, "bad chip id. expected %x got %x\n", - BMP280_CHIP_ID, chip_id); + dev_err(&client->dev, "bad chip id. expected %lx got %x\n", + id->driver_data, chip_id); return -EINVAL; } From 4cb54493ab2003568222e6ad7449747354f22ce3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 May 2016 10:38:31 +0200 Subject: [PATCH 0020/1050] ARM: samsung: improve static dma_mask definition When no DMA master devices are part of the kernel configuration, we get a warning about the unused dma mask definition: arch/arm/plat-samsung/devs.c:71:12: error: 'samsung_device_dma_mask' defined but not used [-Werror=unused-variable] static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); We could simply mark this as __maybe_unused to shut up that warning, but a nicer solution seems to be to have a separate mask for each device. The advantage is that a driver that happens to call dma_set_mask() on one device doesn't implicitly change the mask for the other devices as well. This is more of a theoretical problem, as obviously nothing does it for the devices in this file (or they would have always been broken), but it feels cleaner that way. The definition works by creating an array in place so we can take the address of it and let the compiler generate a hidden symbol for it at compile time. Signed-off-by: Arnd Bergmann Signed-off-by: Krzysztof Kozlowski --- arch/arm/plat-samsung/devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index 84baa16f4c0b..e93aa6734147 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -68,7 +68,7 @@ #include #include -static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); +#define samsung_device_dma_mask (*((u64[]) { DMA_BIT_MASK(32) })) /* AC97 */ #ifdef CONFIG_CPU_S3C2440 From 8a6f71ccb6c39ca5ae442d1478d3a076e2b1361e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 May 2016 16:17:36 +0200 Subject: [PATCH 0021/1050] ARM: exynos: don't select keyboard driver The samsung-keypad driver is implicitly selected by ARCH_EXYNOS4 (why?), but this fails if CONFIG_INPUT is a loadable module: drivers/input/built-in.o: In function `samsung_keypad_remove': drivers/input/keyboard/samsung-keypad.c:461: undefined reference to `input_unregister_device' drivers/input/built-in.o: In function `samsung_keypad_irq': drivers/input/keyboard/samsung-keypad.c:137: undefined reference to `input_event' drivers/input/built-in.o: In function `samsung_keypad_irq': include/linux/input.h:389: undefined reference to `input_event' drivers/input/built-in.o: In function `samsung_keypad_probe': drivers/input/keyboard/samsung-keypad.c:358: undefined reference to `devm_input_allocate_device' drivers/input/built-in.o:(.debug_addr+0x34): undefined reference to `input_set_capability' This removes the 'select' as suggested by Krzysztof Kozlowski and instead enables the driver from the defconfig files. The problem does not happen on mainline kernels, as we don't normally build built-in input drivers when CONFIG_INPUT=m, but I am experimenting with a patch to change this, and the samsung keypad driver showed up as one example that was silently broken before. Signed-off-by: Arnd Bergmann Link: https://lkml.org/lkml/2016/2/14/55 Signed-off-by: Krzysztof Kozlowski --- arch/arm/configs/exynos_defconfig | 1 + arch/arm/configs/multi_v7_defconfig | 1 + arch/arm/mach-exynos/Kconfig | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 6ffd7e76f3ce..1a197fc8fdc7 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -77,6 +77,7 @@ CONFIG_TOUCHSCREEN_ATMEL_MXT=y CONFIG_INPUT_MISC=y CONFIG_INPUT_MAX77693_HAPTIC=y CONFIG_INPUT_MAX8997_HAPTIC=y +CONFIG_KEYBOARD_SAMSUNG=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_SAMSUNG=y CONFIG_SERIAL_SAMSUNG_CONSOLE=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 28234906a064..ad97a41ceb35 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -260,6 +260,7 @@ CONFIG_KEYBOARD_TEGRA=y CONFIG_KEYBOARD_SPEAR=y CONFIG_KEYBOARD_ST_KEYSCAN=y CONFIG_KEYBOARD_CROS_EC=m +CONFIG_KEYBOARD_SAMSUNG=m CONFIG_MOUSE_PS2_ELANTECH=y CONFIG_MOUSE_CYAPA=m CONFIG_MOUSE_ELAN_I2C=y diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 207fa2c737a6..c0d9f334d1d0 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -58,7 +58,6 @@ config ARCH_EXYNOS4 select CLKSRC_SAMSUNG_PWM if CPU_EXYNOS4210 select CPU_EXYNOS4210 select GIC_NON_BANKED - select KEYBOARD_SAMSUNG if INPUT_KEYBOARD select MIGHT_HAVE_CACHE_L2X0 help Samsung EXYNOS4 (Cortex-A9) SoC based systems From 585d70006f6e30f42e96d56c6c0933671c516c7b Mon Sep 17 00:00:00 2001 From: Alden Tondettar Date: Wed, 18 May 2016 14:09:17 -0700 Subject: [PATCH 0022/1050] udf: Don't BUG on missing metadata partition descriptor Currently, if a metadata partition map is missing its partition descriptor, then udf_get_pblock_meta25() will BUG() out the first time it is called. This is rather drastic for a corrupted filesystem, so just treat this case as an invalid mapping instead. Signed-off-by: Alden Tondettar Signed-off-by: Jan Kara --- fs/udf/partition.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 5f861ed287c3..e4e9e70c37f2 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -317,8 +317,9 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block, mdata = &map->s_type_specific.s_metadata; inode = mdata->s_metadata_fe ? : mdata->s_mirror_fe; - /* We shouldn't mount such media... */ - BUG_ON(!inode); + if (!inode) + return 0xFFFFFFFF; + retblk = udf_try_read_meta(inode, block, partition, offset); if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) { udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n"); From 3743a03e72b73b6234768bce06d7bf5a57c47285 Mon Sep 17 00:00:00 2001 From: Alden Tondettar Date: Wed, 18 May 2016 14:09:18 -0700 Subject: [PATCH 0023/1050] udf: Use IS_ERR when loading metadata mirror file entry Currently when udf_get_pblock_meta25() fails to map a block using the primary metadata file, it will attempt to load the mirror file entry by calling udf_find_metadata_inode_efe(). That function will return a ERR_PTR if it fails, but the return value is only checked against NULL. Test the return value using IS_ERR() and change it to NULL if needed. Signed-off-by: Alden Tondettar Signed-off-by: Jan Kara --- fs/udf/partition.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/udf/partition.c b/fs/udf/partition.c index e4e9e70c37f2..ca3cde336324 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -326,6 +326,8 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block, if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) { mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc, map->s_partition_num); + if (IS_ERR(mdata->s_mirror_fe)) + mdata->s_mirror_fe = NULL; mdata->s_flags |= MF_MIRROR_FE_LOADED; } From 7888824b0b1c9c3753d2aedf1d00e7a1c20c18af Mon Sep 17 00:00:00 2001 From: Alden Tondettar Date: Wed, 18 May 2016 14:09:19 -0700 Subject: [PATCH 0024/1050] udf: Use correct partition reference number for metadata UDF/OSTA terminology is confusing. Partition Numbers (PNs) are arbitrary 16-bit values, one for each physical partition in the volume. Partition Reference Numbers (PRNs) are indices into the the Partition Map Table and do not necessarily equal the PN of the mapped partition. The current metadata code mistakenly uses the PN instead of the PRN when mapping metadata blocks to physical/sparable blocks. Windows-created UDF 2.5 discs for some reason use large, arbitrary PNs, resulting in mount failure and KASAN read warnings in udf_read_inode(). For example, a NetBSD UDF 2.5 partition might look like this: PRN PN Type --- -- ---- 0 0 Sparable 1 0 Metadata Since PRN == PN, we are fine. But Windows could gives us: PRN PN Type --- ---- ---- 0 8192 Sparable 1 8192 Metadata So udf_read_inode() will start out by checking the partition length in sbi->s_partmaps[8192], which is obviously out of bounds. Fix this by creating a new field (s_phys_partition_ref) in struct udf_meta_data, referencing whatever physical or sparable map has the same partition number as the metadata partition. [JK: Add comment about s_phys_partition_ref, change its name] Signed-off-by: Alden Tondettar Signed-off-by: Jan Kara --- fs/udf/partition.c | 6 ++++-- fs/udf/super.c | 22 ++++++++++++---------- fs/udf/udf_sb.h | 5 +++++ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/fs/udf/partition.c b/fs/udf/partition.c index ca3cde336324..888c364b2fe9 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -295,7 +295,8 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, map = &UDF_SB(sb)->s_partmaps[partition]; /* map to sparable/physical partition desc */ phyblock = udf_get_pblock(sb, eloc.logicalBlockNum, - map->s_partition_num, ext_offset + offset); + map->s_type_specific.s_metadata.s_phys_partition_ref, + ext_offset + offset); } brelse(epos.bh); @@ -325,7 +326,8 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block, udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n"); if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) { mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, - mdata->s_mirror_file_loc, map->s_partition_num); + mdata->s_mirror_file_loc, + mdata->s_phys_partition_ref); if (IS_ERR(mdata->s_mirror_fe)) mdata->s_mirror_fe = NULL; mdata->s_flags |= MF_MIRROR_FE_LOADED; diff --git a/fs/udf/super.c b/fs/udf/super.c index 5e2c8c814e1b..4942549e7dc8 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -951,13 +951,13 @@ out2: } struct inode *udf_find_metadata_inode_efe(struct super_block *sb, - u32 meta_file_loc, u32 partition_num) + u32 meta_file_loc, u32 partition_ref) { struct kernel_lb_addr addr; struct inode *metadata_fe; addr.logicalBlockNum = meta_file_loc; - addr.partitionReferenceNum = partition_num; + addr.partitionReferenceNum = partition_ref; metadata_fe = udf_iget_special(sb, &addr); @@ -974,7 +974,8 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb, return metadata_fe; } -static int udf_load_metadata_files(struct super_block *sb, int partition) +static int udf_load_metadata_files(struct super_block *sb, int partition, + int type1_index) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; @@ -984,20 +985,21 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) map = &sbi->s_partmaps[partition]; mdata = &map->s_type_specific.s_metadata; + mdata->s_phys_partition_ref = type1_index; /* metadata address */ udf_debug("Metadata file location: block = %d part = %d\n", - mdata->s_meta_file_loc, map->s_partition_num); + mdata->s_meta_file_loc, mdata->s_phys_partition_ref); fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc, - map->s_partition_num); + mdata->s_phys_partition_ref); if (IS_ERR(fe)) { /* mirror file entry */ udf_debug("Mirror metadata file location: block = %d part = %d\n", - mdata->s_mirror_file_loc, map->s_partition_num); + mdata->s_mirror_file_loc, mdata->s_phys_partition_ref); fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc, - map->s_partition_num); + mdata->s_phys_partition_ref); if (IS_ERR(fe)) { udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); @@ -1015,7 +1017,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) */ if (mdata->s_bitmap_file_loc != 0xFFFFFFFF) { addr.logicalBlockNum = mdata->s_bitmap_file_loc; - addr.partitionReferenceNum = map->s_partition_num; + addr.partitionReferenceNum = mdata->s_phys_partition_ref; udf_debug("Bitmap file location: block = %d part = %d\n", addr.logicalBlockNum, addr.partitionReferenceNum); @@ -1283,7 +1285,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) p = (struct partitionDesc *)bh->b_data; partitionNumber = le16_to_cpu(p->partitionNumber); - /* First scan for TYPE1, SPARABLE and METADATA partitions */ + /* First scan for TYPE1 and SPARABLE partitions */ for (i = 0; i < sbi->s_partitions; i++) { map = &sbi->s_partmaps[i]; udf_debug("Searching map: (%d == %d)\n", @@ -1333,7 +1335,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) goto out_bh; if (map->s_partition_type == UDF_METADATA_MAP25) { - ret = udf_load_metadata_files(sb, i); + ret = udf_load_metadata_files(sb, i, type1_idx); if (ret < 0) { udf_err(sb, "error loading MetaData partition map %d\n", i); diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 27b5335730c9..c13875d669c0 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -61,6 +61,11 @@ struct udf_meta_data { __u32 s_bitmap_file_loc; __u32 s_alloc_unit_size; __u16 s_align_unit_size; + /* + * Partition Reference Number of the associated physical / sparable + * partition + */ + __u16 s_phys_partition_ref; int s_flags; struct inode *s_metadata_fe; struct inode *s_mirror_fe; From a118084432d642eeccb961c7c8cc61525a941fcb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 20 May 2016 22:13:45 +0200 Subject: [PATCH 0025/1050] vfs: add d_real_inode() helper Needed by the following fix. Signed-off-by: Miklos Szeredi Cc: --- include/linux/dcache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7e9422cb5989..ad5d582f9b14 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -576,5 +576,17 @@ static inline struct inode *vfs_select_inode(struct dentry *dentry, return inode; } +/** + * d_real_inode - Return the real inode + * @dentry: The dentry to query + * + * If dentry is on an union/overlay, then return the underlying, real inode. + * Otherwise return d_inode(). + */ +static inline struct inode *d_real_inode(struct dentry *dentry) +{ + return d_backing_inode(d_real(dentry)); +} + #endif /* __LINUX_DCACHE_H */ From eb0a4a47ae89aaa0674ab3180de6a162f3be2ddf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 20 May 2016 22:13:45 +0200 Subject: [PATCH 0026/1050] af_unix: fix hard linked sockets on overlay Overlayfs uses separate inodes even in the case of hard links on the underlying filesystems. This is a problem for AF_UNIX socket implementation which indexes sockets based on the inode. This resulted in hard linked sockets not working. The fix is to use the real, underlying inode. Test case follows: -- ovl-sock-test.c -- #include #include #include #include #define SOCK "test-sock" #define SOCK2 "test-sock2" int main(void) { int fd, fd2; struct sockaddr_un addr = { .sun_family = AF_UNIX, .sun_path = SOCK, }; struct sockaddr_un addr2 = { .sun_family = AF_UNIX, .sun_path = SOCK2, }; unlink(SOCK); unlink(SOCK2); if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) err(1, "socket"); if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) err(1, "bind"); if (listen(fd, 0) == -1) err(1, "listen"); if (link(SOCK, SOCK2) == -1) err(1, "link"); if ((fd2 = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) err(1, "socket"); if (connect(fd2, (struct sockaddr *) &addr2, sizeof(addr2)) == -1) err (1, "connect"); return 0; } ---- Reported-by: Alexander Morozov Signed-off-by: Miklos Szeredi Cc: --- net/unix/af_unix.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8269da73e9e5..7748199b3568 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; - if (dentry && d_backing_inode(dentry) == i) { + if (dentry && d_real_inode(dentry) == i) { sock_hold(s); goto found; } @@ -911,7 +911,7 @@ static struct sock *unix_find_other(struct net *net, err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); if (err) goto fail; - inode = d_backing_inode(path.dentry); + inode = d_real_inode(path.dentry); err = inode_permission(inode, MAY_WRITE); if (err) goto put_fail; @@ -1048,7 +1048,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out_up; } addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); u->path = u_path; list = &unix_socket_table[hash]; From 0e35cf5ce00d873d6e529d2b2cd7598d52438051 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 20 May 2016 10:06:41 -0700 Subject: [PATCH 0027/1050] iio: humidity: hdc100x: correct humidity integration time mask Apply the correct mask to enable all available humidity integration times. Currently, the driver defaults to 6500 and all is okay with that. However, if 3850 is selected we get a stuck bit and can't change back to 6500 or select 2500. (Verified with HDC1008) Signed-off-by: Alison Schofield Cc: Daniel Baluta Reviewed-by: Matt Ranostay Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hdc100x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index fa4767613173..59aa1cbdc2fd 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -55,7 +55,7 @@ static const struct { }, { /* IIO_HUMIDITYRELATIVE channel */ .shift = 8, - .mask = 2, + .mask = 3, }, }; From 1cb0d06a00f2ec0ed8f926ec62f53bc9e12ea55e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 17 May 2016 11:02:56 +0300 Subject: [PATCH 0028/1050] iio: humidity: am2315: Remove a stray unlock We haven't taken the lock yet so we don't need to unlock here. Fixes: 0d96d5ead3f7 ('iio: humidity: Add triggered buffer support for AM2315') Signed-off-by: Dan Carpenter Acked-by: Tiberiu Breana Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/am2315.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c index 3be6d209a159..11535911a5c6 100644 --- a/drivers/iio/humidity/am2315.c +++ b/drivers/iio/humidity/am2315.c @@ -165,10 +165,8 @@ static irqreturn_t am2315_trigger_handler(int irq, void *p) struct am2315_sensor_data sensor_data; ret = am2315_read_data(data, &sensor_data); - if (ret < 0) { - mutex_unlock(&data->lock); + if (ret < 0) goto err; - } mutex_lock(&data->lock); if (*(indio_dev->active_scan_mask) == AM2315_ALL_CHANNEL_MASK) { From 14f2461b822dffb116256ee9155f7eca96064f7a Mon Sep 17 00:00:00 2001 From: Crestez Dan Leonard Date: Fri, 20 May 2016 17:44:36 +0300 Subject: [PATCH 0029/1050] max44000: Remove scale from proximity This is not implemented and doesn't really make sense because IIO proximity is unit-less. Remove IIO_CHAN_INFO_SCALE from info_mask because so that the _scale sysfs entry won't appear. This fixes userspace tools like generic_buffer which abort when reads returns an error. Signed-off-by: Crestez Dan Leonard Signed-off-by: Jonathan Cameron --- drivers/iio/light/max44000.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c index e01e58a9bd14..f17cb2ea18f5 100644 --- a/drivers/iio/light/max44000.c +++ b/drivers/iio/light/max44000.c @@ -147,7 +147,6 @@ static const struct iio_chan_spec max44000_channels[] = { { .type = IIO_PROXIMITY, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .scan_index = MAX44000_SCAN_INDEX_PRX, .scan_type = { .sign = 'u', From 5138806f16c74c7cb8ac3e408a859c79eb7c9567 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sat, 21 May 2016 20:01:01 -0700 Subject: [PATCH 0030/1050] iio: proximity: as3935: correct IIO_CHAN_INFO_RAW output IIO_CHAN_INFO_RAW was returning processed data which was incorrect. This also adds the IIO_CHAN_INFO_SCALE value to convert to a processed value. Signed-off-by: Matt Ranostay Cc: Signed-off-by: Jonathan Cameron --- .../ABI/testing/sysfs-bus-iio-proximity-as3935 | 2 +- drivers/iio/proximity/as3935.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 index 6708c5e264aa..33e96f740639 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 +++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 @@ -1,4 +1,4 @@ -What /sys/bus/iio/devices/iio:deviceX/in_proximity_raw +What /sys/bus/iio/devices/iio:deviceX/in_proximity_input Date: March 2014 KernelVersion: 3.15 Contact: Matt Ranostay diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index f4d29d5dbd5f..f0a0defb68a4 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -72,7 +72,8 @@ static const struct iio_chan_spec as3935_channels[] = { .type = IIO_PROXIMITY, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_PROCESSED), + BIT(IIO_CHAN_INFO_PROCESSED) | + BIT(IIO_CHAN_INFO_SCALE), .scan_index = 0, .scan_type = { .sign = 'u', @@ -181,7 +182,12 @@ static int as3935_read_raw(struct iio_dev *indio_dev, /* storm out of range */ if (*val == AS3935_DATA_MASK) return -EINVAL; - *val *= 1000; + + if (m == IIO_CHAN_INFO_PROCESSED) + *val *= 1000; + break; + case IIO_CHAN_INFO_SCALE: + *val = 1000; break; default: return -EINVAL; From 7d0643634ea567969bf3f3ed6193a9d6fc75653b Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sat, 21 May 2016 20:01:02 -0700 Subject: [PATCH 0031/1050] iio: proximity: as3935: remove triggered buffer processing Triggered buffers shouldn't return processed data, and the respective conversion was overflowing the defined .realbits for the channel. Cc: george.mccollister@gmail.com Signed-off-by: Matt Ranostay Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/as3935.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index f0a0defb68a4..6aed02437efc 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -213,7 +213,6 @@ static irqreturn_t as3935_trigger_handler(int irq, void *private) if (ret) goto err_read; val &= AS3935_DATA_MASK; - val *= 1000; iio_push_to_buffers_with_timestamp(indio_dev, &val, pf->timestamp); err_read: From 37b1ba2c68cfbe37f5f45bb91bcfaf2b016ae6a1 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sat, 21 May 2016 20:01:03 -0700 Subject: [PATCH 0032/1050] iio: proximity: as3935: fix buffer stack trashing Buffer wasn't of a valid size to allow the timestamp, and correct padding. This patchset also moves the buffer off the stack, and onto the heap. Cc: george.mccollister@gmail.com Signed-off-by: Matt Ranostay Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/as3935.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index 6aed02437efc..e2f926cdcad2 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -64,6 +64,7 @@ struct as3935_state { struct delayed_work work; u32 tune_cap; + u8 buffer[16]; /* 8-bit data + 56-bit padding + 64-bit timestamp */ u8 buf[2] ____cacheline_aligned; }; @@ -212,9 +213,10 @@ static irqreturn_t as3935_trigger_handler(int irq, void *private) ret = as3935_read(st, AS3935_DATA, &val); if (ret) goto err_read; - val &= AS3935_DATA_MASK; - iio_push_to_buffers_with_timestamp(indio_dev, &val, pf->timestamp); + st->buffer[0] = val & AS3935_DATA_MASK; + iio_push_to_buffers_with_timestamp(indio_dev, &st->buffer, + pf->timestamp); err_read: iio_trigger_notify_done(indio_dev->trig); From 275ae411e56f8f900fa364da29c4706f9af4e1f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= Date: Tue, 24 May 2016 16:53:49 +0200 Subject: [PATCH 0033/1050] perf/x86/intel/rapl: Fix pmus free during cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On rapl cleanup path, kfree() is given by mistake the address of the pointer of the structure to free (rapl_pmus->pmus + i). Pass the pointer instead (rapl_pmus->pmus[i]). Fixes: 9de8d686955b "perf/x86/intel/rapl: Convert it to a per package facility" Signed-off-by: Vincent Stehlé Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1464101629-14905-1-git-send-email-vincent.stehle@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/rapl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 99c4bab123cd..e30eef4f29a6 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -714,7 +714,7 @@ static void cleanup_rapl_pmus(void) int i; for (i = 0; i < rapl_pmus->maxpkg; i++) - kfree(rapl_pmus->pmus + i); + kfree(rapl_pmus->pmus[i]); kfree(rapl_pmus); } From 720b287d8382616a29524dd98264ac1e3a069e9f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 May 2016 22:48:51 +0200 Subject: [PATCH 0034/1050] netfilter: conntrack: remove leftover binary sysctl define Users got removed in f8572d8f2a2ba ("sysctl net: Remove unused binary sysctl code"). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0f1a45bcacb2..2933db30098a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -489,8 +489,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { { } }; -#define NET_NF_CONNTRACK_MAX 2089 - static struct ctl_table nf_ct_netfilter_table[] = { { .procname = "nf_conntrack_max", From dc3ee32e96d74dd6c80eed63af5065cb75899299 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 May 2016 21:18:52 -0500 Subject: [PATCH 0035/1050] netfilter: nf_queue: Make the queue_handler pernet Florian Weber reported: > Under full load (unshare() in loop -> OOM conditions) we can > get kernel panic: > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 > IP: [] nfqnl_nf_hook_drop+0x35/0x70 > [..] > task: ffff88012dfa3840 ti: ffff88012dffc000 task.ti: ffff88012dffc000 > RIP: 0010:[] [] nfqnl_nf_hook_drop+0x35/0x70 > RSP: 0000:ffff88012dfffd80 EFLAGS: 00010206 > RAX: 0000000000000008 RBX: ffffffff81add0c0 RCX: ffff88013fd80000 > [..] > Call Trace: > [] nf_queue_nf_hook_drop+0x18/0x20 > [] nf_unregister_net_hook+0xdb/0x150 > [] netfilter_net_exit+0x2f/0x60 > [] ops_exit_list.isra.4+0x38/0x60 > [] setup_net+0xc2/0x120 > [] copy_net_ns+0x79/0x120 > [] create_new_namespaces+0x11b/0x1e0 > [] unshare_nsproxy_namespaces+0x57/0xa0 > [] SyS_unshare+0x1b2/0x340 > [] entry_SYSCALL_64_fastpath+0x1e/0xa8 > Code: 65 00 48 89 e5 41 56 41 55 41 54 53 83 e8 01 48 8b 97 70 12 00 00 48 98 49 89 f4 4c 8b 74 c2 18 4d 8d 6e 08 49 81 c6 88 00 00 00 <49> 8b 5d 00 48 85 db 74 1a 48 89 df 4c 89 e2 48 c7 c6 90 68 47 > The simple fix for this requires a new pernet variable for struct nf_queue that indicates when it is safe to use the dynamically allocated nf_queue state. As we need a variable anyway make nf_register_queue_handler and nf_unregister_queue_handler pernet. This allows the existing logic of when it is safe to use the state from the nfnetlink_queue module to be reused with no changes except for making it per net. The syncrhonize_rcu from nf_unregister_queue_handler is moved to a new function nfnl_queue_net_exit_batch so that the worst case of having a syncrhonize_rcu in the pernet exit path is not experienced in batch mode. Reported-by: Florian Westphal Signed-off-by: "Eric W. Biederman" Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 4 ++-- include/net/netns/netfilter.h | 2 ++ net/netfilter/nf_queue.c | 17 ++++++++--------- net/netfilter/nfnetlink_queue.c | 18 ++++++++++++------ 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 9c5638ad872e..0dbce55437f2 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -28,8 +28,8 @@ struct nf_queue_handler { struct nf_hook_ops *ops); }; -void nf_register_queue_handler(const struct nf_queue_handler *qh); -void nf_unregister_queue_handler(void); +void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); +void nf_unregister_queue_handler(struct net *net); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); void nf_queue_entry_get_refs(struct nf_queue_entry *entry); diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 38aa4983e2a9..36d723579af2 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -5,11 +5,13 @@ struct proc_dir_entry; struct nf_logger; +struct nf_queue_handler; struct netns_nf { #if defined CONFIG_PROC_FS struct proc_dir_entry *proc_netfilter; #endif + const struct nf_queue_handler __rcu *queue_handler; const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5baa8e24e6ac..b19ad20a705c 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -26,23 +26,21 @@ * Once the queue is registered it must reinject all packets it * receives, no matter what. */ -static const struct nf_queue_handler __rcu *queue_handler __read_mostly; /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ -void nf_register_queue_handler(const struct nf_queue_handler *qh) +void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh) { /* should never happen, we only have one queueing backend in kernel */ - WARN_ON(rcu_access_pointer(queue_handler)); - rcu_assign_pointer(queue_handler, qh); + WARN_ON(rcu_access_pointer(net->nf.queue_handler)); + rcu_assign_pointer(net->nf.queue_handler, qh); } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -void nf_unregister_queue_handler(void) +void nf_unregister_queue_handler(struct net *net) { - RCU_INIT_POINTER(queue_handler, NULL); - synchronize_rcu(); + RCU_INIT_POINTER(net->nf.queue_handler, NULL); } EXPORT_SYMBOL(nf_unregister_queue_handler); @@ -103,7 +101,7 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) const struct nf_queue_handler *qh; rcu_read_lock(); - qh = rcu_dereference(queue_handler); + qh = rcu_dereference(net->nf.queue_handler); if (qh) qh->nf_hook_drop(net, ops); rcu_read_unlock(); @@ -122,9 +120,10 @@ int nf_queue(struct sk_buff *skb, struct nf_queue_entry *entry = NULL; const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; + struct net *net = state->net; /* QUEUE == DROP if no one is waiting, to be safe. */ - qh = rcu_dereference(queue_handler); + qh = rcu_dereference(net->nf.queue_handler); if (!qh) { status = -ESRCH; goto err; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e34256a0e7e9..309ac026aac2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1377,21 +1377,29 @@ static int __net_init nfnl_queue_net_init(struct net *net) net->nf.proc_netfilter, &nfqnl_file_ops)) return -ENOMEM; #endif + nf_register_queue_handler(net, &nfqh); return 0; } static void __net_exit nfnl_queue_net_exit(struct net *net) { + nf_unregister_queue_handler(net); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); #endif } +static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) +{ + synchronize_rcu(); +} + static struct pernet_operations nfnl_queue_net_ops = { - .init = nfnl_queue_net_init, - .exit = nfnl_queue_net_exit, - .id = &nfnl_queue_net_id, - .size = sizeof(struct nfnl_queue_net), + .init = nfnl_queue_net_init, + .exit = nfnl_queue_net_exit, + .exit_batch = nfnl_queue_net_exit_batch, + .id = &nfnl_queue_net_id, + .size = sizeof(struct nfnl_queue_net), }; static int __init nfnetlink_queue_init(void) @@ -1412,7 +1420,6 @@ static int __init nfnetlink_queue_init(void) } register_netdevice_notifier(&nfqnl_dev_notifier); - nf_register_queue_handler(&nfqh); return status; cleanup_netlink_notifier: @@ -1424,7 +1431,6 @@ out: static void __exit nfnetlink_queue_fini(void) { - nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); From b9d8905e4a751e2cdc0fb474856b7183c594dcc6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 24 May 2016 22:51:27 +0200 Subject: [PATCH 0036/1050] reiserfs: check kstrdup failure Check out-of-memory failure of the kstrdup option. Note that the argument "arg" may be NULL (in that case kstrup returns NULL), so out of memory condition happened if arg was non-NULL and kstrdup returned NULL. The patch also changes the call to replace_mount_options - if we didn't pass any filesystem-specific options, we don't call replace_mount_options (thus we don't erase existing reported options). Note that to properly report options after remount, the reiserfs filesystem should implement the show_options method. Without the show_options method, options changed with remount replace existing options. Signed-off-by: Mikulas Patocka Signed-off-by: Jan Kara --- fs/reiserfs/super.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b8f2d1e8c645..c72c16c5a60f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1393,7 +1393,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) unsigned long safe_mask = 0; unsigned int commit_max_age = (unsigned int)-1; struct reiserfs_journal *journal = SB_JOURNAL(s); - char *new_opts = kstrdup(arg, GFP_KERNEL); + char *new_opts; int err; char *qf_names[REISERFS_MAXQUOTAS]; unsigned int qfmt = 0; @@ -1401,6 +1401,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) int i; #endif + new_opts = kstrdup(arg, GFP_KERNEL); + if (arg && !new_opts) + return -ENOMEM; + sync_filesystem(s); reiserfs_write_lock(s); @@ -1546,7 +1550,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) } out_ok_unlocked: - replace_mount_options(s, new_opts); + if (new_opts) + replace_mount_options(s, new_opts); return 0; out_err_unlock: From 4f996e234dad488e5d9ba0858bc1bae12eff82c3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 May 2016 11:48:25 -0400 Subject: [PATCH 0037/1050] percpu: fix synchronization between chunk->map_extend_work and chunk destruction Atomic allocations can trigger async map extensions which is serviced by chunk->map_extend_work. pcpu_balance_work which is responsible for destroying idle chunks wasn't synchronizing properly against chunk->map_extend_work and may end up freeing the chunk while the work item is still in flight. This patch fixes the bug by rolling async map extension operations into pcpu_balance_work. Signed-off-by: Tejun Heo Reported-and-tested-by: Alexei Starovoitov Reported-by: Vlastimil Babka Reported-by: Sasha Levin Cc: stable@vger.kernel.org # v3.18+ Fixes: 9c824b6a172c ("percpu: make sure chunk->map array has available space") --- mm/percpu.c | 57 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 0c59684f1ff2..b1d2a3844792 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -112,7 +112,7 @@ struct pcpu_chunk { int map_used; /* # of map entries used before the sentry */ int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ - struct work_struct map_extend_work;/* async ->map[] extension */ + struct list_head map_extend_list;/* on pcpu_map_extend_chunks */ void *data; /* chunk data */ int first_free; /* no free below this */ @@ -166,6 +166,9 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ +/* chunks which need their map areas extended, protected by pcpu_lock */ +static LIST_HEAD(pcpu_map_extend_chunks); + /* * The number of empty populated pages, protected by pcpu_lock. The * reserved chunk doesn't contribute to the count. @@ -395,13 +398,19 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic) { int margin, new_alloc; + lockdep_assert_held(&pcpu_lock); + if (is_atomic) { margin = 3; if (chunk->map_alloc < - chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW && - pcpu_async_enabled) - schedule_work(&chunk->map_extend_work); + chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) { + if (list_empty(&chunk->map_extend_list)) { + list_add_tail(&chunk->map_extend_list, + &pcpu_map_extend_chunks); + pcpu_schedule_balance_work(); + } + } } else { margin = PCPU_ATOMIC_MAP_MARGIN_HIGH; } @@ -467,20 +476,6 @@ out_unlock: return 0; } -static void pcpu_map_extend_workfn(struct work_struct *work) -{ - struct pcpu_chunk *chunk = container_of(work, struct pcpu_chunk, - map_extend_work); - int new_alloc; - - spin_lock_irq(&pcpu_lock); - new_alloc = pcpu_need_to_extend(chunk, false); - spin_unlock_irq(&pcpu_lock); - - if (new_alloc) - pcpu_extend_area_map(chunk, new_alloc); -} - /** * pcpu_fit_in_area - try to fit the requested allocation in a candidate area * @chunk: chunk the candidate area belongs to @@ -740,7 +735,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) chunk->map_used = 1; INIT_LIST_HEAD(&chunk->list); - INIT_WORK(&chunk->map_extend_work, pcpu_map_extend_workfn); + INIT_LIST_HEAD(&chunk->map_extend_list); chunk->free_size = pcpu_unit_size; chunk->contig_hint = pcpu_unit_size; @@ -1129,6 +1124,7 @@ static void pcpu_balance_workfn(struct work_struct *work) if (chunk == list_first_entry(free_head, struct pcpu_chunk, list)) continue; + list_del_init(&chunk->map_extend_list); list_move(&chunk->list, &to_free); } @@ -1146,6 +1142,25 @@ static void pcpu_balance_workfn(struct work_struct *work) pcpu_destroy_chunk(chunk); } + /* service chunks which requested async area map extension */ + do { + int new_alloc = 0; + + spin_lock_irq(&pcpu_lock); + + chunk = list_first_entry_or_null(&pcpu_map_extend_chunks, + struct pcpu_chunk, map_extend_list); + if (chunk) { + list_del_init(&chunk->map_extend_list); + new_alloc = pcpu_need_to_extend(chunk, false); + } + + spin_unlock_irq(&pcpu_lock); + + if (new_alloc) + pcpu_extend_area_map(chunk, new_alloc); + } while (chunk); + /* * Ensure there are certain number of free populated pages for * atomic allocs. Fill up from the most packed so that atomic @@ -1644,7 +1659,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, */ schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0); INIT_LIST_HEAD(&schunk->list); - INIT_WORK(&schunk->map_extend_work, pcpu_map_extend_workfn); + INIT_LIST_HEAD(&schunk->map_extend_list); schunk->base_addr = base_addr; schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); @@ -1673,7 +1688,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, if (dyn_size) { dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0); INIT_LIST_HEAD(&dchunk->list); - INIT_WORK(&dchunk->map_extend_work, pcpu_map_extend_workfn); + INIT_LIST_HEAD(&dchunk->map_extend_list); dchunk->base_addr = base_addr; dchunk->map = dmap; dchunk->map_alloc = ARRAY_SIZE(dmap); From 6710e594f71ccaad8101bc64321152af7cd9ea28 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 May 2016 11:48:25 -0400 Subject: [PATCH 0038/1050] percpu: fix synchronization between synchronous map extension and chunk destruction For non-atomic allocations, pcpu_alloc() can try to extend the area map synchronously after dropping pcpu_lock; however, the extension wasn't synchronized against chunk destruction and the chunk might get freed while extension is in progress. This patch fixes the bug by putting most of non-atomic allocations under pcpu_alloc_mutex to synchronize against pcpu_balance_work which is responsible for async chunk management including destruction. Signed-off-by: Tejun Heo Reported-and-tested-by: Alexei Starovoitov Reported-by: Vlastimil Babka Reported-by: Sasha Levin Cc: stable@vger.kernel.org # v3.18+ Fixes: 1a4d76076cda ("percpu: implement asynchronous chunk population") --- mm/percpu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index b1d2a3844792..9903830aaebb 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -162,7 +162,7 @@ static struct pcpu_chunk *pcpu_reserved_chunk; static int pcpu_reserved_chunk_limit; static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ -static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */ +static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ @@ -444,6 +444,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); unsigned long flags; + lockdep_assert_held(&pcpu_alloc_mutex); + new = pcpu_mem_zalloc(new_size); if (!new) return -ENOMEM; @@ -890,6 +892,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, return NULL; } + if (!is_atomic) + mutex_lock(&pcpu_alloc_mutex); + spin_lock_irqsave(&pcpu_lock, flags); /* serve reserved allocations from the reserved chunk if available */ @@ -962,12 +967,9 @@ restart: if (is_atomic) goto fail; - mutex_lock(&pcpu_alloc_mutex); - if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { chunk = pcpu_create_chunk(); if (!chunk) { - mutex_unlock(&pcpu_alloc_mutex); err = "failed to allocate new chunk"; goto fail; } @@ -978,7 +980,6 @@ restart: spin_lock_irqsave(&pcpu_lock, flags); } - mutex_unlock(&pcpu_alloc_mutex); goto restart; area_found: @@ -988,8 +989,6 @@ area_found: if (!is_atomic) { int page_start, page_end, rs, re; - mutex_lock(&pcpu_alloc_mutex); - page_start = PFN_DOWN(off); page_end = PFN_UP(off + size); @@ -1000,7 +999,6 @@ area_found: spin_lock_irqsave(&pcpu_lock, flags); if (ret) { - mutex_unlock(&pcpu_alloc_mutex); pcpu_free_area(chunk, off, &occ_pages); err = "failed to populate"; goto fail_unlock; @@ -1040,6 +1038,8 @@ fail: /* see the flag handling in pcpu_blance_workfn() */ pcpu_atomic_alloc_failed = true; pcpu_schedule_balance_work(); + } else { + mutex_unlock(&pcpu_alloc_mutex); } return NULL; } From b00c52dae6d9ee8d0f2407118ef6544ae5524781 Mon Sep 17 00:00:00 2001 From: Wenwei Tao Date: Fri, 13 May 2016 22:59:20 +0800 Subject: [PATCH 0039/1050] cgroup: remove redundant cleanup in css_create When create css failed, before call css_free_rcu_fn, we remove the css id and exit the percpu_ref, but we will do these again in css_free_work_fn, so they are redundant. Especially the css id, that would cause problem if we remove it twice, since it may be assigned to another css after the first remove. tj: This was broken by two commits updating the free path without synchronizing the creation failure path. This can be easily triggered by trying to create more than 64k memory cgroups. Signed-off-by: Wenwei Tao Signed-off-by: Tejun Heo Cc: Vladimir Davydov Fixes: 9a1049da9bd2 ("percpu-refcount: require percpu_ref to be exited explicitly") Fixes: 01e586598b22 ("cgroup: release css->id after css_free") Cc: stable@vger.kernel.org # v3.17+ --- kernel/cgroup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 86cb5c6e8932..789b84f973c9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5150,7 +5150,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL); if (err < 0) - goto err_free_percpu_ref; + goto err_free_css; css->id = err; /* @css is ready to be brought online now, make it visible */ @@ -5174,9 +5174,6 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, err_list_del: list_del_rcu(&css->sibling); - cgroup_idr_remove(&ss->css_idr, css->id); -err_free_percpu_ref: - percpu_ref_exit(&css->refcnt); err_free_css: call_rcu(&css->rcu_head, css_free_rcu_fn); return ERR_PTR(err); From 38272dc4f1b17437871b786d567e1242d0904f5a Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 24 May 2016 09:21:27 +0000 Subject: [PATCH 0040/1050] perf symbols: Check kptr_restrict for root If kptr_restrict is set to 2, even root is not allowed to see pointers. This patch checks kptr_restrict even if euid == 0. For root, report error if kptr_restrict is 2. Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1464081688-167940-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 20f9cb32b703..54c4ff2b1cee 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1933,17 +1933,17 @@ int setup_intlist(struct intlist **list, const char *list_str, static bool symbol__read_kptr_restrict(void) { bool value = false; + FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (geteuid() != 0) { - FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (fp != NULL) { - char line[8]; + if (fp != NULL) { + char line[8]; - if (fgets(line, sizeof(line), fp) != NULL) - value = atoi(line) != 0; + if (fgets(line, sizeof(line), fp) != NULL) + value = (geteuid() != 0) ? + (atoi(line) != 0) : + (atoi(line) == 2); - fclose(fp); - } + fclose(fp); } return value; From 3dc6c1d54ff4cc9ce7e8513c286c970304cde20b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 24 May 2016 09:21:28 +0000 Subject: [PATCH 0041/1050] perf record: Fix crash when kptr is restricted Before this patch, a simple 'perf record' could fail if kptr_restrict is set to 1 (for normal user) or 2 (for root): # perf record ls WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check /proc/sys/kernel/kptr_restrict. Samples in kernel functions may not be resolved if a suitable vmlinux file is not found in the buildid cache or in the vmlinux path. Samples in kernel modules won't be resolved at all. If some relocation was applied (e.g. kexec) symbols may be misresolved even with a suitable vmlinux or kallsyms file. Segmentation fault (core dumped) This patch skips perf_event__synthesize_kernel_mmap() when kptr is not available. Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Fixes: 45e90056904b ("perf machine: Do not bail out if not managing to read ref reloc symbol") Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1464081688-167940-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f6fcc6832949..9b141f12329e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -673,6 +673,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, int err; union perf_event *event; + if (symbol_conf.kptr_restrict) + return -1; if (map == NULL) return -1; From 5ea5888b2fbf5b230da62b2a21c8247bebb6c9cf Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 27 May 2016 11:35:51 +0000 Subject: [PATCH 0042/1050] perf ctf: Convert invalid chars in a string before set value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We observed some crazy apps on Android set their comm to unprintable string. For example: # cat /proc/10607/task/*/comm tencent.qqmusic ... Binder_2 日志输出线 <-- Chinese word 'log output thread' WifiManager ... 'perf data convert' fails to convert perf.data with such string to CTF format. For example: # cat << EOF > ./badguy.c #include int main(int argc, char *argv[]) { prctl(PR_SET_NAME, "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf"); while(1) sleep(1); return 0; } EOF # gcc ./badguy.c # perf record -e sched:* ./a.out # perf data convert --to-ctf ./bad.ctf CTF stream 4 flush failed [ perf data convert: Converted 'perf.data' into CTF data './bad.ctf' ] [ perf data convert: Converted and wrote 0.008 MB (78 samples) ] # babeltrace ./bad.ctf/ [error] Packet size (18446744073709551615 bits) is larger than remaining file size (262144 bits). [error] Stream index creation error. [error] Open file stream error. [warning] [Context] Cannot open_trace of format ctf at path ./bad.ctf. [warning] [Context] cannot open trace "./bad.ctf" from ./bad.ctf/ for reading. [error] Cannot open any trace for reading. [error] opening trace "./bad.ctf/" for reading. [error] none of the specified trace paths could be opened. This patch converts unprintable characters to hexadecimal word. After applying this patch the above test works correctly: # ~/perf data convert --to-ctf ./good.ctf [ perf data convert: Converted 'perf.data' into CTF data './good.ctf' ] [ perf data convert: Converted and wrote 0.008 MB (78 samples) ] # babeltrace ./good.ctf .. [23:14:35.491665268] (+0.000001100) sched:sched_wakeup: { cpu_id = 4 }, { perf_ip = 0xFFFFFFFF810AEF33, perf_tid = 0, perf_pid = 0, perf_id = 5123, perf_period = 1, common_type = 270, common_flags = 45, common_preempt_count = 4, common_pid = 0, comm = "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf", pid = 1057, prio = 120, success = 1, target_cpu = 4 } [23:14:35.491666230] (+0.000000962) sched:sched_wakeup: { cpu_id = 4 }, { perf_ip = 0xFFFFFFFF810AEF33, perf_tid = 0, perf_pid = 0, perf_id = 5122, perf_period = 1, common_type = 270, common_flags = 45, common_preempt_count = 4, common_pid = 0, comm = "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf", pid = 1057, prio = 120, success = 1, target_cpu = 4 } .. Committer note: To build perf with libabeltrace, use: $ mkdir -p /tmp/build/perf $ make LIBBABELTRACE=1 LIBBABELTRACE_DIR=/usr/local O=/tmp/build/perf -C tools/perf install-bin Or equivalent (no O=, fixup LIBBABELTRACE_DIR, etc). Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1464348951-179595-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 41 +++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index bbf69d248ec5..9f53020c3269 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -204,6 +204,44 @@ static unsigned long long adjust_signedness(unsigned long long value_int, int si return (value_int & value_mask) | ~value_mask; } +static int string_set_value(struct bt_ctf_field *field, const char *string) +{ + char *buffer = NULL; + size_t len = strlen(string), i, p; + int err; + + for (i = p = 0; i < len; i++, p++) { + if (isprint(string[i])) { + if (!buffer) + continue; + buffer[p] = string[i]; + } else { + char numstr[5]; + + snprintf(numstr, sizeof(numstr), "\\x%02x", + (unsigned int)(string[i]) & 0xff); + + if (!buffer) { + buffer = zalloc(i + (len - i) * 4 + 2); + if (!buffer) { + pr_err("failed to set unprintable string '%s'\n", string); + return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING"); + } + if (i > 0) + strncpy(buffer, string, i); + } + strncat(buffer + p, numstr, 4); + p += 3; + } + } + + if (!buffer) + return bt_ctf_field_string_set_value(field, string); + err = bt_ctf_field_string_set_value(field, buffer); + free(buffer); + return err; +} + static int add_tracepoint_field_value(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, @@ -270,8 +308,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, } if (flags & FIELD_IS_STRING) - ret = bt_ctf_field_string_set_value(field, - data + offset + i * len); + ret = string_set_value(field, data + offset + i * len); else { unsigned long long value_int; From 94abd778a7bb00ed5d00f56d9fbfcbf5b7c02a5c Mon Sep 17 00:00:00 2001 From: Jaap Jan Meijer Date: Thu, 12 May 2016 18:25:08 +0200 Subject: [PATCH 0043/1050] brcmfmac: add fallback for devices that do not report per-chain values If brcmf_cfg80211_get_station fails to determine the RSSI from the per-chain values get the value individually as a fallback. Fixes: 1f0dc59a6de9 ("brcmfmac: rework .get_station() callback") Signed-off-by: Jaap Jan Meijer Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index d0631b6cfd53..62f475e31077 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2540,12 +2540,14 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, const u8 *mac, struct station_info *sinfo) { struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_scb_val_le scb_val; s32 err = 0; struct brcmf_sta_info_le sta_info_le; u32 sta_flags; u32 is_tdls_peer; s32 total_rssi; s32 count_rssi; + int rssi; u32 i; brcmf_dbg(TRACE, "Enter, MAC %pM\n", mac); @@ -2629,6 +2631,20 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); total_rssi /= count_rssi; sinfo->signal = total_rssi; + } else if (test_bit(BRCMF_VIF_STATUS_CONNECTED, + &ifp->vif->sme_state)) { + memset(&scb_val, 0, sizeof(scb_val)); + err = brcmf_fil_cmd_data_get(ifp, BRCMF_C_GET_RSSI, + &scb_val, sizeof(scb_val)); + if (err) { + brcmf_err("Could not get rssi (%d)\n", err); + goto done; + } else { + rssi = le32_to_cpu(scb_val.val); + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); + sinfo->signal = rssi; + brcmf_dbg(CONN, "RSSI %d dBm\n", rssi); + } } } done: From de26859dcf363d520cc44e59f6dcaf20ebe0aadf Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 21 May 2016 11:50:35 -0500 Subject: [PATCH 0044/1050] rtlwifi: Fix scheduling while atomic error from commit 49f86ec21c01 Commit 49f86ec21c01 ("rtlwifi: Change long delays to sleeps") was correct for most cases; however, driver rtl8192ce calls the affected routines while in atomic context. The kernel bug output is as follows: BUG: scheduling while atomic: wpa_supplicant/627/0x00000002 [...] [] __schedule+0x899/0xad0 [] schedule+0x3c/0x90 [] schedule_hrtimeout_range_clock+0xa2/0x120 [] ? hrtimer_init+0x120/0x120 [] ? schedule_hrtimeout_range_clock+0x96/0x120 [] schedule_hrtimeout_range+0x13/0x20 [] usleep_range+0x4f/0x70 [] rtl_rfreg_delay+0x38/0x50 [rtlwifi] [] rtl92c_phy_config_rf_with_headerfile+0xc7/0xe0 [rtl8192ce] To fix this bug, three of the changes from delay to sleep are reverted. Unfortunately, one of the changes involves a delay of 50 msec. The calling code will be modified so that this long delay can be avoided; however, this change is being pushed now to fix the problem in kernel 4.6.0. Fixes: 49f86ec21c01 ("rtlwifi: Change long delays to sleeps") Reported-by: James Feeney Signed-off-by: Larry Finger Cc: James Feeney Cc: Stable [4.6+] Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 0f48048b8654..3a0faa8fe9d4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL(channel5g_80m); void rtl_addr_delay(u32 addr) { if (addr == 0xfe) - msleep(50); + mdelay(50); else if (addr == 0xfd) msleep(5); else if (addr == 0xfc) @@ -75,7 +75,7 @@ void rtl_rfreg_delay(struct ieee80211_hw *hw, enum radio_path rfpath, u32 addr, rtl_addr_delay(addr); } else { rtl_set_rfreg(hw, rfpath, addr, mask, data); - usleep_range(1, 2); + udelay(1); } } EXPORT_SYMBOL(rtl_rfreg_delay); @@ -86,7 +86,7 @@ void rtl_bb_delay(struct ieee80211_hw *hw, u32 addr, u32 data) rtl_addr_delay(addr); } else { rtl_set_bbreg(hw, addr, MASKDWORD, data); - usleep_range(1, 2); + udelay(1); } } EXPORT_SYMBOL(rtl_bb_delay); From d43a41152f8e9e4c0d19850884d1fada076dee10 Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Tue, 19 Apr 2016 11:18:33 +0200 Subject: [PATCH 0045/1050] iio:st_pressure: fix sampling gains (bring inline with ABI) Temperature channels report scaled samples in Celsius although expected as milli degree Celsius in Documentation/ABI/testing/sysfs-bus-iio. Gains are not implemented at all for LPS001WP pressure and temperature channels. This patch ensures that proper offsets and scales are exposed to userpace for both pressure and temperature channels. Also fix a NULL pointer exception when userspace reads content of sysfs scale attribute when gains are not defined. Signed-off-by: Gregor Boirie Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/st_pressure_core.c | 80 +++++++++++++++---------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 9e9b72a8f18f..257b58ac6779 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -28,15 +28,21 @@ #include #include "st_pressure.h" +#define MCELSIUS_PER_CELSIUS 1000 + +/* Default pressure sensitivity */ #define ST_PRESS_LSB_PER_MBAR 4096UL #define ST_PRESS_KPASCAL_NANO_SCALE (100000000UL / \ ST_PRESS_LSB_PER_MBAR) + +/* Default temperature sensitivity */ #define ST_PRESS_LSB_PER_CELSIUS 480UL -#define ST_PRESS_CELSIUS_NANO_SCALE (1000000000UL / \ - ST_PRESS_LSB_PER_CELSIUS) +#define ST_PRESS_MILLI_CELSIUS_OFFSET 42500UL + #define ST_PRESS_NUMBER_DATA_CHANNELS 1 /* FULLSCALE */ +#define ST_PRESS_FS_AVL_1100MB 1100 #define ST_PRESS_FS_AVL_1260MB 1260 #define ST_PRESS_1_OUT_XL_ADDR 0x28 @@ -54,9 +60,6 @@ #define ST_PRESS_LPS331AP_PW_MASK 0x80 #define ST_PRESS_LPS331AP_FS_ADDR 0x23 #define ST_PRESS_LPS331AP_FS_MASK 0x30 -#define ST_PRESS_LPS331AP_FS_AVL_1260_VAL 0x00 -#define ST_PRESS_LPS331AP_FS_AVL_1260_GAIN ST_PRESS_KPASCAL_NANO_SCALE -#define ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN ST_PRESS_CELSIUS_NANO_SCALE #define ST_PRESS_LPS331AP_BDU_ADDR 0x20 #define ST_PRESS_LPS331AP_BDU_MASK 0x04 #define ST_PRESS_LPS331AP_DRDY_IRQ_ADDR 0x22 @@ -67,9 +70,14 @@ #define ST_PRESS_LPS331AP_OD_IRQ_ADDR 0x22 #define ST_PRESS_LPS331AP_OD_IRQ_MASK 0x40 #define ST_PRESS_LPS331AP_MULTIREAD_BIT true -#define ST_PRESS_LPS331AP_TEMP_OFFSET 42500 /* CUSTOM VALUES FOR LPS001WP SENSOR */ + +/* LPS001WP pressure resolution */ +#define ST_PRESS_LPS001WP_LSB_PER_MBAR 16UL +/* LPS001WP temperature resolution */ +#define ST_PRESS_LPS001WP_LSB_PER_CELSIUS 64UL + #define ST_PRESS_LPS001WP_WAI_EXP 0xba #define ST_PRESS_LPS001WP_ODR_ADDR 0x20 #define ST_PRESS_LPS001WP_ODR_MASK 0x30 @@ -78,6 +86,8 @@ #define ST_PRESS_LPS001WP_ODR_AVL_13HZ_VAL 0x03 #define ST_PRESS_LPS001WP_PW_ADDR 0x20 #define ST_PRESS_LPS001WP_PW_MASK 0x40 +#define ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN \ + (100000000UL / ST_PRESS_LPS001WP_LSB_PER_MBAR) #define ST_PRESS_LPS001WP_BDU_ADDR 0x20 #define ST_PRESS_LPS001WP_BDU_MASK 0x04 #define ST_PRESS_LPS001WP_MULTIREAD_BIT true @@ -94,11 +104,6 @@ #define ST_PRESS_LPS25H_ODR_AVL_25HZ_VAL 0x04 #define ST_PRESS_LPS25H_PW_ADDR 0x20 #define ST_PRESS_LPS25H_PW_MASK 0x80 -#define ST_PRESS_LPS25H_FS_ADDR 0x00 -#define ST_PRESS_LPS25H_FS_MASK 0x00 -#define ST_PRESS_LPS25H_FS_AVL_1260_VAL 0x00 -#define ST_PRESS_LPS25H_FS_AVL_1260_GAIN ST_PRESS_KPASCAL_NANO_SCALE -#define ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN ST_PRESS_CELSIUS_NANO_SCALE #define ST_PRESS_LPS25H_BDU_ADDR 0x20 #define ST_PRESS_LPS25H_BDU_MASK 0x04 #define ST_PRESS_LPS25H_DRDY_IRQ_ADDR 0x23 @@ -109,7 +114,6 @@ #define ST_PRESS_LPS25H_OD_IRQ_ADDR 0x22 #define ST_PRESS_LPS25H_OD_IRQ_MASK 0x40 #define ST_PRESS_LPS25H_MULTIREAD_BIT true -#define ST_PRESS_LPS25H_TEMP_OFFSET 42500 #define ST_PRESS_LPS25H_OUT_XL_ADDR 0x28 #define ST_TEMP_LPS25H_OUT_L_ADDR 0x2b @@ -161,7 +165,9 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = { .storagebits = 16, .endianness = IIO_LE, }, - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_separate = + BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE), .modified = 0, }, { @@ -177,7 +183,7 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = { }, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_OFFSET), + BIT(IIO_CHAN_INFO_SCALE), .modified = 0, }, IIO_CHAN_SOFT_TIMESTAMP(1) @@ -212,11 +218,14 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .addr = ST_PRESS_LPS331AP_FS_ADDR, .mask = ST_PRESS_LPS331AP_FS_MASK, .fs_avl = { + /* + * Pressure and temperature sensitivity values + * as defined in table 3 of LPS331AP datasheet. + */ [0] = { .num = ST_PRESS_FS_AVL_1260MB, - .value = ST_PRESS_LPS331AP_FS_AVL_1260_VAL, - .gain = ST_PRESS_LPS331AP_FS_AVL_1260_GAIN, - .gain2 = ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN, + .gain = ST_PRESS_KPASCAL_NANO_SCALE, + .gain2 = ST_PRESS_LSB_PER_CELSIUS, }, }, }, @@ -261,7 +270,17 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE, }, .fs = { - .addr = 0, + .fs_avl = { + /* + * Pressure and temperature resolution values + * as defined in table 3 of LPS001WP datasheet. + */ + [0] = { + .num = ST_PRESS_FS_AVL_1100MB, + .gain = ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN, + .gain2 = ST_PRESS_LPS001WP_LSB_PER_CELSIUS, + }, + }, }, .bdu = { .addr = ST_PRESS_LPS001WP_BDU_ADDR, @@ -298,14 +317,15 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE, }, .fs = { - .addr = ST_PRESS_LPS25H_FS_ADDR, - .mask = ST_PRESS_LPS25H_FS_MASK, .fs_avl = { + /* + * Pressure and temperature sensitivity values + * as defined in table 3 of LPS25H datasheet. + */ [0] = { .num = ST_PRESS_FS_AVL_1260MB, - .value = ST_PRESS_LPS25H_FS_AVL_1260_VAL, - .gain = ST_PRESS_LPS25H_FS_AVL_1260_GAIN, - .gain2 = ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN, + .gain = ST_PRESS_KPASCAL_NANO_SCALE, + .gain2 = ST_PRESS_LSB_PER_CELSIUS, }, }, }, @@ -364,26 +384,26 @@ static int st_press_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: - *val = 0; - switch (ch->type) { case IIO_PRESSURE: + *val = 0; *val2 = press_data->current_fullscale->gain; - break; + return IIO_VAL_INT_PLUS_NANO; case IIO_TEMP: + *val = MCELSIUS_PER_CELSIUS; *val2 = press_data->current_fullscale->gain2; - break; + return IIO_VAL_FRACTIONAL; default: err = -EINVAL; goto read_error; } - return IIO_VAL_INT_PLUS_NANO; case IIO_CHAN_INFO_OFFSET: switch (ch->type) { case IIO_TEMP: - *val = 425; - *val2 = 10; + *val = ST_PRESS_MILLI_CELSIUS_OFFSET * + press_data->current_fullscale->gain2; + *val2 = MCELSIUS_PER_CELSIUS; break; default: err = -EINVAL; From 09bc0ddaab6cab0fa95a67d5535ec772e2671193 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Thu, 26 May 2016 19:55:06 -0700 Subject: [PATCH 0046/1050] iio: humidity: hdc100x: fix IIO_TEMP channel reporting IIO_TEMP channel was being incorrectly reported back as Celsius when it should have been milliCelsius. This is via an incorrect scale value being returned to userspace. Signed-off-by: Matt Ranostay Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hdc100x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index 59aa1cbdc2fd..30709838dcdc 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -211,7 +211,7 @@ static int hdc100x_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT_PLUS_MICRO; case IIO_CHAN_INFO_SCALE: if (chan->type == IIO_TEMP) { - *val = 165; + *val = 165000; *val2 = 65536 >> 2; return IIO_VAL_FRACTIONAL; } else { From 13c27e946ddc0e19f3d8b307b14cd8053fcb4844 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 25 May 2016 09:40:26 +0200 Subject: [PATCH 0047/1050] iio: bh1780: dereference the client properly The code in runtime_[suspend|resume] was assuming that the i2c client data was the bh1780 state container, but it contains the IIO device. So first dereference the IIO device from the i2c client, then get the state container using the iio_priv() call. Fixes: 1f0477f18306 ("iio: light: new driver for the ROHM BH1780") Signed-off-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/bh1780.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c index f83595334ff1..5fd432df2c8f 100644 --- a/drivers/iio/light/bh1780.c +++ b/drivers/iio/light/bh1780.c @@ -226,7 +226,8 @@ static int bh1780_remove(struct i2c_client *client) static int bh1780_runtime_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - struct bh1780_data *bh1780 = i2c_get_clientdata(client); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct bh1780_data *bh1780 = iio_priv(indio_dev); int ret; ret = bh1780_write(bh1780, BH1780_REG_CONTROL, BH1780_POFF); @@ -241,7 +242,8 @@ static int bh1780_runtime_suspend(struct device *dev) static int bh1780_runtime_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - struct bh1780_data *bh1780 = i2c_get_clientdata(client); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct bh1780_data *bh1780 = iio_priv(indio_dev); int ret; ret = bh1780_write(bh1780, BH1780_REG_CONTROL, BH1780_PON); From 0dd09ca419d712d315ffd864158f515e6c64261a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 25 May 2016 09:40:27 +0200 Subject: [PATCH 0048/1050] iio: light: bh1780: assign a static name Using the struct i2c_device->id field for naming the light sensor is a bad idea: when booting from the pure device tree this is NULL and that causes the device not to have the "name" property in sysfs and that in turn confuses the "lsiio" command to stop listing devices. So instead of using the device .id, use the hard string "bh1780", which works just fine. Fixes: 1f0477f18306 ("iio: light: new driver for the ROHM BH1780") Signed-off-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/bh1780.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c index 5fd432df2c8f..b54dcba05a82 100644 --- a/drivers/iio/light/bh1780.c +++ b/drivers/iio/light/bh1780.c @@ -187,7 +187,7 @@ static int bh1780_probe(struct i2c_client *client, indio_dev->dev.parent = &client->dev; indio_dev->info = &bh1780_info; - indio_dev->name = id->name; + indio_dev->name = "bh1780"; indio_dev->channels = bh1780_channels; indio_dev->num_channels = ARRAY_SIZE(bh1780_channels); indio_dev->modes = INDIO_DIRECT_MODE; From 65925b65ed98ffdb277cf5ea1af45731dac0b30b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 21 May 2016 20:43:16 +0200 Subject: [PATCH 0049/1050] iio: st_sensors: switch to a threaded interrupt commit 98ad8b41f58dff6b30713d7f09ae3834b8df7ded ("iio: st_sensors: verify interrupt event to status") caused a regression when reading ST sensors from a HRTimer trigger rather than the intrinsic interrupts: the HRTimer may trigger faster than the sensor provides new values, and as the check against new values available as a cause of the interrupt trigger was done in the poll function, this would bail out of the HRTimer interrupt with IRQ_NONE. So clearly we need to only check the new values available from the proper interrupt handler and not from the poll function, which should rather just read the raw values from the registers, put them into the buffer and be happy. To achieve this: switch the ST Sensors over to using a true threaded interrupt handler. In the interrupt thread, check if new values are available, else yield to the (potential) next device on the same interrupt line to check the registers. If the interrupt was ours, proceed to poll the values. Instead of relying on iio_trigger_generic_data_rdy_poll() as a top half to wake up the thread that polls the sensor for new data, have the thread call iio_trigger_poll_chained() after determining that is is the proper source of the interrupt. This is modelled on drivers/iio/accel/mma8452.c which is already using a properly threaded interrupt handler. In order to get the same precision in timestamps as previously, where samples would be timestamped in the poll function pf->timestamp when calling iio_trigger_generic_data_rdy_poll() we introduce a local timestamp in the sensor data, set it in the top half (fastpath) of the interrupt handler and provide that to the core when calling iio_push_to_buffers_with_timestamp(). Additionally: if the active scanmask is not set for the sensor no IRQs should be enabled and we need to bail out with IRQ_NONE. This can happen if spurious IRQs fire when installing the threaded interrupt handler. Tested with hard interrupt triggers on LIS331DL, then also tested with hrtimers on the same sensor by creating a 75Hz HRTimer and using it to poll the sensor. Signed-off-by: Linus Walleij Cc: Giuseppe Barba Cc: Denis Ciocca Reported-by: Crestez Dan Leonard Tested-by: Crestez Dan Leonard Tested-by: Jonathan Cameron Fixes: 97865fe41322 ("iio: st_sensors: verify interrupt event to status") Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_buffer.c | 2 +- drivers/iio/accel/st_accel_core.c | 1 + .../iio/common/st_sensors/st_sensors_buffer.c | 25 ++---- .../iio/common/st_sensors/st_sensors_core.c | 3 + .../common/st_sensors/st_sensors_trigger.c | 88 ++++++++++++++++++- drivers/iio/gyro/st_gyro_buffer.c | 2 +- drivers/iio/gyro/st_gyro_core.c | 1 + drivers/iio/magnetometer/st_magn_buffer.c | 2 +- drivers/iio/magnetometer/st_magn_core.c | 1 + drivers/iio/pressure/st_pressure_buffer.c | 2 +- drivers/iio/pressure/st_pressure_core.c | 1 + include/linux/iio/common/st_sensors.h | 9 +- 12 files changed, 111 insertions(+), 26 deletions(-) diff --git a/drivers/iio/accel/st_accel_buffer.c b/drivers/iio/accel/st_accel_buffer.c index a1e642ee13d6..7fddc137e91e 100644 --- a/drivers/iio/accel/st_accel_buffer.c +++ b/drivers/iio/accel/st_accel_buffer.c @@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_accel_buffer_setup_ops = { int st_accel_allocate_ring(struct iio_dev *indio_dev) { - return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time, + return iio_triggered_buffer_setup(indio_dev, NULL, &st_sensors_trigger_handler, &st_accel_buffer_setup_ops); } diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index dc73f2d85e6d..4d95bfc4786c 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -741,6 +741,7 @@ static const struct iio_info accel_info = { static const struct iio_trigger_ops st_accel_trigger_ops = { .owner = THIS_MODULE, .set_trigger_state = ST_ACCEL_TRIGGER_SET_STATE, + .validate_device = st_sensors_validate_device, }; #define ST_ACCEL_TRIGGER_OPS (&st_accel_trigger_ops) #else diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c index c55898543a47..f1693dbebb8a 100644 --- a/drivers/iio/common/st_sensors/st_sensors_buffer.c +++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c @@ -57,31 +57,20 @@ irqreturn_t st_sensors_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct st_sensor_data *sdata = iio_priv(indio_dev); + s64 timestamp; - /* If we have a status register, check if this IRQ came from us */ - if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) { - u8 status; - - len = sdata->tf->read_byte(&sdata->tb, sdata->dev, - sdata->sensor_settings->drdy_irq.addr_stat_drdy, - &status); - if (len < 0) - dev_err(sdata->dev, "could not read channel status\n"); - - /* - * If this was not caused by any channels on this sensor, - * return IRQ_NONE - */ - if (!(status & (u8)indio_dev->active_scan_mask[0])) - return IRQ_NONE; - } + /* If we do timetamping here, do it before reading the values */ + if (sdata->hw_irq_trigger) + timestamp = sdata->hw_timestamp; + else + timestamp = iio_get_time_ns(); len = st_sensors_get_buffer_element(indio_dev, sdata->buffer_data); if (len < 0) goto st_sensors_get_buffer_element_error; iio_push_to_buffers_with_timestamp(indio_dev, sdata->buffer_data, - pf->timestamp); + timestamp); st_sensors_get_buffer_element_error: iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index dffe00692169..928ee68fcc5f 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -424,6 +424,9 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable) else drdy_mask = sdata->sensor_settings->drdy_irq.mask_int2; + /* Flag to the poll function that the hardware trigger is in use */ + sdata->hw_irq_trigger = enable; + /* Enable/Disable the interrupt generator for data ready. */ err = st_sensors_write_data_with_mask(indio_dev, sdata->sensor_settings->drdy_irq.addr, diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index da72279fcf99..1f59bcc0f143 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -17,6 +17,73 @@ #include #include "st_sensors_core.h" +/** + * st_sensors_irq_handler() - top half of the IRQ-based triggers + * @irq: irq number + * @p: private handler data + */ +irqreturn_t st_sensors_irq_handler(int irq, void *p) +{ + struct iio_trigger *trig = p; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); + struct st_sensor_data *sdata = iio_priv(indio_dev); + + /* Get the time stamp as close in time as possible */ + sdata->hw_timestamp = iio_get_time_ns(); + return IRQ_WAKE_THREAD; +} + +/** + * st_sensors_irq_thread() - bottom half of the IRQ-based triggers + * @irq: irq number + * @p: private handler data + */ +irqreturn_t st_sensors_irq_thread(int irq, void *p) +{ + struct iio_trigger *trig = p; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); + struct st_sensor_data *sdata = iio_priv(indio_dev); + int ret; + + /* + * If this trigger is backed by a hardware interrupt and we have a + * status register, check if this IRQ came from us + */ + if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) { + u8 status; + + ret = sdata->tf->read_byte(&sdata->tb, sdata->dev, + sdata->sensor_settings->drdy_irq.addr_stat_drdy, + &status); + if (ret < 0) { + dev_err(sdata->dev, "could not read channel status\n"); + goto out_poll; + } + /* + * the lower bits of .active_scan_mask[0] is directly mapped + * to the channels on the sensor: either bit 0 for + * one-dimensional sensors, or e.g. x,y,z for accelerometers, + * gyroscopes or magnetometers. No sensor use more than 3 + * channels, so cut the other status bits here. + */ + status &= 0x07; + + /* + * If this was not caused by any channels on this sensor, + * return IRQ_NONE + */ + if (!indio_dev->active_scan_mask) + return IRQ_NONE; + if (!(status & (u8)indio_dev->active_scan_mask[0])) + return IRQ_NONE; + } + +out_poll: + /* It's our IRQ: proceed to handle the register polling */ + iio_trigger_poll_chained(p); + return IRQ_HANDLED; +} + int st_sensors_allocate_trigger(struct iio_dev *indio_dev, const struct iio_trigger_ops *trigger_ops) { @@ -77,9 +144,12 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, sdata->sensor_settings->drdy_irq.addr_stat_drdy) irq_trig |= IRQF_SHARED; - err = request_threaded_irq(irq, - iio_trigger_generic_data_rdy_poll, - NULL, + /* Let's create an interrupt thread masking the hard IRQ here */ + irq_trig |= IRQF_ONESHOT; + + err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev), + st_sensors_irq_handler, + st_sensors_irq_thread, irq_trig, sdata->trig->name, sdata->trig); @@ -119,6 +189,18 @@ void st_sensors_deallocate_trigger(struct iio_dev *indio_dev) } EXPORT_SYMBOL(st_sensors_deallocate_trigger); +int st_sensors_validate_device(struct iio_trigger *trig, + struct iio_dev *indio_dev) +{ + struct iio_dev *indio = iio_trigger_get_drvdata(trig); + + if (indio != indio_dev) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(st_sensors_validate_device); + MODULE_AUTHOR("Denis Ciocca "); MODULE_DESCRIPTION("STMicroelectronics ST-sensors trigger"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/gyro/st_gyro_buffer.c b/drivers/iio/gyro/st_gyro_buffer.c index d67b17b6a7aa..a5377044e42f 100644 --- a/drivers/iio/gyro/st_gyro_buffer.c +++ b/drivers/iio/gyro/st_gyro_buffer.c @@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_gyro_buffer_setup_ops = { int st_gyro_allocate_ring(struct iio_dev *indio_dev) { - return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time, + return iio_triggered_buffer_setup(indio_dev, NULL, &st_sensors_trigger_handler, &st_gyro_buffer_setup_ops); } diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index 52a3c87c375c..a8012955a1f6 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -409,6 +409,7 @@ static const struct iio_info gyro_info = { static const struct iio_trigger_ops st_gyro_trigger_ops = { .owner = THIS_MODULE, .set_trigger_state = ST_GYRO_TRIGGER_SET_STATE, + .validate_device = st_sensors_validate_device, }; #define ST_GYRO_TRIGGER_OPS (&st_gyro_trigger_ops) #else diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c index ecd3bd0a9769..0a9e8fadfa9d 100644 --- a/drivers/iio/magnetometer/st_magn_buffer.c +++ b/drivers/iio/magnetometer/st_magn_buffer.c @@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = { int st_magn_allocate_ring(struct iio_dev *indio_dev) { - return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time, + return iio_triggered_buffer_setup(indio_dev, NULL, &st_sensors_trigger_handler, &st_magn_buffer_setup_ops); } diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 62036d2a9956..8250fc322c56 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -572,6 +572,7 @@ static const struct iio_info magn_info = { static const struct iio_trigger_ops st_magn_trigger_ops = { .owner = THIS_MODULE, .set_trigger_state = ST_MAGN_TRIGGER_SET_STATE, + .validate_device = st_sensors_validate_device, }; #define ST_MAGN_TRIGGER_OPS (&st_magn_trigger_ops) #else diff --git a/drivers/iio/pressure/st_pressure_buffer.c b/drivers/iio/pressure/st_pressure_buffer.c index 2ff53f222352..99468d0a64e7 100644 --- a/drivers/iio/pressure/st_pressure_buffer.c +++ b/drivers/iio/pressure/st_pressure_buffer.c @@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_press_buffer_setup_ops = { int st_press_allocate_ring(struct iio_dev *indio_dev) { - return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time, + return iio_triggered_buffer_setup(indio_dev, NULL, &st_sensors_trigger_handler, &st_press_buffer_setup_ops); } diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 257b58ac6779..92a118c3c4ac 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -445,6 +445,7 @@ static const struct iio_info press_info = { static const struct iio_trigger_ops st_press_trigger_ops = { .owner = THIS_MODULE, .set_trigger_state = ST_PRESS_TRIGGER_SET_STATE, + .validate_device = st_sensors_validate_device, }; #define ST_PRESS_TRIGGER_OPS (&st_press_trigger_ops) #else diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index d029ffac0d69..99403b19092f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -223,6 +223,8 @@ struct st_sensor_settings { * @get_irq_data_ready: Function to get the IRQ used for data ready signal. * @tf: Transfer function structure used by I/O operations. * @tb: Transfer buffers and mutex used by I/O operations. + * @hw_irq_trigger: if we're using the hardware interrupt on the sensor. + * @hw_timestamp: Latest timestamp from the interrupt handler, when in use. */ struct st_sensor_data { struct device *dev; @@ -247,6 +249,9 @@ struct st_sensor_data { const struct st_sensor_transfer_function *tf; struct st_sensor_transfer_buffer tb; + + bool hw_irq_trigger; + s64 hw_timestamp; }; #ifdef CONFIG_IIO_BUFFER @@ -260,7 +265,8 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, const struct iio_trigger_ops *trigger_ops); void st_sensors_deallocate_trigger(struct iio_dev *indio_dev); - +int st_sensors_validate_device(struct iio_trigger *trig, + struct iio_dev *indio_dev); #else static inline int st_sensors_allocate_trigger(struct iio_dev *indio_dev, const struct iio_trigger_ops *trigger_ops) @@ -271,6 +277,7 @@ static inline void st_sensors_deallocate_trigger(struct iio_dev *indio_dev) { return; } +#define st_sensors_validate_device NULL #endif int st_sensors_init_sensor(struct iio_dev *indio_dev, From ff05916f94f912b25e8efcf9b02c10d481977bab Mon Sep 17 00:00:00 2001 From: Crestez Dan Leonard Date: Fri, 13 May 2016 21:43:33 +0300 Subject: [PATCH 0050/1050] iio: st_sensors: Init trigger before irq request This fixes a possible race where an interrupt arrives before complete initialization and crashes because iio_trigger_get_drvdata returns NULL. Cc: Giuseppe Barba Cc: Denis Ciocca Signed-off-by: Crestez Dan Leonard Reviewed-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/common/st_sensors/st_sensors_trigger.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 1f59bcc0f143..296e4ff19ae8 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -97,6 +97,10 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, return -ENOMEM; } + iio_trigger_set_drvdata(sdata->trig, indio_dev); + sdata->trig->ops = trigger_ops; + sdata->trig->dev.parent = sdata->dev; + irq = sdata->get_irq_data_ready(indio_dev); irq_trig = irqd_get_trigger_type(irq_get_irq_data(irq)); /* @@ -158,10 +162,6 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, goto iio_trigger_free; } - iio_trigger_set_drvdata(sdata->trig, indio_dev); - sdata->trig->ops = trigger_ops; - sdata->trig->dev.parent = sdata->dev; - err = iio_trigger_register(sdata->trig); if (err < 0) { dev_err(&indio_dev->dev, "failed to register iio trigger.\n"); From 99147606155f09feccac67c65387dc62260b749b Mon Sep 17 00:00:00 2001 From: Crestez Dan Leonard Date: Fri, 13 May 2016 21:43:34 +0300 Subject: [PATCH 0051/1050] iio: st_sensors: Disable DRDY at init time This fixes odd behavior after reboot. The fact that we set the device to powerdown mode is not sufficient to prevent DRDY being active because we might still have an unread sample. Even if powerdown was sufficient keeping DRDY disabled while trigger is not active is a good idea. Cc: Giuseppe Barba Cc: Denis Ciocca Signed-off-by: Crestez Dan Leonard Reviewed-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/common/st_sensors/st_sensors_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 928ee68fcc5f..9e59c90f6a8d 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -363,6 +363,11 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, if (err < 0) return err; + /* Disable DRDY, this might be still be enabled after reboot. */ + err = st_sensors_set_dataready_irq(indio_dev, false); + if (err < 0) + return err; + if (sdata->current_fullscale) { err = st_sensors_set_fullscale(indio_dev, sdata->current_fullscale->num); From 2547476a5e4061f6addb88d5fc837d3a950f54c4 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 13:45:35 +0200 Subject: [PATCH 0052/1050] Fix typos Signed-off-by: Andrea Gelmini Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 +- arch/arc/include/asm/entry-compact.h | 4 ++-- arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable.h | 2 +- arch/arc/include/asm/processor.h | 2 +- arch/arc/include/asm/smp.h | 2 +- arch/arc/include/asm/thread_info.h | 2 +- arch/arc/include/asm/uaccess.h | 2 +- arch/arc/include/uapi/asm/swab.h | 2 +- arch/arc/kernel/perf_event.c | 2 +- arch/arc/kernel/setup.c | 2 +- arch/arc/kernel/signal.c | 2 +- arch/arc/kernel/troubleshoot.c | 2 +- arch/arc/mm/cache.c | 6 +++--- arch/arc/mm/dma.c | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 02fabef2891c..d4df6be66d58 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -127,7 +127,7 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -#default target for make without any arguements. +#default target for make without any arguments. KBUILD_IMAGE := bootpImage all: $(KBUILD_IMAGE) diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index e0e1faf03c50..14c310f2e0b1 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -76,8 +76,8 @@ * We need to be a bit more cautious here. What if a kernel bug in * L1 ISR, caused SP to go whaco (some small value which looks like * USER stk) and then we take L2 ISR. - * Above brlo alone would treat it as a valid L1-L2 sceanrio - * instead of shouting alound + * Above brlo alone would treat it as a valid L1-L2 scenario + * instead of shouting around * The only feasible way is to make sure this L2 happened in * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in * L1 ISR before it switches stack diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 1fd467ef658f..b0b87f2447f5 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -83,7 +83,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm) local_flush_tlb_all(); /* - * Above checke for rollover of 8 bit ASID in 32 bit container. + * Above check for rollover of 8 bit ASID in 32 bit container. * If the container itself wrapped around, set it to a non zero * "generation" to distinguish from no context */ diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 034bbdc0ff61..858f98ef7f1b 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -47,7 +47,7 @@ * Page Tables are purely for Linux VM's consumption and the bits below are * suited to that (uniqueness). Hence some are not implemented in the TLB and * some have different value in TLB. - * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible becoz they live in + * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in * seperate PD0 and PD1, which combined forms a translation entry) * while for PTE perspective, they are 8 and 9 respectively * with MMU v3: Most bits (except SHARED) represent the exact hardware pos diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index f9048994b22f..16b630fbeb6a 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -78,7 +78,7 @@ struct task_struct; #define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) /* - * Where abouts of Task's sp, fp, blink when it was last seen in kernel mode. + * Where about of Task's sp, fp, blink when it was last seen in kernel mode. * Look in process.c for details of kernel stack layout */ #define TSK_K_ESP(tsk) (tsk->thread.ksp) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 991380438d6b..89fdd1b0a76e 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -86,7 +86,7 @@ static inline const char *arc_platform_smp_cpuinfo(void) * (1) These insn were introduced only in 4.10 release. So for older released * support needed. * - * (2) In a SMP setup, the LLOCK/SCOND atomiticity across CPUs needs to be + * (2) In a SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be * gaurantted by the platform (not something which core handles). * Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ * disabling for atomicity. diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 3af67455659a..2d79e527fa50 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -103,7 +103,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) /* * _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it. - * SYSCALL_TRACE is anways seperately/unconditionally tested right after a + * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a * syscall, so all that reamins to be tested is _TIF_WORK_MASK */ diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index d1da6032b715..a78d5670884f 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -32,7 +32,7 @@ #define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) /* - * Algorthmically, for __user_ok() we want do: + * Algorithmically, for __user_ok() we want do: * (start < TASK_SIZE) && (start+len < TASK_SIZE) * where TASK_SIZE could either be retrieved from thread_info->addr_limit or * emitted directly in code. diff --git a/arch/arc/include/uapi/asm/swab.h b/arch/arc/include/uapi/asm/swab.h index 095599a73195..71f3918b0fc3 100644 --- a/arch/arc/include/uapi/asm/swab.h +++ b/arch/arc/include/uapi/asm/swab.h @@ -74,7 +74,7 @@ __tmp ^ __in; \ }) -#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bwap instruction */ +#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bswap instruction */ #define __arch_swab32(x) \ ({ \ diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 6fd48021324b..08f03d9b5b3e 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -108,7 +108,7 @@ static void arc_perf_event_update(struct perf_event *event, int64_t delta = new_raw_count - prev_raw_count; /* - * We don't afaraid of hwc->prev_count changing beneath our feet + * We aren't afraid of hwc->prev_count changing beneath our feet * because there's no way for us to re-enter this function anytime. */ local64_set(&hwc->prev_count, new_raw_count); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index f63b8bfefb0c..2ee7a4d758a8 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -392,7 +392,7 @@ void __init setup_arch(char **cmdline_p) /* * If we are here, it is established that @uboot_arg didn't * point to DT blob. Instead if u-boot says it is cmdline, - * Appent to embedded DT cmdline. + * append to embedded DT cmdline. * setup_machine_fdt() would have populated @boot_command_line */ if (uboot_tag == 1) { diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 004b7f0bc76c..6cb3736b6b83 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -34,7 +34,7 @@ * -ViXS were still seeing crashes when using insmod to load drivers. * It turned out that the code to change Execute permssions for TLB entries * of user was not guarded for interrupts (mod_tlb_permission) - * This was cauing TLB entries to be overwritten on unrelated indexes + * This was causing TLB entries to be overwritten on unrelated indexes * * Vineetg: July 15th 2008: Bug #94183 * -Exception happens in Delay slot of a JMP, and before user space resumes, diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index a6f91e88ce36..934150e7ac48 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -276,7 +276,7 @@ static int tlb_stats_open(struct inode *inode, struct file *file) return 0; } -/* called on user read(): display the couters */ +/* called on user read(): display the counters */ static ssize_t tlb_stats_output(struct file *file, /* file descriptor */ char __user *user_buf, /* user buffer */ size_t len, /* length of buffer */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 9e5eddbb856f..5a294b2c3cb3 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -215,7 +215,7 @@ slc_chk: * ------------------ * This ver of MMU supports variable page sizes (1k-16k): although Linux will * only support 8k (default), 16k and 4k. - * However from hardware perspective, smaller page sizes aggrevate aliasing + * However from hardware perspective, smaller page sizes aggravate aliasing * meaning more vaddr bits needed to disambiguate the cache-line-op ; * the existing scheme of piggybacking won't work for certain configurations. * Two new registers IC_PTAG and DC_PTAG inttoduced. @@ -302,7 +302,7 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, /* * This is technically for MMU v4, using the MMU v3 programming model - * Special work for HS38 aliasing I-cache configuratino with PAE40 + * Special work for HS38 aliasing I-cache configuration with PAE40 * - upper 8 bits of paddr need to be written into PTAG_HI * - (and needs to be written before the lower 32 bits) * Note that PTAG_HI is hoisted outside the line loop @@ -936,7 +936,7 @@ void arc_cache_init(void) ic->ver, CONFIG_ARC_MMU_VER); /* - * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG + * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG * pair to provide vaddr/paddr respectively, just as in MMU v3 */ if (is_isa_arcv2() && ic->alias) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 8c8e36fa5659..73d7e4c75b7d 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -10,7 +10,7 @@ * DMA Coherent API Notes * * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is - * implemented by accessintg it using a kernel virtual address, with + * implemented by accessing it using a kernel virtual address, with * Cache bit off in the TLB entry. * * The default DMA address == Phy address which is 0x8000_0000 based. From 49acadff2a0cb4f7ff4efe0fb6c23f5fad81a3b3 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 25 May 2016 15:11:29 +0300 Subject: [PATCH 0053/1050] arc: Get rid of root core-frequency property Now when we switched to usage of real clk devices for CPU core frequency those root properties make no sense any longer. Se we're just getting rid of them here to not confuse readers of our .dts files. Signed-off-by: Alexey Brodkin Cc: Christian Ruppert Cc: Noam Camus Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/abilis_tb100.dtsi | 2 -- arch/arc/boot/dts/abilis_tb101.dtsi | 2 -- arch/arc/boot/dts/axc001.dtsi | 1 - arch/arc/boot/dts/axc003.dtsi | 1 - arch/arc/boot/dts/axc003_idu.dtsi | 1 - arch/arc/boot/dts/eznps.dts | 1 - arch/arc/boot/dts/nsim_700.dts | 1 - arch/arc/boot/dts/nsimosci.dts | 1 - arch/arc/boot/dts/nsimosci_hs.dts | 1 - arch/arc/boot/dts/nsimosci_hs_idu.dts | 1 - arch/arc/boot/dts/skeleton.dtsi | 1 - arch/arc/boot/dts/skeleton_hs.dtsi | 1 - arch/arc/boot/dts/skeleton_hs_idu.dtsi | 1 - arch/arc/boot/dts/vdk_axc003.dtsi | 1 - arch/arc/boot/dts/vdk_axc003_idu.dtsi | 1 - 15 files changed, 17 deletions(-) diff --git a/arch/arc/boot/dts/abilis_tb100.dtsi b/arch/arc/boot/dts/abilis_tb100.dtsi index 3942634f805a..02410b211433 100644 --- a/arch/arc/boot/dts/abilis_tb100.dtsi +++ b/arch/arc/boot/dts/abilis_tb100.dtsi @@ -23,8 +23,6 @@ / { - clock-frequency = <500000000>; /* 500 MHZ */ - soc100 { bus-frequency = <166666666>; diff --git a/arch/arc/boot/dts/abilis_tb101.dtsi b/arch/arc/boot/dts/abilis_tb101.dtsi index b0467229a5c4..f9e7686044eb 100644 --- a/arch/arc/boot/dts/abilis_tb101.dtsi +++ b/arch/arc/boot/dts/abilis_tb101.dtsi @@ -23,8 +23,6 @@ / { - clock-frequency = <500000000>; /* 500 MHZ */ - soc100 { bus-frequency = <166666666>; diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index 3e02f152edcb..6ae2c476ad82 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -15,7 +15,6 @@ / { compatible = "snps,arc"; - clock-frequency = <750000000>; /* 750 MHZ */ #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 378e455a94c4..14df46f141bf 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -14,7 +14,6 @@ / { compatible = "snps,arc"; - clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 64c94b2860ab..3d6cfa32bf51 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -14,7 +14,6 @@ / { compatible = "snps,arc"; - clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/eznps.dts b/arch/arc/boot/dts/eznps.dts index b89f6c3eb352..1e0d225791c1 100644 --- a/arch/arc/boot/dts/eznps.dts +++ b/arch/arc/boot/dts/eznps.dts @@ -18,7 +18,6 @@ / { compatible = "ezchip,arc-nps"; - clock-frequency = <83333333>; /* 83.333333 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&intc>; diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts index 5d5e373e0ebc..63970513e4ae 100644 --- a/arch/arc/boot/dts/nsim_700.dts +++ b/arch/arc/boot/dts/nsim_700.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsim"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index b5b060adce8a..763d66c883da 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsimosci"; - clock-frequency = <20000000>; /* 20 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts index 325e73090a18..4eb97c584b18 100644 --- a/arch/arc/boot/dts/nsimosci_hs.dts +++ b/arch/arc/boot/dts/nsimosci_hs.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsimosci_hs"; - clock-frequency = <20000000>; /* 20 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts index ee03d7126581..853f897eb2a3 100644 --- a/arch/arc/boot/dts/nsimosci_hs_idu.dts +++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsimosci_hs"; - clock-frequency = <5000000>; /* 5 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi index 3a10cc633e2b..65808fe0a290 100644 --- a/arch/arc/boot/dts/skeleton.dtsi +++ b/arch/arc/boot/dts/skeleton.dtsi @@ -13,7 +13,6 @@ / { compatible = "snps,arc"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; chosen { }; diff --git a/arch/arc/boot/dts/skeleton_hs.dtsi b/arch/arc/boot/dts/skeleton_hs.dtsi index 71fd308a9298..2dfe8037dfbb 100644 --- a/arch/arc/boot/dts/skeleton_hs.dtsi +++ b/arch/arc/boot/dts/skeleton_hs.dtsi @@ -8,7 +8,6 @@ / { compatible = "snps,arc"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; chosen { }; diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi index d1cb25a66989..4c11079f3565 100644 --- a/arch/arc/boot/dts/skeleton_hs_idu.dtsi +++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi @@ -8,7 +8,6 @@ / { compatible = "snps,arc"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; chosen { }; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi index ad4ee43bd2ac..0fd6ba985b16 100644 --- a/arch/arc/boot/dts/vdk_axc003.dtsi +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -14,7 +14,6 @@ / { compatible = "snps,arc"; - clock-frequency = <50000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi index a3cb6263c581..82214cd7ba0c 100644 --- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -15,7 +15,6 @@ / { compatible = "snps,arc"; - clock-frequency = <50000000>; #address-cells = <1>; #size-cells = <1>; From c56d329e28cff2d7a1e77cafc2f9c7e986ba11cb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:05:52 +0200 Subject: [PATCH 0054/1050] MAINTAINERS: Add file patterns for dma device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Signed-off-by: Vinod Koul --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7304d2e37a98..f7bbae36d078 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3773,6 +3773,7 @@ Q: https://patchwork.kernel.org/project/linux-dmaengine/list/ S: Maintained F: drivers/dma/ F: include/linux/dmaengine.h +F: Documentation/devicetree/bindings/dma/ F: Documentation/dmaengine/ T: git git://git.infradead.org/users/vkoul/slave-dma.git From 4a9723e8df68cfce4048517ee32e37f78854b6fb Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 12 May 2016 16:54:08 +0200 Subject: [PATCH 0055/1050] dmaengine: at_xdmac: align descriptors on 64 bits Having descriptors aligned on 64 bits allows update CNDA and CUBC in an atomic way. Signed-off-by: Ludovic Desroches Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Cc: stable@vger.kernel.org #v4.1 and later Reviewed-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 8e304b1befc5..ba9b0b73fa47 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -242,7 +242,7 @@ struct at_xdmac_lld { u32 mbr_dus; /* Destination Microblock Stride Register */ }; - +/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */ struct at_xdmac_desc { struct at_xdmac_lld lld; enum dma_transfer_direction direction; @@ -253,7 +253,7 @@ struct at_xdmac_desc { unsigned int xfer_size; struct list_head descs_list; struct list_head xfer_node; -}; +} __aligned(sizeof(u64)); static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb) { From 53398f488821c2b5b15291e3debec6ad33f75d3d Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 12 May 2016 16:54:09 +0200 Subject: [PATCH 0056/1050] dmaengine: at_xdmac: fix residue corruption An unexpected value of CUBC can lead to a corrupted residue. A more complex sequence is needed to detect an inaccurate value for NCA or CUBC. Signed-off-by: Ludovic Desroches Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Cc: stable@vger.kernel.org #v4.1 and later Reviewed-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 54 +++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index ba9b0b73fa47..b02494e115fe 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1400,6 +1400,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, u32 cur_nda, check_nda, cur_ubc, mask, value; u8 dwidth = 0; unsigned long flags; + bool initd; ret = dma_cookie_status(chan, cookie, txstate); if (ret == DMA_COMPLETE) @@ -1435,34 +1436,43 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, } /* - * When processing the residue, we need to read two registers but we - * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where - * we stand in the descriptor list and AT_XDMAC_CUBC is used - * to know how many data are remaining for the current descriptor. - * Since the dma channel is not paused to not loose data, between the - * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of - * descriptor. - * For that reason, after reading AT_XDMAC_CUBC, we check if we are - * still using the same descriptor by reading a second time - * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to - * read again AT_XDMAC_CUBC. + * The easiest way to compute the residue should be to pause the DMA + * but doing this can lead to miss some data as some devices don't + * have FIFO. + * We need to read several registers because: + * - DMA is running therefore a descriptor change is possible while + * reading these registers + * - When the block transfer is done, the value of the CUBC register + * is set to its initial value until the fetch of the next descriptor. + * This value will corrupt the residue calculation so we have to skip + * it. + * + * INITD -------- ------------ + * |____________________| + * _______________________ _______________ + * NDA @desc2 \/ @desc3 + * _______________________/\_______________ + * __________ ___________ _______________ + * CUBC 0 \/ MAX desc1 \/ MAX desc2 + * __________/\___________/\_______________ + * + * Since descriptors are aligned on 64 bits, we can assume that + * the update of NDA and CUBC is atomic. * Memory barriers are used to ensure the read order of the registers. - * A max number of retries is set because unlikely it can never ends if - * we are transferring a lot of data with small buffers. + * A max number of retries is set because unlikely it could never ends. */ - cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; - rmb(); - cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { - rmb(); check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; - - if (likely(cur_nda == check_nda)) - break; - - cur_nda = check_nda; + rmb(); + initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); rmb(); cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + rmb(); + cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + rmb(); + + if ((check_nda == cur_nda) && initd) + break; } if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) { From 9295c41d77ca93aac79cfca6fa09fa1ca5cab66f Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 12 May 2016 16:54:10 +0200 Subject: [PATCH 0057/1050] dmaengine: at_xdmac: double FIFO flush needed to compute residue Due to the way CUBC register is updated, a double flush is needed to compute an accurate residue. First flush aim is to get data from the DMA FIFO and second one ensures that we won't report data which are not in memory. Signed-off-by: Ludovic Desroches Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Cc: stable@vger.kernel.org #v4.1 and later Reviewed-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index b02494e115fe..75bd6621dc5d 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1425,7 +1425,16 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, residue = desc->xfer_size; /* * Flush FIFO: only relevant when the transfer is source peripheral - * synchronized. + * synchronized. Flush is needed before reading CUBC because data in + * the FIFO are not reported by CUBC. Reporting a residue of the + * transfer length while we have data in FIFO can cause issue. + * Usecase: atmel USART has a timeout which means I have received + * characters but there is no more character received for a while. On + * timeout, it requests the residue. If the data are in the DMA FIFO, + * we will return a residue of the transfer length. It means no data + * received. If an application is waiting for these data, it will hang + * since we won't have another USART timeout without receiving new + * data. */ mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC; value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM; @@ -1480,6 +1489,19 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, goto spin_unlock; } + /* + * Flush FIFO: only relevant when the transfer is source peripheral + * synchronized. Another flush is needed here because CUBC is updated + * when the controller sends the data write command. It can lead to + * report data that are not written in the memory or the device. The + * FIFO flush ensures that data are really written. + */ + if ((desc->lld.mbr_cfg & mask) == value) { + at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask); + while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS)) + cpu_relax(); + } + /* * Remove size of all microblocks already transferred and the current * one. Then add the remaining size to transfer of the current From bebef39842c8615180e711c0ff3e8c9c40c397da Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 May 2016 12:59:48 +0200 Subject: [PATCH 0058/1050] drm/sun4i: add COMMON_CLK dependency The sun4i drm driver uses the clk-provider interfaces, which are not available when CONFIG_COMMON_CLK is disabled: drivers/gpu/drm/sun4i/sun4i_dotclock.c:19:16: error: field 'hw' has incomplete type struct clk_hw hw; In file included from ../include/asm-generic/bug.h:13:0, from ../arch/arm/include/asm/bug.h:59, from ../include/linux/bug.h:4, from ../include/linux/io.h:23, from ../include/linux/clk-provider.h:14, from ../drivers/gpu/drm/sun4i/sun4i_dotclock.c:13: drivers/gpu/drm/sun4i/sun4i_dotclock.c: In function 'hw_to_dclk': include/linux/kernel.h:831:48: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] ... This adds a Kconfig dependency to prevent the driver from being enabled in this case. Signed-off-by: Arnd Bergmann Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig index 99510e64e91a..a4b357db8856 100644 --- a/drivers/gpu/drm/sun4i/Kconfig +++ b/drivers/gpu/drm/sun4i/Kconfig @@ -1,6 +1,6 @@ config DRM_SUN4I tristate "DRM Support for Allwinner A10 Display Engine" - depends on DRM && ARM + depends on DRM && ARM && COMMON_CLK depends on ARCH_SUNXI || COMPILE_TEST select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER From f1b78f0e759b174ec4f5e3b0dd27a079a2416b66 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 May 2016 17:23:28 +0200 Subject: [PATCH 0059/1050] drm: sun4i: print DMA address correctly The newly added sun4i drm driver prints a dma address using the %x format string, which cannot work when dma_addr_t is 64 bit, and gcc warns about this configuration: drm/sun4i/sun4i_backend.c: In function 'sun4i_backend_update_layer_buffer': drm/sun4i/sun4i_backend.c:193:84: error: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'dma_addr_t {aka long long unsigned int}' [-Werror=format=] DRM_DEBUG_DRIVER("Using GEM @ 0x%x\n", gem->paddr); drm/sun4i/sun4i_backend.c:201:84: error: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'dma_addr_t {aka long long unsigned int}' [-Werror=format=] DRM_DEBUG_DRIVER("Setting buffer address to 0x%x\n", paddr); This changes the code to use the explicit %pad format string, which always prints the right length. Signed-off-by: Arnd Bergmann Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_backend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index f7a15c1a93bf..3ab560450a82 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -190,7 +190,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend, /* Get the physical address of the buffer in memory */ gem = drm_fb_cma_get_gem_obj(fb, 0); - DRM_DEBUG_DRIVER("Using GEM @ 0x%x\n", gem->paddr); + DRM_DEBUG_DRIVER("Using GEM @ %pad\n", &gem->paddr); /* Compute the start of the displayed memory */ bpp = drm_format_plane_cpp(fb->pixel_format, 0); @@ -198,7 +198,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend, paddr += (state->src_x >> 16) * bpp; paddr += (state->src_y >> 16) * fb->pitches[0]; - DRM_DEBUG_DRIVER("Setting buffer address to 0x%x\n", paddr); + DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr); /* Write the 32 lower bits of the address (in bits) */ lo_paddr = paddr << 3; From 9fa2568d9fe0c6dbf3253af6840de9389f4e89cb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 May 2016 22:10:52 +0200 Subject: [PATCH 0060/1050] drm: sun4i: fix probe error handling gcc points out a possible uninitialized variable use in sun4i_dclk_create(): drivers/gpu/drm/sun4i/sun4i_dotclock.c: In function 'sun4i_dclk_create': drivers/gpu/drm/sun4i/sun4i_dotclock.c:139:12: error: 'clk_name' may be used uninitialized in this function [-Werror=maybe-uninitialized] init.name = clk_name; The warning only shows up when CONFIG_OF is disabled, and the property is never filled, but the same bug can show up even when CONFIG_OF is enabled but of_property_read_string_index returns another error. To fix it, this ensures that sun4i_dclk_create propagates any error from of_property_read_string_index. Signed-off-by: Arnd Bergmann Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_dotclock.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c index 3ff668cb463c..6c9c090a8006 100644 --- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c +++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c @@ -127,10 +127,14 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon) const char *clk_name, *parent_name; struct clk_init_data init; struct sun4i_dclk *dclk; + int ret; parent_name = __clk_get_name(tcon->sclk0); - of_property_read_string_index(dev->of_node, "clock-output-names", 0, - &clk_name); + ret = of_property_read_string_index(dev->of_node, + "clock-output-names", 0, + &clk_name); + if (ret) + return ret; dclk = devm_kzalloc(dev, sizeof(*dclk), GFP_KERNEL); if (!dclk) From 4731a72df273bdbd225445424057c15dbb8d3495 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Sat, 2 Apr 2016 12:30:11 +0200 Subject: [PATCH 0061/1050] drm/sun4i: request exact rates to our parents Our pixel clock currently only tries to deal with the current parent rate. While that works when the resolution is the same than the one already program, or when we can compute directly the rate from the current parent rate, it cannot work in most situation when we want to change the frequency, and we end up with an improper pixel clock rate, which obviously doesn't work as expected. Ask our parent for all the possible dividers if it can reach that frequency, and return the best parent rate to the clock framework so that we can use it. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_dotclock.c | 31 ++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c index 6c9c090a8006..5b3463197c48 100644 --- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c +++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c @@ -72,14 +72,40 @@ static unsigned long sun4i_dclk_recalc_rate(struct clk_hw *hw, static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate) { - return *parent_rate / DIV_ROUND_CLOSEST(*parent_rate, rate); + unsigned long best_parent = 0; + u8 best_div = 1; + int i; + + for (i = 6; i < 127; i++) { + unsigned long ideal = rate * i; + unsigned long rounded; + + rounded = clk_hw_round_rate(clk_hw_get_parent(hw), + ideal); + + if (rounded == ideal) { + best_parent = rounded; + best_div = i; + goto out; + } + + if ((rounded < ideal) && (rounded > best_parent)) { + best_parent = rounded; + best_div = i; + } + } + +out: + *parent_rate = best_parent; + + return best_parent / best_div; } static int sun4i_dclk_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { struct sun4i_dclk *dclk = hw_to_dclk(hw); - int div = DIV_ROUND_CLOSEST(parent_rate, rate); + u8 div = parent_rate / rate; return regmap_update_bits(dclk->regmap, SUN4I_TCON0_DCLK_REG, GENMASK(6, 0), div); @@ -144,6 +170,7 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon) init.ops = &sun4i_dclk_ops; init.parent_names = &parent_name; init.num_parents = 1; + init.flags = CLK_SET_RATE_PARENT; dclk->regmap = tcon->regs; dclk->hw.init = &init; From bb43d40d7c830da5f623e3938fef908b003b7523 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 21 Apr 2016 11:25:26 +0200 Subject: [PATCH 0062/1050] drm/sun4i: rgb: Validate the clock rate Our pixel clock cannot reach a high enough rate for some rather high while common resolutions (like 1080p60). Make sure we filter the resolutions we cannot reach in our mode_valid function. Fixes: 29e57fab97fc ("drm: sun4i: Add RGB output") Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_rgb.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index ab6494818050..fe7ef52a9346 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -54,8 +54,13 @@ static int sun4i_rgb_get_modes(struct drm_connector *connector) static int sun4i_rgb_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector); + struct sun4i_drv *drv = rgb->drv; + struct sun4i_tcon *tcon = drv->tcon; u32 hsync = mode->hsync_end - mode->hsync_start; u32 vsync = mode->vsync_end - mode->vsync_start; + unsigned long rate = mode->clock * 1000; + long rounded_rate; DRM_DEBUG_DRIVER("Validating modes...\n"); @@ -87,6 +92,15 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector, DRM_DEBUG_DRIVER("Vertical parameters OK\n"); + rounded_rate = clk_round_rate(tcon->dclk, rate); + if (rounded_rate < rate) + return MODE_CLOCK_LOW; + + if (rounded_rate > rate) + return MODE_CLOCK_HIGH; + + DRM_DEBUG_DRIVER("Clock rate OK\n"); + return MODE_OK; } From 0bbbb00bda57e6f091275b0103445596322b9277 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 4 May 2016 17:38:32 +0200 Subject: [PATCH 0063/1050] drm/sun4i: defer only if we didn't find our panel Our code currently defers our probe on any error, even if we were not expecting to have one at all. Make sure we return -EPROBE_DEFER only when we were supposed to have a panel, but it's not probed yet. Also fix a typo while we're at it. Fixes: 29e57fab97fc ("drm: sun4i: Add RGB output") Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 9f19b0e08560..16ab426ffa34 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -425,11 +425,11 @@ static struct drm_panel *sun4i_tcon_find_panel(struct device_node *node) remote = of_graph_get_remote_port_parent(end_node); if (!remote) { - DRM_DEBUG_DRIVER("Enable to parse remote node\n"); + DRM_DEBUG_DRIVER("Unable to parse remote node\n"); return ERR_PTR(-EINVAL); } - return of_drm_find_panel(remote); + return of_drm_find_panel(remote) ?: ERR_PTR(-EPROBE_DEFER); } static int sun4i_tcon_bind(struct device *dev, struct device *master, @@ -522,12 +522,13 @@ static int sun4i_tcon_probe(struct platform_device *pdev) * Defer the probe. */ panel = sun4i_tcon_find_panel(node); - if (IS_ERR(panel)) { - /* - * If we don't have a panel endpoint, just go on - */ - if (PTR_ERR(panel) != -ENODEV) - return -EPROBE_DEFER; + + /* + * If we don't have a panel endpoint, just go on + */ + if (PTR_ERR(panel) == -EPROBE_DEFER) { + DRM_DEBUG_DRIVER("Still waiting for our panel. Deferring...\n"); + return -EPROBE_DEFER; } return component_add(&pdev->dev, &sun4i_tcon_ops); From 0de6e914a035076b1241f5738c06c9d3820abd6e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 4 May 2016 17:37:58 +0200 Subject: [PATCH 0064/1050] drm/sun4i: rgb: panel is an error pointer In case of an error, our pointer to the drm_panel structure attached to our encoder will hold an error pointer, not a NULL pointer. Make sure we check the right thing. Fixes: 29e57fab97fc ("drm: sun4i: Add RGB output") Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_rgb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index fe7ef52a9346..aaffe9e64ffb 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -217,7 +217,7 @@ int sun4i_rgb_init(struct drm_device *drm) int ret; /* If we don't have a panel, there's no point in going on */ - if (!tcon->panel) + if (IS_ERR(tcon->panel)) return -ENODEV; rgb = devm_kzalloc(drm->dev, sizeof(*rgb), GFP_KERNEL); From 3d6bd9065b7acf8499d414d70bdb4b4955856299 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 11 May 2016 16:52:50 +0200 Subject: [PATCH 0065/1050] drm/sun4i: remove simplefb at probe If simplefb was setup by our bootloader and enabled in the DT, we will have a first framebuffer loaded in our system. However, as soon as our DRM driver will load, it will reset the controller, initialise it and, if the framebuffer emulation is enabled, register a second framebuffer device. This is obviously pretty bad, since the first framebuffer will be some kind of a black hole, with memory still reserved that we can write to safely, but not displayed anywhere. Make sure we remove that framebuffer when we probe so we don't end up in that situation. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 76e922bb60e5..af62b24c39b1 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -125,6 +125,22 @@ static struct drm_driver sun4i_drv_driver = { .disable_vblank = sun4i_drv_disable_vblank, }; +static void sun4i_remove_framebuffers(void) +{ + struct apertures_struct *ap; + + ap = alloc_apertures(1); + if (!ap) + return; + + /* The framebuffer can be located anywhere in RAM */ + ap->ranges[0].base = 0; + ap->ranges[0].size = ~0; + + remove_conflicting_framebuffers(ap, "sun4i-drm-fb", false); + kfree(ap); +} + static int sun4i_drv_bind(struct device *dev) { struct drm_device *drm; @@ -172,6 +188,9 @@ static int sun4i_drv_bind(struct device *dev) } drm->irq_enabled = true; + /* Remove early framebuffers (ie. simplefb) */ + sun4i_remove_framebuffers(); + /* Create our framebuffer */ drv->fbdev = sun4i_framebuffer_init(drm); if (IS_ERR(drv->fbdev)) { From 7aa2e2b731b337a7bc74cd076bbcd6b8b4b95fbf Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 11 May 2016 19:04:18 +0200 Subject: [PATCH 0066/1050] drm/sun4i: Convert to connector register helpers Now that connector register helpers have been created, switch to them. Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index af62b24c39b1..257d2b4f3645 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -24,34 +24,6 @@ #include "sun4i_layer.h" #include "sun4i_tcon.h" -static int sun4i_drv_connector_plug_all(struct drm_device *drm) -{ - struct drm_connector *connector, *failed; - int ret; - - mutex_lock(&drm->mode_config.mutex); - list_for_each_entry(connector, &drm->mode_config.connector_list, head) { - ret = drm_connector_register(connector); - if (ret) { - failed = connector; - goto err; - } - } - mutex_unlock(&drm->mode_config.mutex); - return 0; - -err: - list_for_each_entry(connector, &drm->mode_config.connector_list, head) { - if (failed == connector) - break; - - drm_connector_unregister(connector); - } - mutex_unlock(&drm->mode_config.mutex); - - return ret; -} - static int sun4i_drv_enable_vblank(struct drm_device *drm, unsigned int pipe) { struct sun4i_drv *drv = drm->dev_private; @@ -206,7 +178,7 @@ static int sun4i_drv_bind(struct device *dev) if (ret) goto free_drm; - ret = sun4i_drv_connector_plug_all(drm); + ret = drm_connector_register_all(drm); if (ret) goto unregister_drm; @@ -223,6 +195,7 @@ static void sun4i_drv_unbind(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); + drm_connector_unregister_all(drm); drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); sun4i_framebuffer_free(drm); From 13fef095bde04228316046f997eb963285d8532e Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 17 May 2016 23:56:06 +0800 Subject: [PATCH 0067/1050] drm: sun4i: do cleanup if RGB output init fails sun4i_rgb_init() can fail, which results in TCON failing to bind. In this case we need to do cleanup, specificly unregistering the dotclock, which is regmap based, and the regmap is registered as part of the sun4i_tcon_bind(). Failing to do so results in a NULL pointer reference when the CCF tries to turn off unused clocks. Fixes: 29e57fab97fc ("drm: sun4i: Add RGB output") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 16ab426ffa34..652385f09735 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -490,7 +490,11 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master, return 0; } - return sun4i_rgb_init(drm); + ret = sun4i_rgb_init(drm); + if (ret < 0) + goto err_free_clocks; + + return 0; err_free_clocks: sun4i_tcon_free_clocks(tcon); From 595144c1141c951a3c6bb9004ae6a2bc29aad66f Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 17 May 2016 20:57:50 +0200 Subject: [PATCH 0068/1050] clk: rockchip: initialize flags of clk_init_data in mmc-phase clock The flags element of clk_init_data was never initialized for mmc- phase-clocks resulting in the element containing a random value and thus possibly enabling unwanted clock flags. Fixes: 89bf26cbc1a0 ("clk: rockchip: Add support for the mmc clock phases using the framework") Cc: stable@vger.kernel.org Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-mmc-phase.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c index bc856f21f6b2..460113008bd9 100644 --- a/drivers/clk/rockchip/clk-mmc-phase.c +++ b/drivers/clk/rockchip/clk-mmc-phase.c @@ -154,6 +154,7 @@ struct clk *rockchip_clk_register_mmc(const char *name, return ERR_PTR(-ENOMEM); init.name = name; + init.flags = 0; init.num_parents = num_parents; init.parent_names = parent_names; init.ops = &rockchip_mmc_clk_ops; From 176df69cb080c8cd813e51f800069e13ee607641 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 13 May 2016 11:42:16 -0700 Subject: [PATCH 0069/1050] clk: rockchip: mark rk3399 GIC clocks as critical We never want to kill the GIC. Noticed when making other clock fixups, and seeing the newly-constructed clock tree try to disable cpll, where we had this parent structure: aclk_gic <------\ |--- aclk_gic_pre <-- cpll <-- pll_cpll aclk_gic_noc <--/ Signed-off-by: Brian Norris Reviewed-by: Douglas Anderson Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3399.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 291543f52caa..145756c4f3c8 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -1466,6 +1466,8 @@ static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = { static const char *const rk3399_cru_critical_clocks[] __initconst = { "aclk_cci_pre", + "aclk_gic", + "aclk_gic_noc", "pclk_perilp0", "pclk_perilp0", "hclk_perilp0", From 3bd14ae9da9194d369334556a70a7ff73ef94491 Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Fri, 13 May 2016 11:42:17 -0700 Subject: [PATCH 0070/1050] clk: rockchip: fix incorrect parent for rk3399's {c,g}pll_aclk_perihp_src There was a typo, swapping 'c' <--> 'g'. Signed-off-by: Xing Zheng Signed-off-by: Brian Norris Reviewed-by: Douglas Anderson Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3399.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 145756c4f3c8..9f86bfef70f7 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -832,9 +832,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { RK3399_CLKGATE_CON(13), 1, GFLAGS), /* perihp */ - GATE(0, "cpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, + GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(5), 0, GFLAGS), - GATE(0, "gpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED, + GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(5), 1, GFLAGS), COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED, RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS, From 4715f81afc342996f680b08c944a712d9cbef11b Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 12 May 2016 11:03:16 -0700 Subject: [PATCH 0071/1050] clk: rockchip: Revert "clk: rockchip: reset init state before mmc card initialization" This reverts commit 7a03fe6f48f3 ("clk: rockchip: reset init state before mmc card initialization"). Though not totally obvious from the commit message nor from the source code, that commit appears to be trying to reset the "_drv" MMC clocks to 90 degrees (note that the "_sample" MMC clocks have a shift of 0 so are not touched). The major problem here is that it doesn't properly reset things. The phase is a two bit field and the commit only touches one of the two bits. Thus the commit had the following affect: - phase 0 => phase 90 - phase 90 => phase 90 - phase 180 => phase 270 - phase 270 => phase 270 Things get even weirder if you happen to have a bootloader that was actually using delay elements (should be no reason to, but you never know), since those are additional bits that weren't touched by the original patch. This is unlikely to be what we actually want. Checking on rk3288-veyron devices, I can see that the bootloader leaves these clocks as: - emmc: phase 180 - sdmmc: phase 90 - sdio0: phase 90 Thus on rk3288-veyron devices the commit we're reverting had the effect of changing the eMMC clock to phase 270. This probably explains the scattered reports I've heard of eMMC devices not working on some veyron devices when using the upstream kernel. The original commit was presumably made because previously the kernel didn't touch the "_drv" phase at all and relied on whatever value was there when the kernel started. If someone was using a bootloader that touched the "_drv" phase then, indeed, we should have code in the kernel to fix that. ...and also, to get ideal timings, we should also have the kernel change the phase depending on the speed mode. In fact, that's the subject of a recent patch I posted at . Ideally, we should take both the patch posted to dw_mmc and this revert. Since those will likely go through different trees, here I describe behavior with the combos: 1. Just this revert: likely will fix rk3288-veyron eMMC on some devices + other cases; might break someone with a strange bootloader that sets the phase to 0 or one that uses delay elements (pretty unpredicable what would happen in that case). 2. Just dw_mmc patch: fixes everyone. Effectly the dw_mmc patch will totally override the broken patch and fix everything. 3. Both patches: fixes everyone. Once dw_mmc is initting properly then any defaults from the clock code doesn't mattery. Fixes: 7a03fe6f48f3 ("clk: rockchip: reset init state before mmc card initialization") Signed-off-by: Douglas Anderson Reviewed-by: Shawn Lin [emmc and sdmmc still work on all current boards in mainline after this revert, so they should take precedence over any out-of-tree board that will hopefully again get fixed with the better upcoming dw_mmc change.] Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-mmc-phase.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c index 460113008bd9..077fcdc7908b 100644 --- a/drivers/clk/rockchip/clk-mmc-phase.c +++ b/drivers/clk/rockchip/clk-mmc-phase.c @@ -41,8 +41,6 @@ static unsigned long rockchip_mmc_recalc(struct clk_hw *hw, #define ROCKCHIP_MMC_DEGREE_MASK 0x3 #define ROCKCHIP_MMC_DELAYNUM_OFFSET 2 #define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET) -#define ROCKCHIP_MMC_INIT_STATE_RESET 0x1 -#define ROCKCHIP_MMC_INIT_STATE_SHIFT 1 #define PSECS_PER_SEC 1000000000000LL @@ -163,15 +161,6 @@ struct clk *rockchip_clk_register_mmc(const char *name, mmc_clock->reg = reg; mmc_clock->shift = shift; - /* - * Assert init_state to soft reset the CLKGEN - * for mmc tuning phase and degree - */ - if (mmc_clock->shift == ROCKCHIP_MMC_INIT_STATE_SHIFT) - writel(HIWORD_UPDATE(ROCKCHIP_MMC_INIT_STATE_RESET, - ROCKCHIP_MMC_INIT_STATE_RESET, - mmc_clock->shift), mmc_clock->reg); - clk = clk_register(NULL, &mmc_clock->hw); if (IS_ERR(clk)) kfree(mmc_clock); From 3183c0d519ff83af2926382c11716496f01d34bf Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Thu, 26 May 2016 21:49:08 +0800 Subject: [PATCH 0072/1050] clk: rockchip: fix cpuclk registration error handling It maybe due to a copy-paste error the error handing should be cclk not clk when checking if the cpuclk registration succeeded. Reported-by: Lin Huang Signed-off-by: Xing Zheng Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-cpu.c b/drivers/clk/rockchip/clk-cpu.c index 4bb130cd0062..05b3d73bfefa 100644 --- a/drivers/clk/rockchip/clk-cpu.c +++ b/drivers/clk/rockchip/clk-cpu.c @@ -321,9 +321,9 @@ struct clk *rockchip_clk_register_cpuclk(const char *name, } cclk = clk_register(NULL, &cpuclk->hw); - if (IS_ERR(clk)) { + if (IS_ERR(cclk)) { pr_err("%s: could not register cpuclk %s\n", __func__, name); - ret = PTR_ERR(clk); + ret = PTR_ERR(cclk); goto free_rate_table; } From b7a8daa9f3d1688e994f5557577d3252c94ec282 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 14 May 2016 22:19:53 +0900 Subject: [PATCH 0073/1050] netfilter: nf_ct_helper: Fix helper unregister count. helpers should unregister the only registered ports. but, helper cannot have correct registered ports value when failed to register. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 1 + net/netfilter/nf_conntrack_irc.c | 1 + net/netfilter/nf_conntrack_sane.c | 1 + net/netfilter/nf_conntrack_sip.c | 1 + net/netfilter/nf_conntrack_tftp.c | 1 + 5 files changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 883c691ec8d0..19efeba02abb 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -632,6 +632,7 @@ static int __init nf_conntrack_ftp_init(void) if (ret) { pr_err("failed to register helper for pf: %d port: %d\n", ftp[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_ftp_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 8b6da2719600..f97ac61d2536 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -271,6 +271,7 @@ static int __init nf_conntrack_irc_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", irc[i].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_irc_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 7523a575f6d1..3fcbaab83b3d 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -223,6 +223,7 @@ static int __init nf_conntrack_sane_init(void) if (ret) { pr_err("failed to register helper for pf: %d port: %d\n", sane[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_sane_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 3e06402739e0..f72ba5587588 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1669,6 +1669,7 @@ static int __init nf_conntrack_sip_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", sip[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_sip_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 36f964066461..2e65b5430fba 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -142,6 +142,7 @@ static int __init nf_conntrack_tftp_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", tftp[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_tftp_fini(); return ret; } From 83170f3beccccd7ceb4f9a0ac0c4dc736afde90c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 26 May 2016 19:08:10 +0200 Subject: [PATCH 0074/1050] netfilter: nf_dup_ipv6: set again FLOWI_FLAG_KNOWN_NH at flowi6_flags With the commit 48e8aa6e3137 ("ipv6: Set FLOWI_FLAG_KNOWN_NH at flowi6_flags") ip6_pol_route() callers were asked to to set the FLOWI_FLAG_KNOWN_NH properly and xt_TEE was updated accordingly, but with the later refactor in commit bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6") the flowi6_flags update was lost. This commit re-add it just before the routing decision. Fixes: bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6") Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_dup_ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 6989c70ae29f..4a84b5ad9ecb 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -33,6 +33,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, fl6.daddr = *gw; fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]); + fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); From eaa2bcd6d1d410a52df8c7b05e76d86c0319b7b0 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Fri, 27 May 2016 13:34:04 -0400 Subject: [PATCH 0075/1050] netfilter: nf_tables: validate NFTA_SET_TABLE parameter If the NFTA_SET_TABLE parameter is missing and the NLM_F_DUMP flag is not set, then a NULL pointer dereference is triggered in nf_tables_set_lookup because ctx.table is NULL. Signed-off-by: Phil Turnbull Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2011977cd79d..6947e255cdd8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2641,6 +2641,8 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, /* Only accept unspec with dump */ if (nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; + if (!nla[NFTA_SET_TABLE]) + return -EINVAL; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) From 1691cf160048c0753036d0b3fad7f239234dab56 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 28 May 2016 07:04:38 +0000 Subject: [PATCH 0076/1050] btrfs: Use __u64 in exported linux/btrfs.h. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes this build error. /usr/include/linux/btrfs.h:121:3: error: unknown type name ‘u64’ u64 devid; ^~~ Fixes: 6b526ed70cf1 ("btrfs: introduce device delete by devid") Signed-off-by: Vinson Lee Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 23c6960e94a4..2bdd1e3e7007 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -118,7 +118,7 @@ struct btrfs_ioctl_vol_args_v2 { }; union { char name[BTRFS_SUBVOL_NAME_MAX + 1]; - u64 devid; + __u64 devid; }; }; From e69e7e03ed225abf3e1c43545aa3bcb68dc81d5f Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 30 May 2016 15:58:28 +0800 Subject: [PATCH 0077/1050] ALSA: hda/realtek - ALC256 speaker noise issue That is some different register for ALC255 and ALC256. ALC256 can't fit with some ALC255 register. This issue is cause from LDO output voltage control. This patch is updated the right LDO register value. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 52 +++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d53c25e7a1c1..daf4f64f2c24 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3618,13 +3618,20 @@ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, static void alc_headset_mode_unplugged(struct hda_codec *codec) { static struct coef_fw coef0255[] = { - WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */ WRITE_COEF(0x45, 0xd089), /* UAJ function set to menual mode */ UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), /* Direct Drive HP Amp control(Set to verb control)*/ WRITE_COEF(0x06, 0x6104), /* Set MIC2 Vref gate with HP */ WRITE_COEFEX(0x57, 0x03, 0x8aa6), /* Direct Drive HP Amp control */ {} }; + static struct coef_fw coef0255_1[] = { + WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */ + {} + }; + static struct coef_fw coef0256[] = { + WRITE_COEF(0x1b, 0x0c4b), /* LDO and MISC control */ + {} + }; static struct coef_fw coef0233[] = { WRITE_COEF(0x1b, 0x0c0b), WRITE_COEF(0x45, 0xc429), @@ -3677,7 +3684,11 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0255: + alc_process_coef_fw(codec, coef0255_1); + alc_process_coef_fw(codec, coef0255); + break; case 0x10ec0256: + alc_process_coef_fw(codec, coef0256); alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3896,6 +3907,12 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) WRITE_COEFEX(0x57, 0x03, 0x8ea6), {} }; + static struct coef_fw coef0256[] = { + WRITE_COEF(0x45, 0xd489), /* Set to CTIA type */ + WRITE_COEF(0x1b, 0x0c6b), + WRITE_COEFEX(0x57, 0x03, 0x8ea6), + {} + }; static struct coef_fw coef0233[] = { WRITE_COEF(0x45, 0xd429), WRITE_COEF(0x1b, 0x0c2b), @@ -3936,9 +3953,11 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0255: - case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0256: + alc_process_coef_fw(codec, coef0256); + break; case 0x10ec0233: case 0x10ec0283: alc_process_coef_fw(codec, coef0233); @@ -3978,6 +3997,12 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) WRITE_COEFEX(0x57, 0x03, 0x8ea6), {} }; + static struct coef_fw coef0256[] = { + WRITE_COEF(0x45, 0xe489), /* Set to OMTP Type */ + WRITE_COEF(0x1b, 0x0c6b), + WRITE_COEFEX(0x57, 0x03, 0x8ea6), + {} + }; static struct coef_fw coef0233[] = { WRITE_COEF(0x45, 0xe429), WRITE_COEF(0x1b, 0x0c2b), @@ -4018,9 +4043,11 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0255: - case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0256: + alc_process_coef_fw(codec, coef0256); + break; case 0x10ec0233: case 0x10ec0283: alc_process_coef_fw(codec, coef0233); @@ -4266,7 +4293,7 @@ static void alc_fixup_headset_mode_no_hp_mic(struct hda_codec *codec, static void alc255_set_default_jack_type(struct hda_codec *codec) { /* Set to iphone type */ - static struct coef_fw fw[] = { + static struct coef_fw alc255fw[] = { WRITE_COEF(0x1b, 0x880b), WRITE_COEF(0x45, 0xd089), WRITE_COEF(0x1b, 0x080b), @@ -4274,7 +4301,22 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) WRITE_COEF(0x1b, 0x0c0b), {} }; - alc_process_coef_fw(codec, fw); + static struct coef_fw alc256fw[] = { + WRITE_COEF(0x1b, 0x884b), + WRITE_COEF(0x45, 0xd089), + WRITE_COEF(0x1b, 0x084b), + WRITE_COEF(0x46, 0x0004), + WRITE_COEF(0x1b, 0x0c4b), + {} + }; + switch (codec->core.vendor_id) { + case 0x10ec0255: + alc_process_coef_fw(codec, alc255fw); + break; + case 0x10ec0256: + alc_process_coef_fw(codec, alc256fw); + break; + } msleep(30); } From 6fbae35a3170c3e2b1b9d7b9cc943cbe48771362 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 30 May 2016 16:44:20 +0800 Subject: [PATCH 0078/1050] ALSA: hda/realtek - Add support for new codecs ALC700/ALC701/ALC703 Support new codecs for ALC700/ALC701/ALC703. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index daf4f64f2c24..7960316d767c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -346,6 +346,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0234: case 0x10ec0274: case 0x10ec0294: + case 0x10ec0700: + case 0x10ec0701: + case 0x10ec0703: alc_update_coef_idx(codec, 0x10, 1<<15, 0); break; case 0x10ec0662: @@ -2655,6 +2658,7 @@ enum { ALC269_TYPE_ALC256, ALC269_TYPE_ALC225, ALC269_TYPE_ALC294, + ALC269_TYPE_ALC700, }; /* @@ -2686,6 +2690,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC256: case ALC269_TYPE_ALC225: case ALC269_TYPE_ALC294: + case ALC269_TYPE_ALC700: ssids = alc269_ssids; break; default: @@ -6095,6 +6100,14 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0294: spec->codec_variant = ALC269_TYPE_ALC294; break; + case 0x10ec0700: + case 0x10ec0701: + case 0x10ec0703: + spec->codec_variant = ALC269_TYPE_ALC700; + spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ + alc_update_coef_idx(codec, 0x4a, 0, 1 << 15); /* Combo jack auto trigger control */ + break; + } if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) { @@ -7050,6 +7063,9 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0670, "ALC670", patch_alc662), HDA_CODEC_ENTRY(0x10ec0671, "ALC671", patch_alc662), HDA_CODEC_ENTRY(0x10ec0680, "ALC680", patch_alc680), + HDA_CODEC_ENTRY(0x10ec0700, "ALC700", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0701, "ALC701", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0703, "ALC703", patch_alc269), HDA_CODEC_ENTRY(0x10ec0867, "ALC891", patch_alc882), HDA_CODEC_ENTRY(0x10ec0880, "ALC880", patch_alc880), HDA_CODEC_ENTRY(0x10ec0882, "ALC882", patch_alc882), From 06061dc63bff8d609bc8d64f5d3ef59d0a673b25 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 May 2016 14:49:53 +0200 Subject: [PATCH 0079/1050] phy: exynos-mipi-video: avoid uninitialized variable use A rework of the exynos-mipi-video driver caused a warning about the new __set_phy_state function potentially accessing a variable before its initialization: drivers/phy/phy-exynos-mipi-video.c: In function '__set_phy_state': drivers/phy/phy-exynos-mipi-video.c:238:13: error: 'val' may be used uninitialized in this function [-Werror=maybe-uninitialized] return val & data->resetn_val; ~~~~^~~~~~~~~~~~~~~~~~ drivers/phy/phy-exynos-mipi-video.c:235:6: note: 'val' was declared here u32 val; The failure scenario here is the offset passed into a the stub regmap_read() function that does not modify its output, however regmap_read() can also fail for other reasons, so adding error handling (in this case, returning zero from is_running) seems the best solution. Note that this warning showed up with the ARM s5pv210_defconfig, indicating that we most likely want to either enable CONFIG_REGMAP in that defconfig as well, or disable the phy-exynos-mipi-video driver. Signed-off-by: Arnd Bergmann Reviewed-by: Krzysztof Kozlowski Fixes: 97a3042f7616 ("phy: exynos-mipi-video: Rewrite handling of phy registers") Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-exynos-mipi-video.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/phy/phy-exynos-mipi-video.c b/drivers/phy/phy-exynos-mipi-video.c index cc093ebfda94..8b851f718123 100644 --- a/drivers/phy/phy-exynos-mipi-video.c +++ b/drivers/phy/phy-exynos-mipi-video.c @@ -233,8 +233,12 @@ static inline int __is_running(const struct exynos_mipi_phy_desc *data, struct exynos_mipi_video_phy *state) { u32 val; + int ret; + + ret = regmap_read(state->regmaps[data->resetn_map], data->resetn_reg, &val); + if (ret) + return 0; - regmap_read(state->regmaps[data->resetn_map], data->resetn_reg, &val); return val & data->resetn_val; } From 62a584fe05eef1f80ed49a286a29328f1a224fb9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 May 2016 14:34:46 -0400 Subject: [PATCH 0080/1050] writeback: use higher precision calculation in domain_dirty_limits() As vm.dirty_[background_]bytes can't be applied verbatim to multiple cgroup writeback domains, they get converted to percentages in domain_dirty_limits() and applied the same way as vm.dirty_[background]ratio. However, if the specified bytes is lower than 1% of available memory, the calculated ratios become zero and the writeback domain gets throttled constantly. Fix it by using per-PAGE_SIZE instead of percentage for ratio calculations. Also, the updated DIV_ROUND_UP() usages now should yield 1/4096 (0.0244%) as the minimum ratio as long as the specified bytes are above zero. Signed-off-by: Tejun Heo Reported-by: Miao Xie Link: http://lkml.kernel.org/g/57333E75.3080309@huawei.com Cc: stable@vger.kernel.org # v4.2+ Fixes: 9fc3a43e1757 ("writeback: separate out domain_dirty_limits()") Reviewed-by: Jan Kara Adjusted comment based on Jan's suggestion. Signed-off-by: Jens Axboe --- mm/page-writeback.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b9956fdee8f5..e2481949494c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -373,8 +373,9 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc); unsigned long bytes = vm_dirty_bytes; unsigned long bg_bytes = dirty_background_bytes; - unsigned long ratio = vm_dirty_ratio; - unsigned long bg_ratio = dirty_background_ratio; + /* convert ratios to per-PAGE_SIZE for higher precision */ + unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100; + unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100; unsigned long thresh; unsigned long bg_thresh; struct task_struct *tsk; @@ -386,26 +387,28 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) /* * The byte settings can't be applied directly to memcg * domains. Convert them to ratios by scaling against - * globally available memory. + * globally available memory. As the ratios are in + * per-PAGE_SIZE, they can be obtained by dividing bytes by + * number of pages. */ if (bytes) - ratio = min(DIV_ROUND_UP(bytes, PAGE_SIZE) * 100 / - global_avail, 100UL); + ratio = min(DIV_ROUND_UP(bytes, global_avail), + PAGE_SIZE); if (bg_bytes) - bg_ratio = min(DIV_ROUND_UP(bg_bytes, PAGE_SIZE) * 100 / - global_avail, 100UL); + bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail), + PAGE_SIZE); bytes = bg_bytes = 0; } if (bytes) thresh = DIV_ROUND_UP(bytes, PAGE_SIZE); else - thresh = (ratio * available_memory) / 100; + thresh = (ratio * available_memory) / PAGE_SIZE; if (bg_bytes) bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE); else - bg_thresh = (bg_ratio * available_memory) / 100; + bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; if (bg_thresh >= thresh) bg_thresh = thresh / 2; From 60f2b4b8af548150cc56bf6fd213e47897964794 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 30 May 2016 19:21:22 +0530 Subject: [PATCH 0081/1050] ARC: [intc-compact] simplify code for 2 priority levels ARC700 support for 2 interrupt priorities historically allowed even slow perpherals such as emac and uart to setup high priority interrupts which was wrong from the beginning as they could possibly delay the more critical timer interrupt. The hardware support for 2 level interrupts in ARCompact is less than ideal anyways (judging from the "hacks" in low level entry code and thus is not used in productions systems I know of. So reduce the scope of this to timer only, thereby reducing a bunch of complexity. Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 22 ++-------------------- arch/arc/kernel/entry-compact.S | 18 ++---------------- arch/arc/kernel/intc-compact.c | 6 ++---- 3 files changed, 6 insertions(+), 40 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 0dcbacfdea4b..b14826a4f59c 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -186,9 +186,6 @@ if SMP config ARC_HAS_COH_CACHES def_bool n -config ARC_HAS_REENTRANT_IRQ_LV2 - def_bool n - config ARC_MCIP bool "ARConnect Multicore IP (MCIP) Support " depends on ISA_ARCV2 @@ -366,25 +363,10 @@ config NODES_SHIFT if ISA_ARCOMPACT config ARC_COMPACT_IRQ_LEVELS - bool "ARCompact IRQ Priorities: High(2)/Low(1)" + bool "Setup Timer IRQ as high Priority" default n - # Timer HAS to be high priority, for any other high priority config - select ARC_IRQ3_LV2 # if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy - depends on !SMP || ARC_HAS_REENTRANT_IRQ_LV2 - -if ARC_COMPACT_IRQ_LEVELS - -config ARC_IRQ3_LV2 - bool - -config ARC_IRQ5_LV2 - bool - -config ARC_IRQ6_LV2 - bool - -endif #ARC_COMPACT_IRQ_LEVELS + depends on !SMP config ARC_FPU_SAVE_RESTORE bool "Enable FPU state persistence across context switch" diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index 0cb0abaa0479..98812c1248df 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -91,27 +91,13 @@ VECTOR mem_service ; 0x8, Mem exception (0x1) VECTOR instr_service ; 0x10, Instrn Error (0x2) ; ******************** Device ISRs ********************** -#ifdef CONFIG_ARC_IRQ3_LV2 +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS VECTOR handle_interrupt_level2 #else VECTOR handle_interrupt_level1 #endif -VECTOR handle_interrupt_level1 - -#ifdef CONFIG_ARC_IRQ5_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -#ifdef CONFIG_ARC_IRQ6_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -.rept 25 +.rept 28 VECTOR handle_interrupt_level1 ; Other devices .endr diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index c5cceca36118..ce9deb953ca9 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -28,10 +28,8 @@ void arc_init_IRQ(void) { int level_mask = 0; - /* setup any high priority Interrupts (Level2 in ARCompact jargon) */ - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6; + /* Is timer high priority Interrupt (Level2 in ARCompact jargon) */ + level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ; /* * Write to register, even if no LV2 IRQs configured to reset it From d140b9bfcad9e53f1da67ad09dd5092b44d55c7b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 11:46:47 +0530 Subject: [PATCH 0082/1050] ARC: don't enable DISCONTIGMEM unconditionally Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b14826a4f59c..be9d0b5ae0cc 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -61,7 +61,7 @@ config RWSEM_GENERIC_SPINLOCK def_bool y config ARCH_DISCONTIGMEM_ENABLE - def_bool y + def_bool n config ARCH_FLATMEM_ENABLE def_bool y @@ -453,7 +453,7 @@ config LINUX_LINK_BASE config HIGHMEM bool "High Memory Support" - select DISCONTIGMEM + select ARCH_DISCONTIGMEM_ENABLE help With ARC 2G:2G address split, only upper 2G is directly addressable by kernel. Enable this to potentially allow access to rest of 2G and PAE From f1bddbb3de60872acc2446eee97dbeb0a6d57acb Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Thu, 5 May 2016 10:46:05 +0200 Subject: [PATCH 0083/1050] usb: gadget: Fix binding to UDC via configfs interface By default user could store only valid UDC name in configfs UDC attr by doing: echo $UDC_NAME > UDC Commit (855ed04 "usb: gadget: udc-core: independent registration of gadgets and gadget drivers") broke this behavior and allowed to store any arbitrary string in UDC file and udc core was waiting for such controller to appear. echo "any arbitrary string here" > UDC This commit fix this by adding a flag which prevents configfs gadget from being added to list of pending drivers if UDC with given name has not been found. Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi --- drivers/usb/gadget/configfs.c | 1 + drivers/usb/gadget/udc/udc-core.c | 12 ++++++++---- include/linux/usb/gadget.h | 3 +++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index b6f60ca8a035..70cf3477f951 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1401,6 +1401,7 @@ static const struct usb_gadget_driver configfs_driver_template = { .owner = THIS_MODULE, .name = "configfs-gadget", }, + .match_existing_only = 1, }; static struct config_group *gadgets_make( diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index 6e8300d6a737..e1b2dcebdc2e 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -603,11 +603,15 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver) } } - list_add_tail(&driver->pending, &gadget_driver_pending_list); - pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n", - driver->function); + if (!driver->match_existing_only) { + list_add_tail(&driver->pending, &gadget_driver_pending_list); + pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n", + driver->function); + ret = 0; + } + mutex_unlock(&udc_lock); - return 0; + return ret; found: ret = udc_bind_to_driver(udc, driver); mutex_unlock(&udc_lock); diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 457651bf45b0..fefe8b06a63d 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1034,6 +1034,8 @@ static inline int usb_gadget_activate(struct usb_gadget *gadget) * @udc_name: A name of UDC this driver should be bound to. If udc_name is NULL, * this driver will be bound to any available UDC. * @pending: UDC core private data used for deferred probe of this driver. + * @match_existing_only: If udc is not found, return an error and don't add this + * gadget driver to list of pending driver * * Devices are disabled till a gadget driver successfully bind()s, which * means the driver will handle setup() requests needed to enumerate (and @@ -1097,6 +1099,7 @@ struct usb_gadget_driver { char *udc_name; struct list_head pending; + unsigned match_existing_only:1; }; From 4879efb34f7d49235fac334d76d9c6a77a021413 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Tue, 24 May 2016 20:13:15 +0200 Subject: [PATCH 0084/1050] usb: dwc3: exynos: Fix deferred probing storm. dwc3-exynos has two problems during init if the regulators are slow to come up (for instance if the I2C bus driver is not on the initramfs) and return probe deferral. First, every time this happens, the driver leaks the USB phys created; they need to be deallocated on error. Second, since the phy devices are created before the regulators fail, this means that there's a new device to re-trigger deferred probing, which causes it to essentially go into a busy loop of re-probing the device until the regulators come up. Move the phy creation to after the regulators have succeeded, and also fix cleanup on failure. On my ODROID XU4 system (with Debian's initramfs which doesn't contain the I2C driver), this reduces the number of probe attempts (for each of the two controllers) from more than 2000 to eight. Signed-off-by: Steinar H. Gunderson Reviewed-by: Krzysztof Kozlowski Reviewed-by: Vivek Gautam Fixes: d720f057fda4 ("usb: dwc3: exynos: add nop transceiver support") Cc: Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-exynos.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index dd5cb5577dca..2f1fb7e7aa54 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -128,12 +128,6 @@ static int dwc3_exynos_probe(struct platform_device *pdev) platform_set_drvdata(pdev, exynos); - ret = dwc3_exynos_register_phys(exynos); - if (ret) { - dev_err(dev, "couldn't register PHYs\n"); - return ret; - } - exynos->dev = dev; exynos->clk = devm_clk_get(dev, "usbdrd30"); @@ -183,20 +177,29 @@ static int dwc3_exynos_probe(struct platform_device *pdev) goto err3; } + ret = dwc3_exynos_register_phys(exynos); + if (ret) { + dev_err(dev, "couldn't register PHYs\n"); + goto err4; + } + if (node) { ret = of_platform_populate(node, NULL, NULL, dev); if (ret) { dev_err(dev, "failed to add dwc3 core\n"); - goto err4; + goto err5; } } else { dev_err(dev, "no device node, failed to add dwc3 core\n"); ret = -ENODEV; - goto err4; + goto err5; } return 0; +err5: + platform_device_unregister(exynos->usb2_phy); + platform_device_unregister(exynos->usb3_phy); err4: regulator_disable(exynos->vdd10); err3: From 0015f9156092d07b3ec06d37d014328419d5832e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 28 May 2016 07:48:10 +0300 Subject: [PATCH 0085/1050] usb: f_fs: off by one bug in _ffs_func_bind() This loop is supposed to set all the .num[] values to -1 but it's off by one so it skips the first element and sets one element past the end of the array. I've cleaned up the loop a little as well. Fixes: ddf8abd25994 ('USB: f_fs: the FunctionFS driver') Acked-by: Michal Nazarewicz Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 73515d54e1cc..d26eb64e59b6 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2729,6 +2729,7 @@ static int _ffs_func_bind(struct usb_configuration *c, func->ffs->ss_descs_count; int fs_len, hs_len, ss_len, ret, i; + struct ffs_ep *eps_ptr; /* Make it a single chunk, less management later on */ vla_group(d); @@ -2777,12 +2778,9 @@ static int _ffs_func_bind(struct usb_configuration *c, ffs->raw_descs_length); memset(vla_ptr(vlabuf, d, inums), 0xff, d_inums__sz); - for (ret = ffs->eps_count; ret; --ret) { - struct ffs_ep *ptr; - - ptr = vla_ptr(vlabuf, d, eps); - ptr[ret].num = -1; - } + eps_ptr = vla_ptr(vlabuf, d, eps); + for (i = 0; i < ffs->eps_count; i++) + eps_ptr[i].num = -1; /* Save pointers * d_eps == vlabuf, func->eps used to kfree vlabuf later From d246dcb2331c5783743720e6510892eb1d2801d9 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Thu, 26 May 2016 11:43:45 -0500 Subject: [PATCH 0086/1050] usb: gadget: fix spinlock dead lock in gadgetfs [ 40.467381] ============================================= [ 40.473013] [ INFO: possible recursive locking detected ] [ 40.478651] 4.6.0-08691-g7f3db9a #37 Not tainted [ 40.483466] --------------------------------------------- [ 40.489098] usb/733 is trying to acquire lock: [ 40.493734] (&(&dev->lock)->rlock){-.....}, at: [] ep0_complete+0x18/0xdc [gadgetfs] [ 40.502882] [ 40.502882] but task is already holding lock: [ 40.508967] (&(&dev->lock)->rlock){-.....}, at: [] ep0_read+0x20/0x5e0 [gadgetfs] [ 40.517811] [ 40.517811] other info that might help us debug this: [ 40.524623] Possible unsafe locking scenario: [ 40.524623] [ 40.530798] CPU0 [ 40.533346] ---- [ 40.535894] lock(&(&dev->lock)->rlock); [ 40.540088] lock(&(&dev->lock)->rlock); [ 40.544284] [ 40.544284] *** DEADLOCK *** [ 40.544284] [ 40.550461] May be due to missing lock nesting notation [ 40.550461] [ 40.557544] 2 locks held by usb/733: [ 40.561271] #0: (&f->f_pos_lock){+.+.+.}, at: [] __fdget_pos+0x40/0x48 [ 40.569219] #1: (&(&dev->lock)->rlock){-.....}, at: [] ep0_read+0x20/0x5e0 [gadgetfs] [ 40.578523] [ 40.578523] stack backtrace: [ 40.583075] CPU: 0 PID: 733 Comm: usb Not tainted 4.6.0-08691-g7f3db9a #37 [ 40.590246] Hardware name: Generic AM33XX (Flattened Device Tree) [ 40.596625] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 40.604718] [] (show_stack) from [] (dump_stack+0xb0/0xe4) [ 40.612267] [] (dump_stack) from [] (__lock_acquire+0xf68/0x1994) [ 40.620440] [] (__lock_acquire) from [] (lock_acquire+0xd8/0x238) [ 40.628621] [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x38/0x4c) [ 40.637440] [] (_raw_spin_lock_irqsave) from [] (ep0_complete+0x18/0xdc [gadgetfs]) [ 40.647339] [] (ep0_complete [gadgetfs]) from [] (musb_g_giveback+0x118/0x1b0 [musb_hdrc]) [ 40.657842] [] (musb_g_giveback [musb_hdrc]) from [] (musb_g_ep0_queue+0x16c/0x188 [musb_hdrc]) [ 40.668772] [] (musb_g_ep0_queue [musb_hdrc]) from [] (ep0_read+0x544/0x5e0 [gadgetfs]) [ 40.678963] [] (ep0_read [gadgetfs]) from [] (__vfs_read+0x20/0x110) [ 40.687414] [] (__vfs_read) from [] (vfs_read+0x88/0x114) [ 40.694864] [] (vfs_read) from [] (SyS_read+0x44/0x9c) [ 40.702051] [] (SyS_read) from [] (ret_fast_syscall+0x0/0x1c) This is caused by the spinlock bug in ep0_read(). Fix the two other deadlock sources in gadgetfs_setup() too. Cc: # v3.16+ Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi --- drivers/usb/gadget/legacy/inode.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index e64479f882a5..aa3707bdebb4 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -938,8 +938,11 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) struct usb_ep *ep = dev->gadget->ep0; struct usb_request *req = dev->req; - if ((retval = setup_req (ep, req, 0)) == 0) - retval = usb_ep_queue (ep, req, GFP_ATOMIC); + if ((retval = setup_req (ep, req, 0)) == 0) { + spin_unlock_irq (&dev->lock); + retval = usb_ep_queue (ep, req, GFP_KERNEL); + spin_lock_irq (&dev->lock); + } dev->state = STATE_DEV_CONNECTED; /* assume that was SET_CONFIGURATION */ @@ -1457,8 +1460,11 @@ delegate: w_length); if (value < 0) break; + + spin_unlock (&dev->lock); value = usb_ep_queue (gadget->ep0, dev->req, - GFP_ATOMIC); + GFP_KERNEL); + spin_lock (&dev->lock); if (value < 0) { clean_req (gadget->ep0, dev->req); break; @@ -1481,11 +1487,14 @@ delegate: if (value >= 0 && dev->state != STATE_DEV_SETUP) { req->length = value; req->zero = value < w_length; - value = usb_ep_queue (gadget->ep0, req, GFP_ATOMIC); + + spin_unlock (&dev->lock); + value = usb_ep_queue (gadget->ep0, req, GFP_KERNEL); if (value < 0) { DBG (dev, "ep_queue --> %d\n", value); req->status = 0; } + return value; } /* device stalls when value < 0 */ From 51da43b555ba19e0230ff5a5acc58eb0fffb6026 Mon Sep 17 00:00:00 2001 From: Vahram Aharonyan Date: Mon, 23 May 2016 22:41:57 -0700 Subject: [PATCH 0087/1050] usb: dwc2: gadget: Do not halt endpoint if active The gadget API function usb_ep_set_halt() expects the gadget to return -EAGAIN if the ep is active. Add support for this behavior. Otherwise this may break mass storage protocol if a STALL is attempted on the endpoint. Signed-off-by: Vahram Aharonyan Signed-off-by: John Youn Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 4c5e3005e1dc..e4e2a9031dee 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1018,7 +1018,7 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg, return 1; } -static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value); +static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now); /** * get_ep_head - return the first request on the endpoint @@ -1094,7 +1094,7 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, case USB_ENDPOINT_HALT: halted = ep->halted; - dwc2_hsotg_ep_sethalt(&ep->ep, set); + dwc2_hsotg_ep_sethalt(&ep->ep, set, true); ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0); if (ret) { @@ -2948,8 +2948,13 @@ static int dwc2_hsotg_ep_dequeue(struct usb_ep *ep, struct usb_request *req) * dwc2_hsotg_ep_sethalt - set halt on a given endpoint * @ep: The endpoint to set halt. * @value: Set or unset the halt. + * @now: If true, stall the endpoint now. Otherwise return -EAGAIN if + * the endpoint is busy processing requests. + * + * We need to stall the endpoint immediately if request comes from set_feature + * protocol command handler. */ -static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value) +static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now) { struct dwc2_hsotg_ep *hs_ep = our_ep(ep); struct dwc2_hsotg *hs = hs_ep->parent; @@ -2969,6 +2974,12 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value) return 0; } + if (!now && value && !list_empty(&hs_ep->queue)) { + dev_dbg(hs->dev, "%s request is pending, cannot halt\n", + ep->name); + return -EAGAIN; + } + if (hs_ep->dir_in) { epreg = DIEPCTL(index); epctl = dwc2_readl(hs->regs + epreg); @@ -3020,7 +3031,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value) int ret = 0; spin_lock_irqsave(&hs->lock, flags); - ret = dwc2_hsotg_ep_sethalt(ep, value); + ret = dwc2_hsotg_ep_sethalt(ep, value, false); spin_unlock_irqrestore(&hs->lock, flags); return ret; From 15186f1011b088432a10f435aa6e2df5ab177503 Mon Sep 17 00:00:00 2001 From: Vahram Aharonyan Date: Mon, 23 May 2016 22:41:59 -0700 Subject: [PATCH 0088/1050] usb: dwc2: gadget: Do not halt isochronous endpoints Add a check in dwc2_hsotg_ep_sethalt() so that it does not halt isochronous endpoints. Signed-off-by: Vahram Aharonyan Signed-off-by: John Youn Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e4e2a9031dee..26cf09d0fe3c 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2974,6 +2974,11 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now) return 0; } + if (hs_ep->isochronous) { + dev_err(hs->dev, "%s is Isochronous Endpoint\n", ep->name); + return -EINVAL; + } + if (!now && value && !list_empty(&hs_ep->queue)) { dev_dbg(hs->dev, "%s request is pending, cannot halt\n", ep->name); From 375f62e7dd38440f9b7b31d99a36cc4286f47004 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 22 May 2016 15:35:13 +0200 Subject: [PATCH 0089/1050] Documentation: configfs-usb-gadget-uvc: fix kernel version v3.20 doesn't exist, it is actually v4.0. Signed-off-by: Alexandre Belloni Signed-off-by: Felipe Balbi --- .../ABI/testing/configfs-usb-gadget-uvc | 58 +++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc index 2f4a0051b32d..1ba0d0fda9c0 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-uvc +++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc @@ -1,6 +1,6 @@ What: /config/usb-gadget/gadget/functions/uvc.name Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: UVC function directory streaming_maxburst - 0..15 (ss only) @@ -9,37 +9,37 @@ Description: UVC function directory What: /config/usb-gadget/gadget/functions/uvc.name/control Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Control descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/class Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Class descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/class/ss Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Super speed control class descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/class/fs Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Full speed control class descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Terminal descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal/output Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Output terminal descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal/output/default Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Default output terminal descriptors All attributes read only: @@ -53,12 +53,12 @@ Description: Default output terminal descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal/camera Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Camera terminal descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal/camera/default Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Default camera terminal descriptors All attributes read only: @@ -75,12 +75,12 @@ Description: Default camera terminal descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/processing Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Processing unit descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/processing/default Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Default processing unit descriptors All attributes read only: @@ -94,49 +94,49 @@ Description: Default processing unit descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/header Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Control header descriptors What: /config/usb-gadget/gadget/functions/uvc.name/control/header/name Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Specific control header descriptors dwClockFrequency bcdUVC What: /config/usb-gadget/gadget/functions/uvc.name/streaming Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Streaming descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/class Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Streaming class descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/class/ss Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Super speed streaming class descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/class/hs Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: High speed streaming class descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/class/fs Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Full speed streaming class descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/color_matching Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Color matching descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/color_matching/default Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Default color matching descriptors All attributes read only: @@ -150,12 +150,12 @@ Description: Default color matching descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: MJPEG format descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg/name Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Specific MJPEG format descriptors All attributes read only, @@ -174,7 +174,7 @@ Description: Specific MJPEG format descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg/name/name Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Specific MJPEG frame descriptors dwFrameInterval - indicates how frame interval can be @@ -196,12 +196,12 @@ Description: Specific MJPEG frame descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Uncompressed format descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed/name Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Specific uncompressed format descriptors bmaControls - this format's data for bmaControls in @@ -221,7 +221,7 @@ Description: Specific uncompressed format descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed/name/name Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Specific uncompressed frame descriptors dwFrameInterval - indicates how frame interval can be @@ -243,12 +243,12 @@ Description: Specific uncompressed frame descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/header Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Streaming header descriptors What: /config/usb-gadget/gadget/functions/uvc.name/streaming/header/name Date: Dec 2014 -KernelVersion: 3.20 +KernelVersion: 4.0 Description: Specific streaming header descriptors All attributes read only: From e5a89162161d498170e7e39e6cfd2f71458c2b00 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Sun, 22 May 2016 11:08:13 +0200 Subject: [PATCH 0090/1050] usb: gadget: printer: Drop unused device qualifier descriptor This descriptor is never used. Currently device qualifier descriptor is generated by compossite code, so no need to keep it in function file. Signed-off-by: Krzysztof Opasiak Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_printer.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index c45104e3a64b..64706a789580 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -161,14 +161,6 @@ static struct usb_endpoint_descriptor hs_ep_out_desc = { .wMaxPacketSize = cpu_to_le16(512) }; -static struct usb_qualifier_descriptor dev_qualifier = { - .bLength = sizeof(dev_qualifier), - .bDescriptorType = USB_DT_DEVICE_QUALIFIER, - .bcdUSB = cpu_to_le16(0x0200), - .bDeviceClass = USB_CLASS_PRINTER, - .bNumConfigurations = 1 -}; - static struct usb_descriptor_header *hs_printer_function[] = { (struct usb_descriptor_header *) &intf_desc, (struct usb_descriptor_header *) &hs_ep_in_desc, From d4529f9be1d72919f75f76f31773c4e98d03ce6b Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Sun, 22 May 2016 11:08:14 +0200 Subject: [PATCH 0091/1050] usb: gadget: uac2: Drop unused device qualifier descriptor This descriptor is never used. Currently device qualifier descriptor is generated by compossite code so no need to keep it in function file. Signed-off-by: Krzysztof Opasiak Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uac2.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 186d4b162524..1c81dd3b0cd4 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -598,18 +598,6 @@ static struct usb_gadget_strings *fn_strings[] = { NULL, }; -static struct usb_qualifier_descriptor devqual_desc = { - .bLength = sizeof devqual_desc, - .bDescriptorType = USB_DT_DEVICE_QUALIFIER, - - .bcdUSB = cpu_to_le16(0x200), - .bDeviceClass = USB_CLASS_MISC, - .bDeviceSubClass = 0x02, - .bDeviceProtocol = 0x01, - .bNumConfigurations = 1, - .bRESERVED = 0, -}; - static struct usb_interface_assoc_descriptor iad_desc = { .bLength = sizeof iad_desc, .bDescriptorType = USB_DT_INTERFACE_ASSOCIATION, From cc50dc28da9109d585416595fc23ebb2171f3b2f Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Sun, 22 May 2016 11:08:15 +0200 Subject: [PATCH 0092/1050] usb: gadget: storage-common: Fix old comment about qualifier descriptor Device qualifier descriptor is now generated by composite.c code. So let's fix this old comment by removing parts which are no longer valid. Signed-off-by: Krzysztof Opasiak Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/storage_common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index d62683017cf3..990df221c629 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -83,9 +83,7 @@ EXPORT_SYMBOL_GPL(fsg_fs_function); * USB 2.0 devices need to expose both high speed and full speed * descriptors, unless they only run at full speed. * - * That means alternate endpoint descriptors (bigger packets) - * and a "device qualifier" ... plus more construction options - * for the configuration descriptor. + * That means alternate endpoint descriptors (bigger packets). */ struct usb_endpoint_descriptor fsg_hs_bulk_in_desc = { .bLength = USB_DT_ENDPOINT_SIZE, From 53642399aa71b7c3b15d0305dc54738c4222bb1e Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Fri, 20 May 2016 18:13:19 +0800 Subject: [PATCH 0093/1050] usb: gadget: f_fs: Fix wrong check on reserved1 of OS_DESC_EXT_COMPAT Current __ffs_data_do_os_desc() of f_fs.c will check reserved1 field of OS_DESC_EXT_COMPAT and return -EINVAL if it's 1. But MS OS 1.0 Descriptors http://msdn.microsoft.com/en-us/library/windows/hardware/gg463179.aspx defines that field to be 1. Signed-off-by: Jim Lin Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index d26eb64e59b6..9ac6e868946d 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2051,7 +2051,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, if (len < sizeof(*d) || d->bFirstInterfaceNumber >= ffs->interfaces_count || - d->Reserved1) + !d->Reserved1) return -EINVAL; for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) if (d->Reserved2[i]) From 23e3439296a55affce3ef0ab78f1c2e03aec8767 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 May 2016 15:52:27 +0200 Subject: [PATCH 0094/1050] usb: dwc2: fix regression on big-endian PowerPC/ARM systems A patch that went into Linux-4.4 to fix big-endian mode on a Lantiq MIPS system unfortunately broke big-endian operation on PowerPC APM82181 as reported by Christian Lamparter, and likely other systems. It actually introduced multiple issues: - it broke big-endian ARM kernels: any machine that was working correctly with a little-endian kernel is no longer using byteswaps on big-endian kernels, which clearly breaks them. - On PowerPC the same thing must be true: if it was working before, using big-endian kernels is now broken. Unlike ARM, 32-bit PowerPC usually uses big-endian kernels, so they are likely all broken. - The barrier for dwc2_writel is on the wrong side of the __raw_writel(), so the MMIO no longer synchronizes with DMA operations. - On architectures that require specific CPU instructions for MMIO access, using the __raw_ variant may turn this into a pointer dereference that does not have the same effect as the readl/writel. This patch is a simple revert for all architectures other than MIPS, in the hope that we can more easily backport it to fix the regression on PowerPC and ARM systems without breaking the Lantiq system again. We should follow this up with a more elaborate change to add runtime detection of endianness, to make sure it also works on all other combinations of architectures and implementations of the usb-dwc2 device. That patch however will be fairly large and not appropriate for backports to stable kernels. Felipe suggested a different approach, using an endianness switching register to always put the device into LE mode, but unfortunately the dwc2 hardware does not provide a generic way to do that. Also, I see no practical way of addressing the problem more generally by patching architecture specific code on MIPS. Fixes: 95c8bc360944 ("usb: dwc2: Use platform endianness when accessing registers") Acked-by: John Youn Tested-by: Christian Lamparter Signed-off-by: Arnd Bergmann Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/core.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 3c58d633ce80..dec0b21fc626 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -64,6 +64,17 @@ DWC2_TRACE_SCHEDULER_VB(pr_fmt("%s: SCH: " fmt), \ dev_name(hsotg->dev), ##__VA_ARGS__) +#ifdef CONFIG_MIPS +/* + * There are some MIPS machines that can run in either big-endian + * or little-endian mode and that use the dwc2 register without + * a byteswap in both ways. + * Unlike other architectures, MIPS apparently does not require a + * barrier before the __raw_writel() to synchronize with DMA but does + * require the barrier after the __raw_writel() to serialize a set of + * writes. This set of operations was added specifically for MIPS and + * should only be used there. + */ static inline u32 dwc2_readl(const void __iomem *addr) { u32 value = __raw_readl(addr); @@ -90,6 +101,22 @@ static inline void dwc2_writel(u32 value, void __iomem *addr) pr_info("INFO:: wrote %08x to %p\n", value, addr); #endif } +#else +/* Normal architectures just use readl/write */ +static inline u32 dwc2_readl(const void __iomem *addr) +{ + return readl(addr); +} + +static inline void dwc2_writel(u32 value, void __iomem *addr) +{ + writel(value, addr); + +#ifdef DWC2_LOG_WRITES + pr_info("info:: wrote %08x to %p\n", value, addr); +#endif +} +#endif /* Maximum number of Endpoints/HostChannels */ #define MAX_EPS_CHANNELS 16 From ffeee83aa0461992e8a99a59db2df31933e60362 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 8 May 2016 23:20:59 +0200 Subject: [PATCH 0095/1050] usb: gadget: avoid exposing kernel stack Function in_rq_cur copies random bytes from the stack. Zero the memory instead. Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver") Signed-off-by: Heinrich Schuchardt Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uac2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 1c81dd3b0cd4..cd214ec8a601 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -1280,6 +1280,7 @@ in_rq_cur(struct usb_function *fn, const struct usb_ctrlrequest *cr) if (control_selector == UAC2_CS_CONTROL_SAM_FREQ) { struct cntrl_cur_lay3 c; + memset(&c, 0, sizeof(struct cntrl_cur_lay3)); if (entity_id == USB_IN_CLK_ID) c.dCUR = p_srate; From e877b729c649c2850f61f2ae37296ae701f9ad63 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 8 May 2016 22:50:12 +0200 Subject: [PATCH 0096/1050] usb: gadget: f_tcm: out of bound access in usbg_drop_tpg Commit dc8c46a5ae77 ("usb: gadget: f_tcm: convert to new function interface with backward compatibility") introduced a possible out of bounds memory access: If tpg is not found in function usbg_drop_tpg, tpg_instances[TPG_INSTANCES] is accessed. Fixes: dc8c46a5ae77 ("usb: gadget: f_tcm: convert to new function interface with backward compatibility") Signed-off-by: Heinrich Schuchardt Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_tcm.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index 35fe3c80cfc0..197f73386fac 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -1445,16 +1445,18 @@ static void usbg_drop_tpg(struct se_portal_group *se_tpg) for (i = 0; i < TPG_INSTANCES; ++i) if (tpg_instances[i].tpg == tpg) break; - if (i < TPG_INSTANCES) + if (i < TPG_INSTANCES) { tpg_instances[i].tpg = NULL; - opts = container_of(tpg_instances[i].func_inst, - struct f_tcm_opts, func_inst); - mutex_lock(&opts->dep_lock); - if (opts->has_dep) - module_put(opts->dependent); - else - configfs_undepend_item_unlocked(&opts->func_inst.group.cg_item); - mutex_unlock(&opts->dep_lock); + opts = container_of(tpg_instances[i].func_inst, + struct f_tcm_opts, func_inst); + mutex_lock(&opts->dep_lock); + if (opts->has_dep) + module_put(opts->dependent); + else + configfs_undepend_item_unlocked( + &opts->func_inst.group.cg_item); + mutex_unlock(&opts->dep_lock); + } mutex_unlock(&tpg_instances_lock); kfree(tpg); From c6010c8b4d2c6e75853ca63c602c9af56fcbead5 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Fri, 13 May 2016 20:32:16 +0800 Subject: [PATCH 0097/1050] usb: gadget: f_fs: Fix kernel panic if use_os_string not set If c->cdev->use_os_string flag is not set, don't need to invoke ffs_do_os_descs() in _ffs_func_bind. So uninitialized ext_compat_id pointer won't be accessed by __ffs_func_bind_do_os_desc to cause kernel panic. Signed-off-by: Jim Lin Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 9ac6e868946d..cc33d2667408 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2849,7 +2849,7 @@ static int _ffs_func_bind(struct usb_configuration *c, goto error; func->function.os_desc_table = vla_ptr(vlabuf, d, os_desc_table); - if (c->cdev->use_os_string) + if (c->cdev->use_os_string) { for (i = 0; i < ffs->interfaces_count; ++i) { struct usb_os_desc *desc; @@ -2860,13 +2860,15 @@ static int _ffs_func_bind(struct usb_configuration *c, vla_ptr(vlabuf, d, ext_compat) + i * 16; INIT_LIST_HEAD(&desc->ext_prop); } - ret = ffs_do_os_descs(ffs->ms_os_descs_count, - vla_ptr(vlabuf, d, raw_descs) + - fs_len + hs_len + ss_len, - d_raw_descs__sz - fs_len - hs_len - ss_len, - __ffs_func_bind_do_os_desc, func); - if (unlikely(ret < 0)) - goto error; + ret = ffs_do_os_descs(ffs->ms_os_descs_count, + vla_ptr(vlabuf, d, raw_descs) + + fs_len + hs_len + ss_len, + d_raw_descs__sz - fs_len - hs_len - + ss_len, + __ffs_func_bind_do_os_desc, func); + if (unlikely(ret < 0)) + goto error; + } func->function.os_desc_n = c->cdev->use_os_string ? ffs->interfaces_count : 0; From 7e14f47a55ed67c9d8a8acea6023412f92bac936 Mon Sep 17 00:00:00 2001 From: William Wu Date: Fri, 13 May 2016 18:30:42 +0800 Subject: [PATCH 0098/1050] usb: gadget: composite: don't queue OS desc req if length is invalid In OS descriptors handling, if ctrl->bRequestType is USB_RECIP_DEVICE and w_index != 0x4 or (w_value >> 8) is true, it will not assign a valid value to req->length, but use the default value(-EOPNOTSUPP), and queue an OS desc request with the invalid req->length. It always happens on the platforms which use os_desc (for example: rk3366, rk3399), and cause kernel panic as follows (use dwc3 driver): Unable to handle kernel paging request at virtual address ffffffc0f7e00000 Internal error: Oops: 96000146 [#1] PREEMPT SMP PC is at __dma_clean_range+0x18/0x30 LR is at __swiotlb_map_page+0x50/0x64 Call trace: [] __dma_clean_range+0x18/0x30 [] usb_gadget_map_request+0x134/0x1b0 [] __dwc3_ep0_do_control_data+0x110/0x14c [] __dwc3_gadget_ep0_queue+0x198/0x1b8 [] dwc3_gadget_ep0_queue+0xc0/0xe8 [] composite_ep0_queue.constprop.14+0x34/0x98 [] composite_setup+0xf60/0x100c [] android_setup+0xd8/0x138 [] dwc3_ep0_delegate_req+0x34/0x50 [] dwc3_ep0_interrupt+0x5dc/0xb58 [] dwc3_thread_interrupt+0x15c/0xa24 With this patch, the gadget driver will not queue a request and return immediately if req->length is invalid. And the usb controller driver can handle the unsupport request correctly. Signed-off-by: William Wu Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index d67de0d22a2b..eb648485a58c 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1868,14 +1868,19 @@ unknown: } break; } - req->length = value; - req->context = cdev; - req->zero = value < w_length; - value = composite_ep0_queue(cdev, req, GFP_ATOMIC); - if (value < 0) { - DBG(cdev, "ep_queue --> %d\n", value); - req->status = 0; - composite_setup_complete(gadget->ep0, req); + + if (value >= 0) { + req->length = value; + req->context = cdev; + req->zero = value < w_length; + value = composite_ep0_queue(cdev, req, + GFP_ATOMIC); + if (value < 0) { + DBG(cdev, "ep_queue --> %d\n", value); + req->status = 0; + composite_setup_complete(gadget->ep0, + req); + } } return value; } From 27a0faafdca53bda21ed340ca8f8960696dda049 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 11 May 2016 17:33:11 +0100 Subject: [PATCH 0099/1050] usb: dwc3: st: Fix USB_DR_MODE_PERIPHERAL configuration. Set USB3_FORCE_VBUSVALID when configured for USB_DR_MODE_PERIPHERAL mode, as it is required to have a working setup. This worked on the internal driver by relying on the reset value of the syscfg register as the bits aren't explicity cleared and set like the upstream driver. Also add a comment about what setting this bit means. Signed-off-by: Peter Griffin Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-st.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index 5c0adb9c6fb2..50d6ae6f88bc 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -129,12 +129,18 @@ static int st_dwc3_drd_init(struct st_dwc3 *dwc3_data) switch (dwc3_data->dr_mode) { case USB_DR_MODE_PERIPHERAL: - val &= ~(USB3_FORCE_VBUSVALID | USB3_DELAY_VBUSVALID + val &= ~(USB3_DELAY_VBUSVALID | USB3_SEL_FORCE_OPMODE | USB3_FORCE_OPMODE(0x3) | USB3_SEL_FORCE_DPPULLDOWN2 | USB3_FORCE_DPPULLDOWN2 | USB3_SEL_FORCE_DMPULLDOWN2 | USB3_FORCE_DMPULLDOWN2); - val |= USB3_DEVICE_NOT_HOST; + /* + * USB3_PORT2_FORCE_VBUSVALID When '1' and when + * USB3_PORT2_DEVICE_NOT_HOST = 1, forces VBUSVLDEXT2 input + * of the pico PHY to 1. + */ + + val |= USB3_DEVICE_NOT_HOST | USB3_FORCE_VBUSVALID; break; case USB_DR_MODE_HOST: From 893e093c786c4256d52809eed697e9d70a6f6643 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 May 2016 11:23:51 +0200 Subject: [PATCH 0100/1050] netfilter: nf_ct_helper: bail out on duplicated helpers Don't allow registration of helpers using the same tuple: { l3proto, l4proto, src-port } We lookup for the helper from the packet path using this tuple through __nf_ct_helper_find(). Therefore, we have to avoid having two helpers with the same tuple to ensure predictible behaviour. Don't compare the helper string names anymore since it is valid to register two helpers with the same name, but using different tuples. This is also implicitly fixing up duplicated helper registration via ports= modparam since the name comparison was defeating the tuple duplication validation. Reported-by: Feng Gao Reported-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3b40ec575cd5..48de9be45a9a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -361,9 +361,10 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_log); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { - int ret = 0; - struct nf_conntrack_helper *cur; + struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = helper_hash(&me->tuple); + struct nf_conntrack_helper *cur; + int ret = 0; BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); @@ -371,9 +372,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) mutex_lock(&nf_ct_helper_mutex); hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { - if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && - cur->tuple.src.l3num == me->tuple.src.l3num && - cur->tuple.dst.protonum == me->tuple.dst.protonum) { + if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { ret = -EEXIST; goto out; } From 62397da50bb20a6b812c949ef465d7e69fe54bb6 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Fri, 13 May 2016 12:41:48 +0200 Subject: [PATCH 0101/1050] mac80211_hwsim: Add missing check for HWSIM_ATTR_SIGNAL A wmediumd that does not send this attribute causes a NULL pointer dereference, as the attribute is accessed even if it does not exist. The attribute was required but never checked ever since userspace frame forwarding has been introduced. The issue gets more problematic once we allow wmediumd registration from user namespaces. Cc: stable@vger.kernel.org Fixes: 7882513bacb1 ("mac80211_hwsim driver support userspace frame tx/rx") Signed-off-by: Martin Willi Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 9ed0ed1bf514..4dd5adcdd29b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2776,6 +2776,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] || !info->attrs[HWSIM_ATTR_FLAGS] || !info->attrs[HWSIM_ATTR_COOKIE] || + !info->attrs[HWSIM_ATTR_SIGNAL] || !info->attrs[HWSIM_ATTR_TX_INFO]) goto out; From fe7a7c57629e8dcbc0e297363a9b2366d67a6dc5 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sun, 15 May 2016 13:19:16 -0400 Subject: [PATCH 0102/1050] mac80211: mesh: flush mesh paths unconditionally Currently, the mesh paths associated with a nexthop station are cleaned up in the following code path: __sta_info_destroy_part1 synchronize_net() __sta_info_destroy_part2 -> cleanup_single_sta -> mesh_sta_cleanup -> mesh_plink_deactivate -> mesh_path_flush_by_nexthop However, there are a couple of problems here: 1) the paths aren't flushed at all if the MPM is running in userspace (e.g. when using wpa_supplicant or authsae) 2) there is no synchronize_rcu between removing the path and readers accessing the nexthop, which means the following race is possible: CPU0 CPU1 ~~~~ ~~~~ sta_info_destroy_part1() synchronize_net() rcu_read_lock() mesh_nexthop_resolve() mpath = mesh_path_lookup() [...] -> mesh_path_flush_by_nexthop() sta = rcu_dereference( mpath->next_hop) kfree(sta) access sta <-- CRASH Fix both of these by unconditionally flushing paths before destroying the sta, and by adding a synchronize_net() after path flush to ensure no active readers can still dereference the sta. Fixes this crash: [ 348.529295] BUG: unable to handle kernel paging request at 00020040 [ 348.530014] IP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] *pde = 00000000 [ 348.530014] Oops: 0000 [#1] PREEMPT [ 348.530014] Modules linked in: drbg ansi_cprng ctr ccm ppp_generic slhc ipt_MASQUERADE nf_nat_masquerade_ipv4 8021q ] [ 348.530014] CPU: 0 PID: 20597 Comm: wget Tainted: G O 4.6.0-rc5-wt=V1 #1 [ 348.530014] Hardware name: To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080016 11/07/2014 [ 348.530014] task: f64fa280 ti: f4f9c000 task.ti: f4f9c000 [ 348.530014] EIP: 0060:[] EFLAGS: 00010246 CPU: 0 [ 348.530014] EIP is at ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] EAX: f4ce63e0 EBX: 00000088 ECX: f3788416 EDX: 00020008 [ 348.530014] ESI: 00000000 EDI: 00000088 EBP: f6409a4c ESP: f6409a40 [ 348.530014] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 [ 348.530014] CR0: 80050033 CR2: 00020040 CR3: 33190000 CR4: 00000690 [ 348.530014] Stack: [ 348.530014] 00000000 f4ce63e0 f5f9bd80 f6409a64 f9291d80 0000ce67 f5d51e00 f4ce63e0 [ 348.530014] f3788416 f6409a80 f9291dc1 f4ce8320 f4ce63e0 f5d51e00 f4ce63e0 f4ce8320 [ 348.530014] f6409a98 f9277f6f 00000000 00000000 0000007c 00000000 f6409b2c f9278dd1 [ 348.530014] Call Trace: [ 348.530014] [] mesh_nexthop_lookup+0xbb/0xc8 [mac80211] [ 348.530014] [] mesh_nexthop_resolve+0x34/0xd8 [mac80211] [ 348.530014] [] ieee80211_xmit+0x92/0xc1 [mac80211] [ 348.530014] [] __ieee80211_subif_start_xmit+0x807/0x83c [mac80211] [ 348.530014] [] ? sch_direct_xmit+0xd7/0x1b3 [ 348.530014] [] ? __local_bh_enable_ip+0x5d/0x7b [ 348.530014] [] ? nf_nat_ipv4_out+0x4c/0xd0 [nf_nat_ipv4] [ 348.530014] [] ? iptable_nat_ipv4_fn+0xf/0xf [iptable_nat] [ 348.530014] [] ? netif_skb_features+0x14d/0x30a [ 348.530014] [] ieee80211_subif_start_xmit+0xa/0xe [mac80211] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] batadv_send_skb_packet+0xd6/0xec [batman_adv] [ 348.530014] [] batadv_send_unicast_skb+0x15/0x4a [batman_adv] [ 348.530014] [] batadv_dat_send_data+0x27e/0x310 [batman_adv] [ 348.530014] [] ? batadv_tt_global_hash_find.isra.11+0x8/0xa [batman_adv] [ 348.530014] [] batadv_dat_snoop_outgoing_arp_request+0x208/0x23d [batman_adv] [ 348.530014] [] batadv_interface_tx+0x206/0x385 [batman_adv] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] ? igb_xmit_frame+0x57/0x72 [igb] [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] br_dev_queue_push_xmit+0xeb/0xfb [bridge] [ 348.530014] [] br_forward_finish+0x29/0x74 [bridge] [ 348.530014] [] ? deliver_clone+0x3b/0x3b [bridge] [ 348.530014] [] __br_forward+0x89/0xe7 [bridge] [ 348.530014] [] ? br_dev_queue_push_xmit+0xfb/0xfb [bridge] [ 348.530014] [] deliver_clone+0x34/0x3b [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_flood+0x77/0x95 [bridge] [ 348.530014] [] br_flood_forward+0x13/0x1a [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_handle_frame_finish+0x392/0x3db [bridge] [ 348.530014] [] ? nf_iterate+0x2b/0x6b [ 348.530014] [] br_handle_frame+0x1e6/0x240 [bridge] [ 348.530014] [] ? br_handle_local_finish+0x6a/0x6a [bridge] [ 348.530014] [] __netif_receive_skb_core+0x43a/0x66b [ 348.530014] [] ? br_handle_frame_finish+0x3db/0x3db [bridge] [ 348.530014] [] ? resched_curr+0x19/0x37 [ 348.530014] [] ? check_preempt_wakeup+0xbf/0xfe [ 348.530014] [] ? ktime_get_with_offset+0x5c/0xfc [ 348.530014] [] __netif_receive_skb+0x47/0x55 [ 348.530014] [] netif_receive_skb_internal+0x40/0x5a [ 348.530014] [] napi_gro_receive+0x3a/0x94 [ 348.530014] [] igb_poll+0x6fd/0x9ad [igb] [ 348.530014] [] ? swake_up_locked+0x14/0x26 [ 348.530014] [] net_rx_action+0xde/0x250 [ 348.530014] [] __do_softirq+0x8a/0x163 [ 348.530014] [] ? __hrtimer_tasklet_trampoline+0x19/0x19 [ 348.530014] [] do_softirq_own_stack+0x26/0x2c [ 348.530014] [ 348.530014] [] irq_exit+0x31/0x6f [ 348.530014] [] do_IRQ+0x8d/0xa0 [ 348.530014] [] common_interrupt+0x2c/0x40 [ 348.530014] Code: e7 8c 00 66 81 ff 88 00 75 12 85 d2 75 0e b2 c3 b8 83 e9 29 f9 e8 a7 5f f9 c6 eb 74 66 81 e3 8c 005 [ 348.530014] EIP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] SS:ESP 0068:f6409a40 [ 348.530014] CR2: 0000000000020040 [ 348.530014] ---[ end trace 48556ac26779732e ]--- [ 348.530014] Kernel panic - not syncing: Fatal exception in interrupt [ 348.530014] Kernel Offset: disabled Cc: stable@vger.kernel.org Reported-by: Fred Veldini Tested-by: Fred Veldini Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 4c6404e1ad6e..21b1fdf5d01d 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,6 +161,10 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->mesh->plink_timer); } + /* make sure no readers can access nexthop sta from here on */ + mesh_path_flush_by_nexthop(sta); + synchronize_net(); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } From 6fe04128f158c5ad27e7504bfdf1b12e63331bc9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 19 May 2016 17:34:38 +0200 Subject: [PATCH 0103/1050] mac80211: fix fast_tx header alignment The header field is defined as u8[] but also accessed as struct ieee80211_hdr. Enforce an alignment of 2 to prevent unnecessary unaligned accesses, which can be very harmful for performance on many platforms. Fixes: e495c24731a2 ("mac80211: extend fast-xmit for more ciphers") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index c8b8ccc370eb..78b0ef32dddd 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -280,7 +280,7 @@ struct ieee80211_fast_tx { u8 sa_offs, da_offs, pn_offs; u8 band; u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV + - sizeof(rfc1042_header)]; + sizeof(rfc1042_header)] __aligned(2); struct rcu_head rcu_head; }; From 0358ccc8ffd8d9b76992b8deab58fb9a721fb18a Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 31 May 2016 09:12:04 -0400 Subject: [PATCH 0104/1050] ALSA: uapi: Add three missing header files to Kbuild file include/uapi/sound/Kbuild was missing the inclusion of three header files in that directory. Signed-off-by: Robert P. J. Day Signed-off-by: Takashi Iwai --- include/uapi/sound/Kbuild | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/uapi/sound/Kbuild b/include/uapi/sound/Kbuild index a7f27704f980..691984cb0b91 100644 --- a/include/uapi/sound/Kbuild +++ b/include/uapi/sound/Kbuild @@ -1,5 +1,6 @@ # UAPI Header export list header-y += asequencer.h +header-y += asoc.h header-y += asound.h header-y += asound_fm.h header-y += compress_offload.h @@ -10,3 +11,5 @@ header-y += hdsp.h header-y += hdspm.h header-y += sb16_csp.h header-y += sfnt_info.h +header-y += tlv.h +header-y += usb_stream.h From c714a588fc39dc8c8f70014f5691217717983fb3 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 31 May 2016 17:26:00 +0800 Subject: [PATCH 0105/1050] regulator: tps51632: Fix setting ramp delay According to the datasheet: SLEW Register(Address = 07h) b7 b6 b5 b4 b3 b2 b1 b0 48mV/us 42mV/us 36mV/us 30mV/us 24mV/us 18mV/us 12mV/us 6mV/us Current code does not set correct slew rate in some cases: e.g. Assume ramp_delay is 10000, current code sets slew register to 6mV/us. Fix the logic to set slew register. Signed-off-by: Axel Lin Acked-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/regulator/tps51632-regulator.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/tps51632-regulator.c b/drivers/regulator/tps51632-regulator.c index 572816e30095..c139890c1514 100644 --- a/drivers/regulator/tps51632-regulator.c +++ b/drivers/regulator/tps51632-regulator.c @@ -94,11 +94,14 @@ static int tps51632_dcdc_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay) { struct tps51632_chip *tps = rdev_get_drvdata(rdev); - int bit = ramp_delay/6000; + int bit; int ret; - if (bit) - bit--; + if (ramp_delay == 0) + bit = 0; + else + bit = DIV_ROUND_UP(ramp_delay, 6000) - 1; + ret = regmap_write(tps->regmap, TPS51632_SLEW_REGS, BIT(bit)); if (ret < 0) dev_err(tps->dev, "SLEW reg write failed, err %d\n", ret); From 3ac066e2227cb272c097f34475247fa0a6cdd2ff Mon Sep 17 00:00:00 2001 From: Jean-Jacques Hiblot Date: Tue, 31 May 2016 17:56:23 +0200 Subject: [PATCH 0106/1050] spi: spi-ti-qspi: Suspend the queue before removing the device Before disabling the pm_runtime, we must ensure that there is no transfer in progress nor will a new one be started. Otherwise the message pump will fail and in the end, the process requesting the transfer will be stuck. This behavior has been observed when transferring data from a SPI flash with dd while removing the module on a DRA7x-evm. Signed-off-by: Jean-Jacques Hiblot Signed-off-by: Mark Brown --- drivers/spi/spi-ti-qspi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 443f664534e1..29ea8d2f9824 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -646,6 +646,13 @@ free_master: static int ti_qspi_remove(struct platform_device *pdev) { + struct ti_qspi *qspi = platform_get_drvdata(pdev); + int rc; + + rc = spi_master_suspend(qspi->master); + if (rc) + return rc; + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); From 792293cfd516a173bbd687b4b26da0f97f97abd2 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 6 May 2016 19:26:05 +0200 Subject: [PATCH 0107/1050] drm/vc4: Fix get_vblank_counter with proper no-op for Linux 4.4+ get_vblank_counter hooked up to drm_vblank_count() which alway was non-sensical but didn't hurt in the past. Since Linux 4.4 it triggers a WARN_ON_ONCE in drm_update_vblank_count on first vblank irq disable, so fix it by hooking to drm_vblank_no_hw_counter(). Tested against Raspian kernel 4.4.8 tree on RPi 2B. Signed-off-by: Mario Kleiner Reviewed-by: Eric Anholt Fixes: c8b75bca92cb ("drm/vc4: Add KMS support for Raspberry Pi.") --- drivers/gpu/drm/vc4/vc4_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 3446ece21b4a..ef7de8eb2b63 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -91,7 +91,7 @@ static struct drm_driver vc4_drm_driver = { .enable_vblank = vc4_enable_vblank, .disable_vblank = vc4_disable_vblank, - .get_vblank_counter = drm_vblank_count, + .get_vblank_counter = drm_vblank_no_hw_counter, #if defined(CONFIG_DEBUG_FS) .debugfs_init = vc4_debugfs_init, From ee7c10e10b632e2319ed1d2d49d63df51a611e62 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 6 May 2016 19:26:06 +0200 Subject: [PATCH 0108/1050] drm/vc4: Fix drm_vblank_put/get imbalance in page flip path. The async page flip path was missing drm_crtc_vblank_get/put completely. The sync flip path was missing a vblank put, so async flips only reported proper pageflip completion events by chance, and vblank irq's never turned off after a first vsync'ed page flip until system reboot. Tested against Raspian kernel 4.4.8 tree on RPi 2B. Signed-off-by: Mario Kleiner Reviewed-by: Eric Anholt Fixes: b501bacc6060 ("drm/vc4: Add support for async pageflips.") --- drivers/gpu/drm/vc4/vc4_crtc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 904d0754ad78..e9befb6d10fd 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -506,6 +506,7 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc) if (vc4_crtc->event) { drm_crtc_send_vblank_event(crtc, vc4_crtc->event); vc4_crtc->event = NULL; + drm_crtc_vblank_put(crtc); } spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -556,6 +557,7 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) spin_unlock_irqrestore(&dev->event_lock, flags); } + drm_crtc_vblank_put(crtc); drm_framebuffer_unreference(flip_state->fb); kfree(flip_state); @@ -598,6 +600,8 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, return ret; } + WARN_ON(drm_crtc_vblank_get(crtc) != 0); + /* Immediately update the plane's legacy fb pointer, so that later * modeset prep sees the state that will be present when the semaphore * is released. From e7c31f6f25b84fed961dc0dce6248878527693ae Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Tue, 3 May 2016 13:48:20 -0400 Subject: [PATCH 0109/1050] drm/vc4: Return -EBUSY if there's already a pending flip event. As per the documentation in drm_crtc.h, atomic_commit should return -EBUSY if an asynchronous update is requested and there is an earlier update pending. v2: Rebase on the s/async/nonblock/ change. Signed-off-by: Robert Foss Reviewed-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_kms.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index cb37751bc99f..861a623bc185 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -117,10 +117,18 @@ static int vc4_atomic_commit(struct drm_device *dev, return -ENOMEM; /* Make sure that any outstanding modesets have finished. */ - ret = down_interruptible(&vc4->async_modeset); - if (ret) { - kfree(c); - return ret; + if (nonblock) { + ret = down_trylock(&vc4->async_modeset); + if (ret) { + kfree(c); + return -EBUSY; + } + } else { + ret = down_interruptible(&vc4->async_modeset); + if (ret) { + kfree(c); + return ret; + } } ret = drm_atomic_helper_prepare_planes(dev, state); From 65d4f4c151a5fa7b2dacaaf70def3f95001766d7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 23 Sep 2015 15:00:37 -0400 Subject: [PATCH 0110/1050] Btrfs: end transaction if we abort when creating uuid root We still need to call btrfs_end_transaction if we call btrfs_abort_transaction, otherwise we hang and make me super grumpy. Thanks, Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9c01824eef08..673c72ab4fbe 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4230,6 +4230,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) if (IS_ERR(uuid_root)) { ret = PTR_ERR(uuid_root); btrfs_abort_transaction(trans, tree_root, ret); + btrfs_end_transaction(trans, tree_root); return ret; } From fbd83006e3e536fcb103228d2422ea63129ccb03 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Tue, 31 May 2016 09:42:29 -0400 Subject: [PATCH 0111/1050] scsi: Add QEMU CD-ROM to VPD Inquiry Blacklist Linux fails to boot as a guest with a QEMU CD-ROM: [ 4.439488] ata2.00: ATAPI: QEMU CD-ROM, 0.8.2, max UDMA/100 [ 4.443649] ata2.00: configured for MWDMA2 [ 4.450267] scsi 1:0:0:0: CD-ROM QEMU QEMU CD-ROM 0.8. PQ: 0 ANSI: 5 [ 4.464317] ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6 frozen [ 4.464319] ata2.00: BMDMA stat 0x5 [ 4.464339] ata2.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 0 dma 16640 in [ 4.464339] Inquiry 12 01 00 00 ff 00res 48/20:02:00:24:00/00:00:00:00:00/a0 Emask 0x2 (HSM violation) [ 4.464341] ata2.00: status: { DRDY DRQ } [ 4.465864] ata2: soft resetting link [ 4.625971] ata2.00: configured for MWDMA2 [ 4.628290] ata2: EH complete [ 4.646670] ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6 frozen [ 4.646671] ata2.00: BMDMA stat 0x5 [ 4.646683] ata2.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 0 dma 16640 in [ 4.646683] Inquiry 12 01 00 00 ff 00res 48/20:02:00:24:00/00:00:00:00:00/a0 Emask 0x2 (HSM violation) [ 4.646685] ata2.00: status: { DRDY DRQ } [ 4.648193] ata2: soft resetting link ... Fix this by suppressing VPD inquiry for this device. Signed-off-by: Ewan D. Milne Reported-by: Jan Stancek Tested-by: Jan Stancek Cc: Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 3408578b08d6..ff41c310c900 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -230,6 +230,7 @@ static struct { {"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC}, {"Promise", "", NULL, BLIST_SPARSELUN}, + {"QEMU", "QEMU CD-ROM", NULL, BLIST_SKIP_VPD_PAGES}, {"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024}, {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024}, {"QUANTUM", "XP34301", "1071", BLIST_NOTQ}, From 50c763f8c1bac0dc00f7788a75f227276c0efd54 Mon Sep 17 00:00:00 2001 From: John Youn Date: Tue, 31 May 2016 17:49:56 -0700 Subject: [PATCH 0112/1050] usb: dwc3: Set the ClearPendIN bit on Clear Stall EP command As of core revision 2.60a the recommended programming model is to set the ClearPendIN bit when issuing a Clear Stall EP command for IN endpoints. This is to prevent an issue where some (non-compliant) hosts may not send ACK TPs for pending IN transfers due to a mishandled error condition. Synopsys STAR 9000614252. Signed-off-by: John Youn Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/gadget.c | 30 ++++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 7ddf9449a063..654050684f4f 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -402,6 +402,7 @@ #define DWC3_DEPCMD_GET_RSC_IDX(x) (((x) >> DWC3_DEPCMD_PARAM_SHIFT) & 0x7f) #define DWC3_DEPCMD_STATUS(x) (((x) >> 12) & 0x0F) #define DWC3_DEPCMD_HIPRI_FORCERM (1 << 11) +#define DWC3_DEPCMD_CLEARPENDIN (1 << 11) #define DWC3_DEPCMD_CMDACT (1 << 10) #define DWC3_DEPCMD_CMDIOC (1 << 8) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 9a7d0bd15dc3..07248ff1be5c 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -347,6 +347,28 @@ int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep, return ret; } +static int dwc3_send_clear_stall_ep_cmd(struct dwc3_ep *dep) +{ + struct dwc3 *dwc = dep->dwc; + struct dwc3_gadget_ep_cmd_params params; + u32 cmd = DWC3_DEPCMD_CLEARSTALL; + + /* + * As of core revision 2.60a the recommended programming model + * is to set the ClearPendIN bit when issuing a Clear Stall EP + * command for IN endpoints. This is to prevent an issue where + * some (non-compliant) hosts may not send ACK TPs for pending + * IN transfers due to a mishandled error condition. Synopsys + * STAR 9000614252. + */ + if (dep->direction && (dwc->revision >= DWC3_REVISION_260A)) + cmd |= DWC3_DEPCMD_CLEARPENDIN; + + memset(¶ms, 0, sizeof(params)); + + return dwc3_send_gadget_ep_cmd(dwc, dep->number, cmd, ¶ms); +} + static dma_addr_t dwc3_trb_dma_offset(struct dwc3_ep *dep, struct dwc3_trb *trb) { @@ -1314,8 +1336,7 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) else dep->flags |= DWC3_EP_STALL; } else { - ret = dwc3_send_gadget_ep_cmd(dwc, dep->number, - DWC3_DEPCMD_CLEARSTALL, ¶ms); + ret = dwc3_send_clear_stall_ep_cmd(dep); if (ret) dev_err(dwc->dev, "failed to clear STALL on %s\n", dep->name); @@ -2247,7 +2268,6 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc) for (epnum = 1; epnum < DWC3_ENDPOINTS_NUM; epnum++) { struct dwc3_ep *dep; - struct dwc3_gadget_ep_cmd_params params; int ret; dep = dwc->eps[epnum]; @@ -2259,9 +2279,7 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc) dep->flags &= ~DWC3_EP_STALL; - memset(¶ms, 0, sizeof(params)); - ret = dwc3_send_gadget_ep_cmd(dwc, dep->number, - DWC3_DEPCMD_CLEARSTALL, ¶ms); + ret = dwc3_send_clear_stall_ep_cmd(dep); WARN_ON_ONCE(ret); } } From ed596a4a88bd161f868ccba078557ee7ede8a6ef Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 31 May 2016 14:48:15 +0200 Subject: [PATCH 0113/1050] HID: elo: kill not flush the work Flushing a work that reschedules itself is not a sensible operation. It needs to be killed. Failure to do so leads to a kernel panic in the timer code. CC: stable@vger.kernel.org Signed-off-by: Oliver Neukum Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-elo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c index aad8c162a825..0cd4f7216239 100644 --- a/drivers/hid/hid-elo.c +++ b/drivers/hid/hid-elo.c @@ -261,7 +261,7 @@ static void elo_remove(struct hid_device *hdev) struct elo_priv *priv = hid_get_drvdata(hdev); hid_hw_stop(hdev); - flush_workqueue(wq); + cancel_delayed_work_sync(&priv->work); kfree(priv); } From a80e803a2ae4efa5efbcfa97dcbbc48d15226cf9 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 31 May 2016 17:31:15 +0200 Subject: [PATCH 0114/1050] HID: multitouch: Add MT_QUIRK_NOT_SEEN_MEANS_UP to Surface Pro 3 The firmware found in the touch screen of an SP3 is buggy and may miss to send lift off reports for contacts. Try to work around that issue by using MT_QUIRK_NOT_SEEN_MEANS_UP. based on a patch from: Daniel Martin Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index c741f5e50a66..95b7d61d9910 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1401,6 +1401,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_NOVATEK, USB_DEVICE_ID_NOVATEK_PCT) }, + /* Ntrig Panel */ + { .driver_data = MT_CLS_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_NTRIG, 0x1b05) }, + /* PixArt optical touch screen */ { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_PIXART, From f840ab18bdf2e415dac21d09fbbbd2873111bd48 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 31 May 2016 11:32:02 +0100 Subject: [PATCH 0115/1050] thermal: cpu_cooling: fix improper order during initialization The freq_table array is not populated before calling thermal_of_cooling_register. The code which populates the freq table was introduced in commit f6859014. This should be done before registering new thermal cooling device. The log shows effects of this wrong decision. [ 2.172614] cpu cpu1: Failed to get voltage for frequency 1984518656000: -34 [ 2.220863] cpu cpu0: Failed to get voltage for frequency 1984524416000: -34 Cc: # 3.19+ Fixes: f6859014c7e7 ("thermal: cpu_cooling: Store frequencies in descending order") Signed-off-by: Lukasz Luba Acked-by: Javi Merino Acked-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/cpu_cooling.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 6ceac4f2d4b2..5b4b47ed948b 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -857,14 +857,6 @@ __cpufreq_cooling_register(struct device_node *np, goto free_power_table; } - snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d", - cpufreq_dev->id); - - cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev, - &cpufreq_cooling_ops); - if (IS_ERR(cool_dev)) - goto remove_idr; - /* Fill freq-table in descending order of frequencies */ for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) { freq = find_next_max(table, freq); @@ -877,6 +869,14 @@ __cpufreq_cooling_register(struct device_node *np, pr_debug("%s: freq:%u KHz\n", __func__, freq); } + snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d", + cpufreq_dev->id); + + cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev, + &cpufreq_cooling_ops); + if (IS_ERR(cool_dev)) + goto remove_idr; + cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0]; cpufreq_dev->cool_dev = cool_dev; From 5a5e78cd706ce0577ed6356634a34552c866cc10 Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Wed, 18 May 2016 23:01:39 +0800 Subject: [PATCH 0116/1050] thermal: add the note for set_trip_temp Fixes commit 60f9ce3ada53 ("thermal: of-thermal: allow setting trip_temp on hardware") Signed-off-by: Caesar Wang Cc: Zhang Rui Cc: Eduardo Valentin Cc: linux-pm@vger.kernel.org Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e45abe7db9a6..ee517bef0db0 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -335,6 +335,8 @@ struct thermal_genl_event { * @get_trend: a pointer to a function that reads the sensor temperature trend. * @set_emul_temp: a pointer to a function that sets sensor emulated * temperature. + * @set_trip_temp: a pointer to a function that sets the trip temperature on + * hardware. */ struct thermal_zone_of_device_ops { int (*get_temp)(void *, int *); From 540c26087bfbad6ea72758b76b16ae6282a73fea Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Wed, 1 Jun 2016 11:32:51 -0700 Subject: [PATCH 0117/1050] Input: xpad - fix rumble on Xbox One controllers with 2015 firmware Xbox One controllers that shipped with or were upgraded to the 2015 firmware discard the current rumble packets we send. This patch changes the Xbox One rumble packet to a form that both the newer and older firmware will accept. It is based on changes made to support newer Xbox One controllers in the SteamOS brewmaster-4.1 kernel branch. Signed-off-by: Cameron Gutman Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 804dbcc37d3f..923c57236c51 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1031,17 +1031,17 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect case XTYPE_XBOXONE: packet->data[0] = 0x09; /* activate rumble */ - packet->data[1] = 0x08; + packet->data[1] = 0x00; packet->data[2] = xpad->odata_serial++; - packet->data[3] = 0x08; /* continuous effect */ - packet->data[4] = 0x00; /* simple rumble mode */ - packet->data[5] = 0x03; /* L and R actuator only */ - packet->data[6] = 0x00; /* TODO: LT actuator */ - packet->data[7] = 0x00; /* TODO: RT actuator */ + packet->data[3] = 0x09; + packet->data[4] = 0x00; + packet->data[5] = 0x0F; + packet->data[6] = 0x00; + packet->data[7] = 0x00; packet->data[8] = strong / 512; /* left actuator */ packet->data[9] = weak / 512; /* right actuator */ - packet->data[10] = 0x80; /* length of pulse */ - packet->data[11] = 0x00; /* stop period of pulse */ + packet->data[10] = 0xFF; + packet->data[11] = 0x00; packet->data[12] = 0x00; packet->len = 13; packet->pending = true; From b52fc2183f40170489692852fd66d8750cbaa324 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 19 Apr 2016 18:36:55 -0700 Subject: [PATCH 0118/1050] vexpress/spc: Remove CLK_IS_ROOT This flag is a no-op now (see commit 47b0eeb3dc8a "clk: Deprecate CLK_IS_ROOT", 2016-02-02) so remove it. Acked-by: Sudeep Holla Signed-off-by: Stephen Boyd --- arch/arm/mach-vexpress/spc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 5766ce2be32b..8409cab3f760 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -547,7 +547,7 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) init.name = dev_name(cpu_dev); init.ops = &clk_spc_ops; - init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE; + init.flags = CLK_GET_RATE_NOCACHE; init.num_parents = 0; return devm_clk_register(cpu_dev, &spc->hw); From 3fb9c41286e594d52d23ca58404c69a7e2c39c41 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 19 Apr 2016 18:31:12 -0700 Subject: [PATCH 0119/1050] powerpc/512x: clk: Remove CLK_IS_ROOT This flag is a no-op now (see commit 47b0eeb3dc8a "clk: Deprecate CLK_IS_ROOT", 2016-02-02) so remove it. Cc: Gerhard Sittig Signed-off-by: Stephen Boyd --- arch/powerpc/platforms/512x/clock-commonclk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index c50ea76ba66c..6081fbd75330 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -221,7 +221,7 @@ static bool soc_has_mclk_mux0_canin(void) /* convenience wrappers around the common clk API */ static inline struct clk *mpc512x_clk_fixed(const char *name, int rate) { - return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); + return clk_register_fixed_rate(NULL, name, NULL, 0, rate); } static inline struct clk *mpc512x_clk_factor( From 3c7f4f54578b983c964eb992229a7dd153ce5ee0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 1 Jun 2016 14:52:54 -0700 Subject: [PATCH 0120/1050] clk: microchip: Remove CLK_IS_ROOT This flag is a no-op now (see commit 47b0eeb3dc8a "clk: Deprecate CLK_IS_ROOT", 2016-02-02) so remove it. Cc: Purna Chandra Mandal Cc: Ralf Baechle Cc: Signed-off-by: Stephen Boyd --- drivers/clk/microchip/clk-pic32mzda.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/microchip/clk-pic32mzda.c b/drivers/clk/microchip/clk-pic32mzda.c index 020a29acc5b0..51f54380474b 100644 --- a/drivers/clk/microchip/clk-pic32mzda.c +++ b/drivers/clk/microchip/clk-pic32mzda.c @@ -180,15 +180,15 @@ static int pic32mzda_clk_probe(struct platform_device *pdev) /* register fixed rate clocks */ clks[POSCCLK] = clk_register_fixed_rate(&pdev->dev, "posc_clk", NULL, - CLK_IS_ROOT, 24000000); + 0, 24000000); clks[FRCCLK] = clk_register_fixed_rate(&pdev->dev, "frc_clk", NULL, - CLK_IS_ROOT, 8000000); + 0, 8000000); clks[BFRCCLK] = clk_register_fixed_rate(&pdev->dev, "bfrc_clk", NULL, - CLK_IS_ROOT, 8000000); + 0, 8000000); clks[LPRCCLK] = clk_register_fixed_rate(&pdev->dev, "lprc_clk", NULL, - CLK_IS_ROOT, 32000); + 0, 32000); clks[UPLLCLK] = clk_register_fixed_rate(&pdev->dev, "usbphy_clk", NULL, - CLK_IS_ROOT, 24000000); + 0, 24000000); /* fixed rate (optional) clock */ if (of_find_property(np, "microchip,pic32mzda-sosc", NULL)) { pr_info("pic32-clk: dt requests SOSC.\n"); From 27a41a83ec54d0edfcaf079310244e7f013a7701 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 1 Jun 2016 18:09:07 +0300 Subject: [PATCH 0121/1050] xhci: Cleanup only when releasing primary hcd Under stress occasions some TI devices might not return early when reading the status register during the quirk invocation of xhci_irq made by usb_hcd_pci_remove. This means that instead of returning, we end up handling this interruption in the middle of a shutdown. Since xhci->event_ring has already been freed in xhci_mem_cleanup, we end up accessing freed memory, causing the Oops below. commit 8c24d6d7b09d ("usb: xhci: stop everything on the first call to xhci_stop") is the one that changed the instant in which we clean up the event queue when stopping a device. Before, we didn't call xhci_mem_cleanup at the first time xhci_stop is executed (for the shared HCD), instead, we only did it after the invocation for the primary HCD, much later at the removal path. The code flow for this oops looks like this: xhci_pci_remove() usb_remove_hcd(xhci->shared) xhci_stop(xhci->shared) xhci_halt() xhci_mem_cleanup(xhci); // Free the event_queue usb_hcd_pci_remove(primary) xhci_irq() // Access the event_queue if STS_EINT is set. Crash. xhci_stop() xhci_halt() // return early The fix modifies xhci_stop to only cleanup the xhci data when releasing the primary HCD. This way, we still have the event_queue configured when invoking xhci_irq. We still halt the device on the first call to xhci_stop, though. I could reproduce this issue several times on the mainline kernel by doing a bind-unbind stress test with a specific storage gadget attached. I also ran the same test over-night with my patch applied and didn't observe the issue anymore. [ 113.334124] Unable to handle kernel paging request for data at address 0x00000028 [ 113.335514] Faulting instruction address: 0xd00000000d4f767c [ 113.336839] Oops: Kernel access of bad area, sig: 11 [#1] [ 113.338214] SMP NR_CPUS=1024 NUMA PowerNV [c000000efe47ba90] c000000000720850 usb_hcd_irq+0x50/0x80 [c000000efe47bac0] c00000000073d328 usb_hcd_pci_remove+0x68/0x1f0 [c000000efe47bb00] d00000000daf0128 xhci_pci_remove+0x78/0xb0 [xhci_pci] [c000000efe47bb30] c00000000055cf70 pci_device_remove+0x70/0x110 [c000000efe47bb70] c00000000061c6bc __device_release_driver+0xbc/0x190 [c000000efe47bba0] c00000000061c7d0 device_release_driver+0x40/0x70 [c000000efe47bbd0] c000000000619510 unbind_store+0x120/0x150 [c000000efe47bc20] c0000000006183c4 drv_attr_store+0x64/0xa0 [c000000efe47bc60] c00000000039f1d0 sysfs_kf_write+0x80/0xb0 [c000000efe47bca0] c00000000039e14c kernfs_fop_write+0x18c/0x1f0 [c000000efe47bcf0] c0000000002e962c __vfs_write+0x6c/0x190 [c000000efe47bd90] c0000000002eab40 vfs_write+0xc0/0x200 [c000000efe47bde0] c0000000002ec85c SyS_write+0x6c/0x110 [c000000efe47be30] c000000000009260 system_call+0x38/0x108 Signed-off-by: Gabriel Krisman Bertazi Cc: Roger Quadros Cc: joel@jms.id.au Cc: stable@vger.kernel.org Reviewed-by: Roger Quadros Cc: #v4.3+ Tested-by: Joel Stanley Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 3 ++- drivers/usb/host/xhci.c | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 52deae4b7eac..1287339f11bb 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2721,7 +2721,8 @@ hw_died: writel(irq_pending, &xhci->ir_set->irq_pending); } - if (xhci->xhc_state & XHCI_STATE_DYING) { + if (xhci->xhc_state & XHCI_STATE_DYING || + xhci->xhc_state & XHCI_STATE_HALTED) { xhci_dbg(xhci, "xHCI dying, ignoring interrupt. " "Shouldn't IRQs be disabled?\n"); /* Clear the event handler busy flag (RW1C); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index fa7e1ef36cd9..fe95602a8ea8 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -685,20 +685,23 @@ void xhci_stop(struct usb_hcd *hcd) u32 temp; struct xhci_hcd *xhci = hcd_to_xhci(hcd); - if (xhci->xhc_state & XHCI_STATE_HALTED) - return; - mutex_lock(&xhci->mutex); - spin_lock_irq(&xhci->lock); - xhci->xhc_state |= XHCI_STATE_HALTED; - xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; - /* Make sure the xHC is halted for a USB3 roothub - * (xhci_stop() could be called as part of failed init). - */ - xhci_halt(xhci); - xhci_reset(xhci); - spin_unlock_irq(&xhci->lock); + if (!(xhci->xhc_state & XHCI_STATE_HALTED)) { + spin_lock_irq(&xhci->lock); + + xhci->xhc_state |= XHCI_STATE_HALTED; + xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; + xhci_halt(xhci); + xhci_reset(xhci); + + spin_unlock_irq(&xhci->lock); + } + + if (!usb_hcd_is_primary_hcd(hcd)) { + mutex_unlock(&xhci->mutex); + return; + } xhci_cleanup_msix(xhci); From 3425aa03f484d45dc21e0e791c2f6c74ea656421 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 1 Jun 2016 18:09:08 +0300 Subject: [PATCH 0122/1050] xhci: Fix handling timeouted commands on hosts in weird states. If commands timeout we mark them for abortion, then stop the command ring, and turn the commands to no-ops and finally restart the command ring. If the host is working properly the no-op commands will finish and pending completions are called. If we notice the host is failing, driver clears the command ring and completes, deletes and frees all pending commands. There are two separate cases reported where host is believed to work properly but is not. In the first case we successfully stop the ring but no abort or stop command ring event is ever sent and host locks up. The second case is if a host is removed, command times out and driver believes the ring is stopped, and assumes it will be restarted, but actually ends up timing out on the same command forever. If one of the pending commands has the xhci->mutex held it will block xhci_stop() in the remove codepath which otherwise would cleanup pending commands. Add a check that clears all pending commands in case host is removed, or we are stuck timing out on the same command. Also restart the command timeout timer when stopping the command ring to ensure we recive an ring stop/abort event. Cc: stable Tested-by: Joe Lawrence Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1287339f11bb..d7d502578d79 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -290,6 +290,14 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; + + /* + * Writing the CMD_RING_ABORT bit should cause a cmd completion event, + * however on some host hw the CMD_RING_RUNNING bit is correctly cleared + * but the completion event in never sent. Use the cmd timeout timer to + * handle those cases. Use twice the time to cover the bit polling retry + */ + mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT)); xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); @@ -314,6 +322,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) xhci_err(xhci, "Stopped the command ring failed, " "maybe the host is dead\n"); + del_timer(&xhci->cmd_timer); xhci->xhc_state |= XHCI_STATE_DYING; xhci_quiesce(xhci); xhci_halt(xhci); @@ -1246,22 +1255,21 @@ void xhci_handle_command_timeout(unsigned long data) int ret; unsigned long flags; u64 hw_ring_state; - struct xhci_command *cur_cmd = NULL; + bool second_timeout = false; xhci = (struct xhci_hcd *) data; /* mark this command to be cancelled */ spin_lock_irqsave(&xhci->lock, flags); if (xhci->current_cmd) { - cur_cmd = xhci->current_cmd; - cur_cmd->status = COMP_CMD_ABORT; + if (xhci->current_cmd->status == COMP_CMD_ABORT) + second_timeout = true; + xhci->current_cmd->status = COMP_CMD_ABORT; } - /* Make sure command ring is running before aborting it */ hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) && (hw_ring_state & CMD_RING_RUNNING)) { - spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg(xhci, "Command timeout\n"); ret = xhci_abort_cmd_ring(xhci); @@ -1273,6 +1281,15 @@ void xhci_handle_command_timeout(unsigned long data) } return; } + + /* command ring failed to restart, or host removed. Bail out */ + if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_dbg(xhci, "command timed out twice, ring start fail?\n"); + xhci_cleanup_command_queue(xhci); + return; + } + /* command timeout on stopped ring, ring can't be aborted */ xhci_dbg(xhci, "Command timeout on stopped ring\n"); xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd); From de95c40d5beaa47f6dc8fe9ac4159b4672b51523 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 1 Jun 2016 18:09:09 +0300 Subject: [PATCH 0123/1050] usb: xhci-plat: properly handle probe deferral for devm_clk_get() On some platforms, the clocks might be registered by a platform driver. When this is the case, the clock platform driver may very well be probed after xhci-plat, in which case the first probe() invocation of xhci-plat will receive -EPROBE_DEFER as the return value of devm_clk_get(). The current code handles that as a normal error, and simply assumes that this means that the system doesn't have a clock for the XHCI controller, and continues probing without calling clk_prepare_enable(). Unfortunately, this doesn't work on systems where the XHCI controller does have a clock, but that clock is provided by another platform driver. In order to fix this situation, we handle the -EPROBE_DEFER error condition specially, and abort the XHCI controller probe(). It will be retried later automatically, the clock will be available, devm_clk_get() will succeed, and the probe() will continue with the clock prepared and enabled as expected. In practice, such issue is seen on the ARM64 Marvell 7K/8K platform, where the clocks are registered by a platform driver. Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 676ea458148b..1f3f981fe7f8 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -196,6 +196,9 @@ static int xhci_plat_probe(struct platform_device *pdev) ret = clk_prepare_enable(clk); if (ret) goto put_hcd; + } else if (PTR_ERR(clk) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto put_hcd; } xhci = hcd_to_xhci(hcd); From 757de492f2d5711d4f5b386eb9bdd5cdc99eb30e Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 1 Jun 2016 18:09:10 +0300 Subject: [PATCH 0124/1050] xhci: fix platform quirks overwrite regression in 4.7-rc1 commit b1c127ae990b ("usb: host: xhci: plat: make use of new methods in xhci_plat_priv") sets xhci->quirks before calling xhci_gen_setup(), which will overwrite them. Don't overwite the quirks, just add the new ones Fixes: b1c127ae990b ("usb: host: xhci: plat: make use of new methods in xhci_plat_priv") Reported-by: Yoshihiro Shimoda Cc: Felipe Balbi Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index fe95602a8ea8..f2f9518c53ab 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4889,7 +4889,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) xhci->hcc_params2 = readl(&xhci->cap_regs->hcc_params2); xhci_print_registers(xhci); - xhci->quirks = quirks; + xhci->quirks |= quirks; get_quirks(dev, xhci); From 81099f97bd31e25ff2719a435b1860fc3876122f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 19 May 2016 17:12:19 +0200 Subject: [PATCH 0125/1050] usb: quirks: Fix sorting Properly sort all the entries by vendor id. Signed-off-by: Hans de Goede Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6dc810bce295..8130e38cae9d 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -44,6 +44,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Creative SB Audigy 2 NX */ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME }, + /* USB3503 */ + { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Microsoft Wireless Laser Mouse 6000 Receiver */ { USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -173,6 +176,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* MAYA44USB sound device */ { USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME }, + /* ASUS Base Station(T100) */ + { USB_DEVICE(0x0b05, 0x17e0), .driver_info = + USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, @@ -188,16 +195,6 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, - /* INTEL VALUE SSD */ - { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, - - /* USB3503 */ - { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, - - /* ASUS Base Station(T100) */ - { USB_DEVICE(0x0b05, 0x17e0), .driver_info = - USB_QUIRK_IGNORE_REMOTE_WAKEUP }, - /* Protocol and OTG Electrical Test Device */ { USB_DEVICE(0x1a0a, 0x0200), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, @@ -208,6 +205,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Blackmagic Design UltraStudio SDI */ { USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM }, + /* INTEL VALUE SSD */ + { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, + { } /* terminating entry must be last */ }; From 32cb0b37098f4beeff5ad9e325f11b42a6ede56c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 19 May 2016 17:12:20 +0200 Subject: [PATCH 0126/1050] usb: quirks: Add no-lpm quirk for Acer C120 LED Projector The Acer C120 LED Projector is a USB-3 connected pico projector which takes both its power and video data from USB-3. In combination with some hubs this device does not play well with lpm, so disable lpm for it. Signed-off-by: Hans de Goede Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8130e38cae9d..944a6dca0fcb 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -199,6 +199,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1a0a, 0x0200), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Acer C120 LED Projector */ + { USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM }, + /* Blackmagic Design Intensity Shuttle */ { USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM }, From 5fc363232ae76805d7f570dbb61758095a540591 Mon Sep 17 00:00:00 2001 From: Tom Yan Date: Tue, 24 May 2016 03:28:44 +0800 Subject: [PATCH 0127/1050] uas: remove can_queue set in host template Commit 198de51dbc34 ("USB: uas: Limit qdepth at the scsi-host level") made qdepth limit set in host template (`.can_queue = MAX_CMNDS`) redundant. Removing it to avoid confusion. Signed-off-by: Tom Yan Acked-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 4d49fce406e1..e03c490616ef 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -848,7 +848,6 @@ static struct scsi_host_template uas_host_template = { .slave_configure = uas_slave_configure, .eh_abort_handler = uas_eh_abort_handler, .eh_bus_reset_handler = uas_eh_bus_reset_handler, - .can_queue = MAX_CMNDS, .this_id = -1, .sg_tablesize = SG_NONE, .skip_settle_delay = 1, From 593224ea77b1ca842f45cf76f4deeef44dfbacd1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 31 May 2016 09:18:03 +0200 Subject: [PATCH 0128/1050] USB: uas: Fix slave queue_depth not being set Commit 198de51dbc34 ("USB: uas: Limit qdepth at the scsi-host level") removed the scsi_change_queue_depth() call from uas_slave_configure() assuming that the slave would inherit the host's queue_depth, which that commit sets to the same value. This is incorrect, without the scsi_change_queue_depth() call the slave's queue_depth defaults to 1, introducing a performance regression. This commit restores the call, fixing the performance regression. Cc: stable@vger.kernel.org Fixes: 198de51dbc34 ("USB: uas: Limit qdepth at the scsi-host level") Reported-by: Tom Yan Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index e03c490616ef..5ef014ba6ae8 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -836,6 +836,7 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_BROKEN_FUA) sdev->broken_fua = 1; + scsi_change_queue_depth(sdev, devinfo->qdepth - 2); return 0; } From 85e3990bea49a50cb389015fea564b58899ab7c1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 19 May 2016 16:29:50 -0400 Subject: [PATCH 0129/1050] USB: EHCI: avoid undefined pointer arithmetic and placate UBSAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several people have reported that UBSAN doesn't like the pointer arithmetic in ehci_hub_control(): u32 __iomem *status_reg = &ehci->regs->port_status[ (wIndex & 0xff) - 1]; u32 __iomem *hostpc_reg = &ehci->regs->hostpc[(wIndex & 0xff) - 1]; If wIndex is 0 (and it often is), these calculations underflow and UBSAN complains. According to the C standard, pointer computations leading to locations outside the bounds of an array object (other than 1 position past the end) are undefined. In this case, the compiler would be justified in concluding the wIndex can never be 0 and then optimizing away the tests for !wIndex that occur later in the subroutine. (Although, since ehci->regs->port_status and ehci->regs->hostpc are both 0-length arrays and are thus GCC extensions to the C standard, it's not clear what the compiler is really allowed to do.) At any rate, we can avoid all these difficulties, at the cost of making the code slightly longer, by not decrementing the index when it is equal to 0. The runtime effect is minimal, and anyway ehci_hub_control() is not on a hot path. Signed-off-by: Alan Stern Reported-by: Valdis Kletnieks Reported-by: Meelis Roos Reported-by: Martin_MOKREJÅ Reported-by: "Navin P.S" CC: Andrey Ryabinin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hub.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index ffc90295a95f..74f62d68f013 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -872,14 +872,22 @@ int ehci_hub_control( ) { struct ehci_hcd *ehci = hcd_to_ehci (hcd); int ports = HCS_N_PORTS (ehci->hcs_params); - u32 __iomem *status_reg = &ehci->regs->port_status[ - (wIndex & 0xff) - 1]; - u32 __iomem *hostpc_reg = &ehci->regs->hostpc[(wIndex & 0xff) - 1]; + u32 __iomem *status_reg, *hostpc_reg; u32 temp, temp1, status; unsigned long flags; int retval = 0; unsigned selector; + /* + * Avoid underflow while calculating (wIndex & 0xff) - 1. + * The compiler might deduce that wIndex can never be 0 and then + * optimize away the tests for !wIndex below. + */ + temp = wIndex & 0xff; + temp -= (temp > 0); + status_reg = &ehci->regs->port_status[temp]; + hostpc_reg = &ehci->regs->hostpc[temp]; + /* * FIXME: support SetPortFeatures USB_PORT_FEAT_INDICATOR. * HCS_INDICATOR may say we can change LEDs to off/amber/green. From b9610e74586fd183b2d1c7fe5316bce8b6cc534f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 1 Jun 2016 14:56:57 -0700 Subject: [PATCH 0130/1050] clk: Remove CLK_IS_ROOT flag Now that we've gotten rid of all the users of this flag we can retire the number, leaving a slot open for a future flag user. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0c72204c75fc..fb39d5add173 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -25,7 +25,7 @@ #define CLK_SET_PARENT_GATE BIT(1) /* must be gated across re-parent */ #define CLK_SET_RATE_PARENT BIT(2) /* propagate rate change up one level */ #define CLK_IGNORE_UNUSED BIT(3) /* do not gate even if unused */ -#define CLK_IS_ROOT BIT(4) /* Deprecated: Don't use */ + /* unused */ #define CLK_IS_BASIC BIT(5) /* Basic clk, can't do a to_clk_foo() */ #define CLK_GET_RATE_NOCACHE BIT(6) /* do not use the cached clk rate */ #define CLK_SET_RATE_NO_REPARENT BIT(7) /* don't re-parent on rate change */ From d95815ba6a0f287213118c136e64d8c56daeaeab Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Jun 2016 21:01:29 +0200 Subject: [PATCH 0131/1050] USB: xhci: Add broken streams quirk for Frescologic device id 1009 I got one of these cards for testing uas with, it seems that with streams it dma-s all over the place, corrupting memory. On my first tests it managed to dma over the BIOS of the motherboard somehow and completely bricked it. Tests on another motherboard show that it does work with streams disabled. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 48672fac7ff3..c10972fcc8e4 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -37,6 +37,7 @@ /* Device for a quirk */ #define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73 #define PCI_DEVICE_ID_FRESCO_LOGIC_PDK 0x1000 +#define PCI_DEVICE_ID_FRESCO_LOGIC_FL1009 0x1009 #define PCI_DEVICE_ID_FRESCO_LOGIC_FL1400 0x1400 #define PCI_VENDOR_ID_ETRON 0x1b6f @@ -114,6 +115,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_TRUST_TX_LENGTH; } + if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && + pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009) + xhci->quirks |= XHCI_BROKEN_STREAMS; + if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; From b5801212229f6ca5c418c68cd1e0548f4b53f624 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 31 May 2016 10:05:03 -0500 Subject: [PATCH 0132/1050] usb: musb: host: clear rxcsr error bit if set The MUSB Programming Guide states that the driver should clear RXCSR bit2 when the controller sets the bit. Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 2f8ad7f1f482..931381c51ad7 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -1870,6 +1870,9 @@ void musb_host_rx(struct musb *musb, u8 epnum) status = -EPROTO; musb_writeb(epio, MUSB_RXINTERVAL, 0); + rx_csr &= ~MUSB_RXCSR_H_ERROR; + musb_writew(epio, MUSB_RXCSR, rx_csr); + } else if (rx_csr & MUSB_RXCSR_DATAERROR) { if (USB_ENDPOINT_XFER_ISOC != qh->type) { From dbac5d07d13e330e6706813c9fde477140fb5d80 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 31 May 2016 10:05:04 -0500 Subject: [PATCH 0133/1050] usb: musb: host: don't start next rx urb if current one failed urb->status is set when endpoint csr RXSTALL, H_ERROR, DATAERROR or INCOMPRX bit is set. Those bits mean a broken pipe, so don't start next urb when any of these bits is set by checking urb->status. To minimize the risk of regression, only do so for RX, until we have a test case to understand the behavior of TX. The patch fixes system freeze issue caused by repeatedly invoking RX ISR while removing a usb uart device connected to a hub, in which case the hub has no chance to report the disconnect event due to the kernel is busy in processing the RX interrupt flooding. Fix checkpatch complaint (qh != NULL) as while. Reported-by: Max Uvarov Tested-by: Yegor Yefremov Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 931381c51ad7..dd1ebde02786 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -434,7 +434,13 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb, } } - if (qh != NULL && qh->is_ready) { + /* + * The pipe must be broken if current urb->status is set, so don't + * start next urb. + * TODO: to minimize the risk of regression, only check urb->status + * for RX, until we have a test case to understand the behavior of TX. + */ + if ((!status || !is_in) && qh && qh->is_ready) { dev_dbg(musb->controller, "... next ep%d %cX urb %p\n", hw_ep->epnum, is_in ? 'R' : 'T', next_urb(qh)); musb_start_urb(musb, is_in, qh); From 858b9be7fda1d4232e8f06d70e4793bc21583ffc Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 31 May 2016 10:05:05 -0500 Subject: [PATCH 0134/1050] usb: musb: host: move DMA engine check from musb_tx_dma_set_mode_cppi_tusb() to its caller Commit 754fe4a92c07 ("usb: musb: Remove ifdefs for TX DMA for musb_host.c") looks incomplete: the DMA engine checks are done outside the Mentor/UX500 handler but inside the CPPI/TUSB handler. Move the checks out of the CPPI/ TUSB handler into its caller, musb_tx_dma_program(). Signed-off-by: Sergei Shtylyov [b-liu@ti.com: revise subject prefix] Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index dd1ebde02786..45a489e3aeaf 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -684,9 +684,6 @@ static int musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma, { struct dma_channel *channel = hw_ep->tx_channel; - if (!is_cppi_enabled(hw_ep->musb) && !tusb_dma_omap(hw_ep->musb)) - return -ENODEV; - channel->actual_len = 0; /* @@ -710,9 +707,11 @@ static bool musb_tx_dma_program(struct dma_controller *dma, if (musb_dma_inventra(hw_ep->musb) || musb_dma_ux500(hw_ep->musb)) res = musb_tx_dma_set_mode_mentor(dma, hw_ep, qh, urb, offset, &length, &mode); - else + else if (is_cppi_enabled(hw_ep->musb) || tusb_dma_omap(hw_ep->musb)) res = musb_tx_dma_set_mode_cppi_tusb(dma, hw_ep, qh, urb, offset, &length, &mode); + else + return false; if (res) return false; From b6a6631dc322b1ca187752a77bc5f74155017ecb Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 31 May 2016 10:05:06 -0500 Subject: [PATCH 0135/1050] usb: musb: host: make musb_tx_dma_set_mode_*() *void* Now that the DMA engine check was moved to musb_tx_dma_porgram(), both musb_tx_dma_set_mode_cppi_tusb() and musb_tx_dma_set_mode_mentor() always return 0, so we can make both these functions *void*. Signed-off-by: Sergei Shtylyov [b-liu@ti.com: revise subject prefix] Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 45a489e3aeaf..f02361dbfd52 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -633,7 +633,7 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, u8 epnum) ep->rx_reinit = 0; } -static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma, +static void musb_tx_dma_set_mode_mentor(struct dma_controller *dma, struct musb_hw_ep *hw_ep, struct musb_qh *qh, struct urb *urb, u32 offset, u32 *length, u8 *mode) @@ -670,17 +670,15 @@ static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma, } channel->desired_mode = *mode; musb_writew(epio, MUSB_TXCSR, csr); - - return 0; } -static int musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma, - struct musb_hw_ep *hw_ep, - struct musb_qh *qh, - struct urb *urb, - u32 offset, - u32 *length, - u8 *mode) +static void musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma, + struct musb_hw_ep *hw_ep, + struct musb_qh *qh, + struct urb *urb, + u32 offset, + u32 *length, + u8 *mode) { struct dma_channel *channel = hw_ep->tx_channel; @@ -691,8 +689,6 @@ static int musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma, * to identify the zero-length-final-packet case. */ *mode = (urb->transfer_flags & URB_ZERO_PACKET) ? 1 : 0; - - return 0; } static bool musb_tx_dma_program(struct dma_controller *dma, @@ -702,18 +698,15 @@ static bool musb_tx_dma_program(struct dma_controller *dma, struct dma_channel *channel = hw_ep->tx_channel; u16 pkt_size = qh->maxpacket; u8 mode; - int res; if (musb_dma_inventra(hw_ep->musb) || musb_dma_ux500(hw_ep->musb)) - res = musb_tx_dma_set_mode_mentor(dma, hw_ep, qh, urb, - offset, &length, &mode); + musb_tx_dma_set_mode_mentor(dma, hw_ep, qh, urb, offset, + &length, &mode); else if (is_cppi_enabled(hw_ep->musb) || tusb_dma_omap(hw_ep->musb)) - res = musb_tx_dma_set_mode_cppi_tusb(dma, hw_ep, qh, urb, - offset, &length, &mode); + musb_tx_dma_set_mode_cppi_tusb(dma, hw_ep, qh, urb, offset, + &length, &mode); else return false; - if (res) - return false; qh->segsize = length; From 37f30d887a41775066b78f8c0fa9a4929638db07 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 31 May 2016 10:05:07 -0500 Subject: [PATCH 0136/1050] usb: musb: sunxi: Add set_mode platform function Move the mode handling to the platform_set_mode callback. Signed-off-by: Hans de Goede [b-liu@ti.com: revise subject prefix] Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/sunxi.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index fdab4232cfbf..2c33d9bca587 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -264,15 +264,6 @@ static int sunxi_musb_init(struct musb *musb) if (ret) goto error_unregister_notifier; - if (musb->port_mode == MUSB_PORT_MODE_HOST) { - ret = phy_power_on(glue->phy); - if (ret) - goto error_phy_exit; - set_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags); - /* Stop musb work from turning vbus off again */ - set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags); - } - musb->isr = sunxi_musb_interrupt; /* Stop the musb-core from doing runtime pm (not supported on sunxi) */ @@ -280,8 +271,6 @@ static int sunxi_musb_init(struct musb *musb) return 0; -error_phy_exit: - phy_exit(glue->phy); error_unregister_notifier: if (musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE) extcon_unregister_notifier(glue->extcon, EXTCON_USB_HOST, @@ -323,6 +312,24 @@ static int sunxi_musb_exit(struct musb *musb) return 0; } +static int sunxi_set_mode(struct musb *musb, u8 mode) +{ + struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent); + int ret; + + if (mode == MUSB_HOST) { + ret = phy_power_on(glue->phy); + if (ret) + return ret; + + set_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags); + /* Stop musb work from turning vbus off again */ + set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags); + } + + return 0; +} + static void sunxi_musb_enable(struct musb *musb) { struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent); @@ -569,6 +576,7 @@ static const struct musb_platform_ops sunxi_musb_ops = { .exit = sunxi_musb_exit, .enable = sunxi_musb_enable, .disable = sunxi_musb_disable, + .set_mode = sunxi_set_mode, .fifo_offset = sunxi_musb_fifo_offset, .ep_offset = sunxi_musb_ep_offset, .busctl_offset = sunxi_musb_busctl_offset, From a60d541a2d402c8645d5bb2ec8dabe474b66e018 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 31 May 2016 10:05:08 -0500 Subject: [PATCH 0137/1050] usb: musb: sunxi: Set state to A_WAIT_VRISE when enabling Vbus When the board is powering attached usb devices via the otg port sometimes / on some devices it takes slightly too long for the Vbus detection code in phy-sun4i-usb.c to signal that Vbus is high after enabling Vbus and the musb hardware signals a MUSB_INTR_VBUSERROR interrupt. This commit sets the otg state to A_WAIT_VRISE upon enabling Vbus making musb_stage0_irq() ignore the first VBUSERR_RETRY_COUNT VBUSERROR interrupts, fixing connection issues in these cases. Signed-off-by: Hans de Goede [b-liu@ti.com: revise subject prefix] Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/sunxi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index 2c33d9bca587..e7d46179b485 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -112,7 +112,7 @@ static void sunxi_musb_work(struct work_struct *work) if (test_bit(SUNXI_MUSB_FL_HOSTMODE, &glue->flags)) { set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags); musb->xceiv->otg->default_a = 1; - musb->xceiv->otg->state = OTG_STATE_A_IDLE; + musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; MUSB_HST_MODE(musb); devctl |= MUSB_DEVCTL_SESSION; } else { @@ -145,10 +145,12 @@ static void sunxi_musb_set_vbus(struct musb *musb, int is_on) { struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent); - if (is_on) + if (is_on) { set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags); - else + musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; + } else { clear_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags); + } schedule_work(&glue->work); } @@ -325,6 +327,7 @@ static int sunxi_set_mode(struct musb *musb, u8 mode) set_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags); /* Stop musb work from turning vbus off again */ set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags); + musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; } return 0; From 7e1704dcf6b0a895f99e386ccc9ca631117e6d5b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:09 -0500 Subject: [PATCH 0138/1050] usb: musb: Fix idling after host mode by increasing autosuspend delay Looks like at least 2430 glue won't idle reliably with the 200 ms autosuspend delay. This causes deeper idle states being blocked for the whole SoC when disconnecting OTG A cable. Increasing the delay to 500 ms seems to idle both MUSB and the PHY reliably. This is probably because of time needed by the hardware based negotiation between MUSB and the PHY. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 39fd95833eb8..460176c43e84 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2028,9 +2028,15 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) musb_readl = musb_default_readl; musb_writel = musb_default_writel; - /* We need musb_read/write functions initialized for PM */ + /* + * We need musb_read/write functions initialized for PM. + * Note that at least 2430 glue needs autosuspend delay + * somewhere above 300 ms for the hardware to idle properly + * after disconnecting the cable in host mode. Let's use + * 500 ms for some margin. + */ pm_runtime_use_autosuspend(musb->controller); - pm_runtime_set_autosuspend_delay(musb->controller, 200); + pm_runtime_set_autosuspend_delay(musb->controller, 500); pm_runtime_enable(musb->controller); /* The musb_platform_init() call: From 302f6802395f58dceb225b1c9e603de72f09b8b0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:10 -0500 Subject: [PATCH 0139/1050] usb: musb: Remove unnecessary shutdown function We have remove() already calling shutdown(), so let's drop it and move the code to remove(). No code changes, we'll drop the the FIXME in the following patch with more clean-up. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 39 +++++++++++++----------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 460176c43e84..c370ed59ef7b 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1090,29 +1090,6 @@ void musb_stop(struct musb *musb) musb_platform_try_idle(musb, 0); } -static void musb_shutdown(struct platform_device *pdev) -{ - struct musb *musb = dev_to_musb(&pdev->dev); - unsigned long flags; - - pm_runtime_get_sync(musb->controller); - - musb_host_cleanup(musb); - musb_gadget_cleanup(musb); - - spin_lock_irqsave(&musb->lock, flags); - musb_platform_disable(musb); - musb_generic_disable(musb); - spin_unlock_irqrestore(&musb->lock, flags); - - musb_writeb(musb->mregs, MUSB_DEVCTL, 0); - musb_platform_exit(musb); - - pm_runtime_put(musb->controller); - /* FIXME power down */ -} - - /*-------------------------------------------------------------------------*/ /* @@ -2318,6 +2295,7 @@ static int musb_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct musb *musb = dev_to_musb(dev); + unsigned long flags; /* this gets called on rmmod. * - Host mode: host may still be active @@ -2325,7 +2303,19 @@ static int musb_remove(struct platform_device *pdev) * - OTG mode: both roles are deactivated (or never-activated) */ musb_exit_debugfs(musb); - musb_shutdown(pdev); + + pm_runtime_get_sync(musb->controller); + musb_host_cleanup(musb); + musb_gadget_cleanup(musb); + spin_lock_irqsave(&musb->lock, flags); + musb_platform_disable(musb); + musb_generic_disable(musb); + spin_unlock_irqrestore(&musb->lock, flags); + musb_writeb(musb->mregs, MUSB_DEVCTL, 0); + musb_platform_exit(musb); + pm_runtime_put(musb->controller); + /* FIXME power down */ + musb_phy_callback = NULL; if (musb->dma_controller) @@ -2618,7 +2608,6 @@ static struct platform_driver musb_driver = { }, .probe = musb_probe, .remove = musb_remove, - .shutdown = musb_shutdown, }; module_platform_driver(musb_driver); From 7099dbc5b3eb9f52efdb78406826f7463f07d71c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:11 -0500 Subject: [PATCH 0140/1050] usb: musb: Update to use PM runtime autosuspend Let's make the PM runtime use the standard autosuspend calls. Commit 5de85b9d57ab ("PM / runtime: Re-init runtime PM states at probe error and driver unbind") means we must pair use_autosuspend with dont_use_autosuspend and then use put_sync to properly idle the device. Note that we'll be removing the PM runtime calls from the glue layer to the MUSB core in the next patch. And we can also remove the pointless FIXME comment now. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 9 ++++++--- drivers/usb/musb/musb_gadget.c | 3 ++- drivers/usb/musb/omap2430.c | 5 +++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index c370ed59ef7b..89c270a99549 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2220,7 +2220,8 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) if (status) goto fail5; - pm_runtime_put(musb->controller); + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); /* * For why this is currently needed, see commit 3e43a0725637 @@ -2248,6 +2249,7 @@ fail2_5: usb_phy_shutdown(musb->xceiv); err_usb_phy_init: + pm_runtime_dont_use_autosuspend(musb->controller); pm_runtime_put_sync(musb->controller); fail2: @@ -2313,8 +2315,6 @@ static int musb_remove(struct platform_device *pdev) spin_unlock_irqrestore(&musb->lock, flags); musb_writeb(musb->mregs, MUSB_DEVCTL, 0); musb_platform_exit(musb); - pm_runtime_put(musb->controller); - /* FIXME power down */ musb_phy_callback = NULL; @@ -2326,6 +2326,9 @@ static int musb_remove(struct platform_device *pdev) cancel_work_sync(&musb->irq_work); cancel_delayed_work_sync(&musb->finish_resume_work); cancel_delayed_work_sync(&musb->deassert_reset_work); + pm_runtime_dont_use_autosuspend(musb->controller); + pm_runtime_put_sync(musb->controller); + pm_runtime_disable(musb->controller); musb_free(musb); device_init_wakeup(dev, 0); return 0; diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 152865b36522..fff5a8a283e3 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1963,7 +1963,8 @@ static int musb_gadget_stop(struct usb_gadget *g) * that currently misbehaves. */ - pm_runtime_put(musb->controller); + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return 0; } diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index c84e0322c108..07363d28fbc1 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -435,8 +435,9 @@ static int omap2430_musb_init(struct musb *musb) phy_init(musb->phy); phy_power_on(musb->phy); - pm_runtime_put_noidle(musb->controller); - pm_runtime_put_noidle(glue->dev); + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); + pm_runtime_put(glue->dev); return 0; err1: From f730f205cc5116b6edfedf568fdfbb4935248e8e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:12 -0500 Subject: [PATCH 0141/1050] usb: musb: Split PM runtime between wrapper IP and musb core Let's not tinker with the PM runtime of musb core from the omap2430 wrapper. This allows us to initialize PM runtime for musb core later on instead of doing it in stages. And omap2430 wrapper has no need to for accessing musb core at this point. Note that this does not remove all the PM runtime calls from the glue layer, those will get removed in a later patch. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 43 ++++++++++++++++-------------------- drivers/usb/musb/omap2430.c | 10 --------- 2 files changed, 19 insertions(+), 34 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 89c270a99549..23888d579e8b 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2005,17 +2005,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) musb_readl = musb_default_readl; musb_writel = musb_default_writel; - /* - * We need musb_read/write functions initialized for PM. - * Note that at least 2430 glue needs autosuspend delay - * somewhere above 300 ms for the hardware to idle properly - * after disconnecting the cable in host mode. Let's use - * 500 ms for some margin. - */ - pm_runtime_use_autosuspend(musb->controller); - pm_runtime_set_autosuspend_delay(musb->controller, 500); - pm_runtime_enable(musb->controller); - /* The musb_platform_init() call: * - adjusts musb->mregs * - sets the musb->isr @@ -2117,6 +2106,16 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) if (musb->ops->phy_callback) musb_phy_callback = musb->ops->phy_callback; + /* + * We need musb_read/write functions initialized for PM. + * Note that at least 2430 glue needs autosuspend delay + * somewhere above 300 ms for the hardware to idle properly + * after disconnecting the cable in host mode. Let's use + * 500 ms for some margin. + */ + pm_runtime_use_autosuspend(musb->controller); + pm_runtime_set_autosuspend_delay(musb->controller, 500); + pm_runtime_enable(musb->controller); pm_runtime_get_sync(musb->controller); status = usb_phy_init(musb->xceiv); @@ -2251,6 +2250,7 @@ fail2_5: err_usb_phy_init: pm_runtime_dont_use_autosuspend(musb->controller); pm_runtime_put_sync(musb->controller); + pm_runtime_disable(musb->controller); fail2: if (musb->irq_wake) @@ -2258,7 +2258,6 @@ fail2: musb_platform_exit(musb); fail1: - pm_runtime_disable(musb->controller); dev_err(musb->controller, "musb_init_controller failed with status %d\n", status); @@ -2306,6 +2305,9 @@ static int musb_remove(struct platform_device *pdev) */ musb_exit_debugfs(musb); + cancel_work_sync(&musb->irq_work); + cancel_delayed_work_sync(&musb->finish_resume_work); + cancel_delayed_work_sync(&musb->deassert_reset_work); pm_runtime_get_sync(musb->controller); musb_host_cleanup(musb); musb_gadget_cleanup(musb); @@ -2314,21 +2316,14 @@ static int musb_remove(struct platform_device *pdev) musb_generic_disable(musb); spin_unlock_irqrestore(&musb->lock, flags); musb_writeb(musb->mregs, MUSB_DEVCTL, 0); - musb_platform_exit(musb); - - musb_phy_callback = NULL; - - if (musb->dma_controller) - musb_dma_controller_destroy(musb->dma_controller); - - usb_phy_shutdown(musb->xceiv); - - cancel_work_sync(&musb->irq_work); - cancel_delayed_work_sync(&musb->finish_resume_work); - cancel_delayed_work_sync(&musb->deassert_reset_work); pm_runtime_dont_use_autosuspend(musb->controller); pm_runtime_put_sync(musb->controller); pm_runtime_disable(musb->controller); + musb_platform_exit(musb); + musb_phy_callback = NULL; + if (musb->dma_controller) + musb_dma_controller_destroy(musb->dma_controller); + usb_phy_shutdown(musb->xceiv); musb_free(musb); device_init_wakeup(dev, 0); return 0; diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 07363d28fbc1..3ce94bf3f1c7 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -400,13 +400,6 @@ static int omap2430_musb_init(struct musb *musb) if (status < 0) goto err1; - status = pm_runtime_get_sync(dev); - if (status < 0) { - dev_err(dev, "pm_runtime_get_sync FAILED %d\n", status); - pm_runtime_put_sync(glue->dev); - goto err1; - } - l = musb_readl(musb->mregs, OTG_INTERFSEL); if (data->interface_type == MUSB_INTERFACE_UTMI) { @@ -434,9 +427,6 @@ static int omap2430_musb_init(struct musb *musb) phy_init(musb->phy); phy_power_on(musb->phy); - - pm_runtime_mark_last_busy(musb->controller); - pm_runtime_put_autosuspend(musb->controller); pm_runtime_put(glue->dev); return 0; From 30647217909ea8ce4d8f4905b15fa1bb09d80859 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:13 -0500 Subject: [PATCH 0142/1050] usb: musb: Remove conditional PM runtime calls for musb_gadget The conditional use of PM runtime does not work properly for musb gadget. On cable disconnect we may not get any USB_EVENT_NONE leaving the PM runtime call unpaired. Let's fix the issue by making sure the PM runtime calls are paired within the functions. The glue layer will take care of the rest. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_gadget.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index fff5a8a283e3..7ecc893ff3ba 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1914,8 +1914,8 @@ static int musb_gadget_start(struct usb_gadget *g, if (musb->xceiv->last_event == USB_EVENT_ID) musb_platform_set_vbus(musb, 1); - if (musb->xceiv->last_event == USB_EVENT_NONE) - pm_runtime_put(musb->controller); + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return 0; @@ -1934,8 +1934,7 @@ static int musb_gadget_stop(struct usb_gadget *g) struct musb *musb = gadget_to_musb(g); unsigned long flags; - if (musb->xceiv->last_event == USB_EVENT_NONE) - pm_runtime_get_sync(musb->controller); + pm_runtime_get_sync(musb->controller); /* * REVISIT always use otg_set_peripheral() here too; From 517bafffcaf8f882299a74e310082e7b6b2288ad Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:14 -0500 Subject: [PATCH 0143/1050] usb: musb: Use delayed for musb_gadget_pullup We have MUSB setting pm_runtime_irq_safe with the following commits: 30a70b026b4c ("usb: musb: fix obex in g_nokia.ko causing kernel panic") 3e43a0725637 ("usb: musb: core: add pm_runtime_irq_safe()") Let's fix things to use delayed work so we can remove the pm_runtime_irq_safe. Note that we may want to set this up in a generic way in the gadget framework eventually. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.h | 1 + drivers/usb/musb/musb_gadget.c | 24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index b6afe9e43305..29473846b098 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -312,6 +312,7 @@ struct musb { struct work_struct irq_work; struct delayed_work deassert_reset_work; struct delayed_work finish_resume_work; + struct delayed_work gadget_work; u16 hwvers; u16 intrrxe; diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 7ecc893ff3ba..af2a3a7addf9 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1656,6 +1656,20 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA) return usb_phy_set_power(musb->xceiv, mA); } +static void musb_gadget_work(struct work_struct *work) +{ + struct musb *musb; + unsigned long flags; + + musb = container_of(work, struct musb, gadget_work.work); + pm_runtime_get_sync(musb->controller); + spin_lock_irqsave(&musb->lock, flags); + musb_pullup(musb, musb->softconnect); + spin_unlock_irqrestore(&musb->lock, flags); + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); +} + static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on) { struct musb *musb = gadget_to_musb(gadget); @@ -1663,20 +1677,16 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on) is_on = !!is_on; - pm_runtime_get_sync(musb->controller); - /* NOTE: this assumes we are sensing vbus; we'd rather * not pullup unless the B-session is active. */ spin_lock_irqsave(&musb->lock, flags); if (is_on != musb->softconnect) { musb->softconnect = is_on; - musb_pullup(musb, is_on); + schedule_delayed_work(&musb->gadget_work, 0); } spin_unlock_irqrestore(&musb->lock, flags); - pm_runtime_put(musb->controller); - return 0; } @@ -1845,7 +1855,7 @@ int musb_gadget_setup(struct musb *musb) #elif IS_ENABLED(CONFIG_USB_MUSB_GADGET) musb->g.is_otg = 0; #endif - + INIT_DELAYED_WORK(&musb->gadget_work, musb_gadget_work); musb_g_init_endpoints(musb); musb->is_active = 0; @@ -1866,6 +1876,8 @@ void musb_gadget_cleanup(struct musb *musb) { if (musb->port_mode == MUSB_PORT_MODE_HOST) return; + + cancel_delayed_work_sync(&musb->gadget_work); usb_del_gadget_udc(&musb->g); } From 21f77beece2b6f479648192a87ededd2fb8f1716 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:15 -0500 Subject: [PATCH 0144/1050] usb: musb: Handle cable status better for 2430 glue layer We may have drivers loaded but no configured gadgets and MUSB may be in host mode. If gadgets are configured during host mode, PM runtime will get confused. Disable PM runtime from gadget state, and do it based on the cable and last state. Note that we may get multiple cable events, so we need to keep track of the power state. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 68 ++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 3ce94bf3f1c7..fa9932910969 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -49,6 +49,9 @@ struct omap2430_glue { enum musb_vbus_id_status status; struct work_struct omap_musb_mailbox_work; struct device *control_otghs; + bool cable_connected; + bool enabled; + bool powered; }; #define glue_to_musb(g) platform_get_drvdata(g->musb) @@ -234,6 +237,45 @@ static inline void omap2430_low_level_init(struct musb *musb) musb_writel(musb->mregs, OTG_FORCESTDBY, l); } +/* + * We can get multiple cable events so we need to keep track + * of the power state. Only keep power enabled if USB cable is + * connected and a gadget is started. + */ +static void omap2430_set_power(struct musb *musb, bool enabled, bool cable) +{ + struct device *dev = musb->controller; + struct omap2430_glue *glue = dev_get_drvdata(dev->parent); + bool power_up; + int res; + + if (glue->enabled != enabled) + glue->enabled = enabled; + + if (glue->cable_connected != cable) + glue->cable_connected = cable; + + power_up = glue->enabled && glue->cable_connected; + if (power_up == glue->powered) { + dev_warn(musb->controller, "power state already %i\n", + power_up); + return; + } + + glue->powered = power_up; + + if (power_up) { + res = pm_runtime_get_sync(musb->controller); + if (res < 0) { + dev_err(musb->controller, "could not enable: %i", res); + glue->powered = false; + } + } else { + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); + } +} + static void omap2430_musb_mailbox(enum musb_vbus_id_status status) { struct omap2430_glue *glue = _glue; @@ -259,6 +301,13 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue) struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev); struct omap_musb_board_data *data = pdata->board_data; struct usb_otg *otg = musb->xceiv->otg; + bool cable_connected; + + cable_connected = ((glue->status == MUSB_ID_GROUND) || + (glue->status == MUSB_VBUS_VALID)); + + if (cable_connected) + omap2430_set_power(musb, glue->enabled, cable_connected); switch (glue->status) { case MUSB_ID_GROUND: @@ -268,7 +317,6 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue) musb->xceiv->otg->state = OTG_STATE_A_IDLE; musb->xceiv->last_event = USB_EVENT_ID; if (musb->gadget_driver) { - pm_runtime_get_sync(dev); omap_control_usb_set_mode(glue->control_otghs, USB_MODE_HOST); omap2430_musb_set_vbus(musb, 1); @@ -281,8 +329,6 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue) otg->default_a = false; musb->xceiv->otg->state = OTG_STATE_B_IDLE; musb->xceiv->last_event = USB_EVENT_VBUS; - if (musb->gadget_driver) - pm_runtime_get_sync(dev); omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DEVICE); break; @@ -291,11 +337,8 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue) dev_dbg(dev, "VBUS Disconnect\n"); musb->xceiv->last_event = USB_EVENT_NONE; - if (musb->gadget_driver) { + if (musb->gadget_driver) omap2430_musb_set_vbus(musb, 0); - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - } if (data->interface_type == MUSB_INTERFACE_UTMI) otg_set_vbus(musb->xceiv->otg, 0); @@ -307,6 +350,9 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue) dev_dbg(dev, "ID float\n"); } + if (!cable_connected) + omap2430_set_power(musb, glue->enabled, cable_connected); + atomic_notifier_call_chain(&musb->xceiv->notifier, musb->xceiv->last_event, NULL); } @@ -443,6 +489,8 @@ static void omap2430_musb_enable(struct musb *musb) struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev); struct omap_musb_board_data *data = pdata->board_data; + omap2430_set_power(musb, true, glue->cable_connected); + switch (glue->status) { case MUSB_ID_GROUND: @@ -481,6 +529,8 @@ static void omap2430_musb_disable(struct musb *musb) if (glue->status != MUSB_UNKNOWN) omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DISCONNECT); + + omap2430_set_power(musb, false, glue->cable_connected); } static int omap2430_musb_exit(struct musb *musb) @@ -653,11 +703,13 @@ err0: static int omap2430_remove(struct platform_device *pdev) { - struct omap2430_glue *glue = platform_get_drvdata(pdev); + struct omap2430_glue *glue = platform_get_drvdata(pdev); + struct musb *musb = glue_to_musb(glue); pm_runtime_get_sync(glue->dev); cancel_work_sync(&glue->omap_musb_mailbox_work); platform_device_unregister(glue->musb); + omap2430_set_power(musb, false, false); pm_runtime_put_sync(glue->dev); pm_runtime_disable(glue->dev); From a83e17d0f73b2c53ffd40773c227bf4b901365d8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:16 -0500 Subject: [PATCH 0145/1050] usb: musb: Improve PM runtime and phy handling for 2430 glue layer This simplifies things and allows idling both MUSB and PHY when nothing is configured. Let's just return early from PM runtime if musb is not yet initialized. Let's also warn if PHY is not configured. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index fa9932910969..b81216d1380b 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -435,6 +435,7 @@ static int omap2430_musb_init(struct musb *musb) return PTR_ERR(musb->phy); } musb->isr = omap2430_musb_interrupt; + phy_init(musb->phy); /* * Enable runtime PM for musb parent (this driver). We can't @@ -471,8 +472,6 @@ static int omap2430_musb_init(struct musb *musb) if (glue->status != MUSB_UNKNOWN) omap_musb_set_mailbox(glue); - phy_init(musb->phy); - phy_power_on(musb->phy); pm_runtime_put(glue->dev); return 0; @@ -489,6 +488,9 @@ static void omap2430_musb_enable(struct musb *musb) struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev); struct omap_musb_board_data *data = pdata->board_data; + if (!WARN_ON(!musb->phy)) + phy_power_on(musb->phy); + omap2430_set_power(musb, true, glue->cable_connected); switch (glue->status) { @@ -526,6 +528,9 @@ static void omap2430_musb_disable(struct musb *musb) struct device *dev = musb->controller; struct omap2430_glue *glue = dev_get_drvdata(dev->parent); + if (!WARN_ON(!musb->phy)) + phy_power_off(musb->phy); + if (glue->status != MUSB_UNKNOWN) omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DISCONNECT); @@ -535,11 +540,14 @@ static void omap2430_musb_disable(struct musb *musb) static int omap2430_musb_exit(struct musb *musb) { - del_timer_sync(&musb_idle_timer); + struct device *dev = musb->controller; + struct omap2430_glue *glue = dev_get_drvdata(dev->parent); + del_timer_sync(&musb_idle_timer); omap2430_low_level_exit(musb); - phy_power_off(musb->phy); phy_exit(musb->phy); + musb->phy = NULL; + cancel_work_sync(&glue->omap_musb_mailbox_work); return 0; } @@ -707,7 +715,6 @@ static int omap2430_remove(struct platform_device *pdev) struct musb *musb = glue_to_musb(glue); pm_runtime_get_sync(glue->dev); - cancel_work_sync(&glue->omap_musb_mailbox_work); platform_device_unregister(glue->musb); omap2430_set_power(musb, false, false); pm_runtime_put_sync(glue->dev); @@ -723,12 +730,13 @@ static int omap2430_runtime_suspend(struct device *dev) struct omap2430_glue *glue = dev_get_drvdata(dev); struct musb *musb = glue_to_musb(glue); - if (musb) { - musb->context.otg_interfsel = musb_readl(musb->mregs, - OTG_INTERFSEL); + if (!musb) + return 0; - omap2430_low_level_exit(musb); - } + musb->context.otg_interfsel = musb_readl(musb->mregs, + OTG_INTERFSEL); + + omap2430_low_level_exit(musb); return 0; } @@ -739,7 +747,7 @@ static int omap2430_runtime_resume(struct device *dev) struct musb *musb = glue_to_musb(glue); if (!musb) - return -EPROBE_DEFER; + return 0; omap2430_low_level_init(musb); musb_writel(musb->mregs, OTG_INTERFSEL, From 4dc2fe7a94d07c6148eea5722e23d4c1dbecc9e3 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:17 -0500 Subject: [PATCH 0146/1050] usb: musb: Remove try_idle for 2430 glue layer This is no longer needed with PM runtime at least for 2430 glue. We can now rely only on PM runtime and cable detection. The other glue layers can probably remove try_idle too, but that needs to be tested for each platform before doing it. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 91 ------------------------------------- 1 file changed, 91 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index b81216d1380b..53653944cf72 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -57,92 +57,6 @@ struct omap2430_glue { static struct omap2430_glue *_glue; -static struct timer_list musb_idle_timer; - -static void musb_do_idle(unsigned long _musb) -{ - struct musb *musb = (void *)_musb; - unsigned long flags; - u8 power; - u8 devctl; - - spin_lock_irqsave(&musb->lock, flags); - - switch (musb->xceiv->otg->state) { - case OTG_STATE_A_WAIT_BCON: - - devctl = musb_readb(musb->mregs, MUSB_DEVCTL); - if (devctl & MUSB_DEVCTL_BDEVICE) { - musb->xceiv->otg->state = OTG_STATE_B_IDLE; - MUSB_DEV_MODE(musb); - } else { - musb->xceiv->otg->state = OTG_STATE_A_IDLE; - MUSB_HST_MODE(musb); - } - break; - case OTG_STATE_A_SUSPEND: - /* finish RESUME signaling? */ - if (musb->port1_status & MUSB_PORT_STAT_RESUME) { - power = musb_readb(musb->mregs, MUSB_POWER); - power &= ~MUSB_POWER_RESUME; - dev_dbg(musb->controller, "root port resume stopped, power %02x\n", power); - musb_writeb(musb->mregs, MUSB_POWER, power); - musb->is_active = 1; - musb->port1_status &= ~(USB_PORT_STAT_SUSPEND - | MUSB_PORT_STAT_RESUME); - musb->port1_status |= USB_PORT_STAT_C_SUSPEND << 16; - usb_hcd_poll_rh_status(musb->hcd); - /* NOTE: it might really be A_WAIT_BCON ... */ - musb->xceiv->otg->state = OTG_STATE_A_HOST; - } - break; - case OTG_STATE_A_HOST: - devctl = musb_readb(musb->mregs, MUSB_DEVCTL); - if (devctl & MUSB_DEVCTL_BDEVICE) - musb->xceiv->otg->state = OTG_STATE_B_IDLE; - else - musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON; - default: - break; - } - spin_unlock_irqrestore(&musb->lock, flags); -} - - -static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout) -{ - unsigned long default_timeout = jiffies + msecs_to_jiffies(3); - static unsigned long last_timer; - - if (timeout == 0) - timeout = default_timeout; - - /* Never idle if active, or when VBUS timeout is not set as host */ - if (musb->is_active || ((musb->a_wait_bcon == 0) - && (musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON))) { - dev_dbg(musb->controller, "%s active, deleting timer\n", - usb_otg_state_string(musb->xceiv->otg->state)); - del_timer(&musb_idle_timer); - last_timer = jiffies; - return; - } - - if (time_after(last_timer, timeout)) { - if (!timer_pending(&musb_idle_timer)) - last_timer = timeout; - else { - dev_dbg(musb->controller, "Longer idle timer already pending, ignoring\n"); - return; - } - } - last_timer = timeout; - - dev_dbg(musb->controller, "%s inactive, for idle timer for %lu ms\n", - usb_otg_state_string(musb->xceiv->otg->state), - (unsigned long)jiffies_to_msecs(timeout - jiffies)); - mod_timer(&musb_idle_timer, timeout); -} - static void omap2430_musb_set_vbus(struct musb *musb, int is_on) { struct usb_otg *otg = musb->xceiv->otg; @@ -467,8 +381,6 @@ static int omap2430_musb_init(struct musb *musb) musb_readl(musb->mregs, OTG_INTERFSEL), musb_readl(musb->mregs, OTG_SIMENABLE)); - setup_timer(&musb_idle_timer, musb_do_idle, (unsigned long) musb); - if (glue->status != MUSB_UNKNOWN) omap_musb_set_mailbox(glue); @@ -543,7 +455,6 @@ static int omap2430_musb_exit(struct musb *musb) struct device *dev = musb->controller; struct omap2430_glue *glue = dev_get_drvdata(dev->parent); - del_timer_sync(&musb_idle_timer); omap2430_low_level_exit(musb); phy_exit(musb->phy); musb->phy = NULL; @@ -562,8 +473,6 @@ static const struct musb_platform_ops omap2430_ops = { .exit = omap2430_musb_exit, .set_mode = omap2430_musb_set_mode, - .try_idle = omap2430_musb_try_idle, - .set_vbus = omap2430_musb_set_vbus, .enable = omap2430_musb_enable, From a118df07f5b19e4879fd67e98a69f4644136877f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:18 -0500 Subject: [PATCH 0147/1050] usb: musb: Don't set d+ high before enable for 2430 glue layer At least 2430 glue layer pulls d+ high on start up even if there are no gadgets configured. This is bad at least for anything using a separate battery charger chip as it can confuse the charger detection. Let's fix the issue by removing the bogus glue layer code tinkering with the SESSION bit. As pointed out Bin Liu and Sergei Shtylyov , the SESSION bit just starts host mode if ID pin is grounded, and starts the srp is ID pin is floating. So without the ID pin changing, it's unable to force musb mode to anything. And just for starting a host mode, things work fine without this code. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 53653944cf72..d312d42592d6 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -122,16 +122,6 @@ static void omap2430_musb_set_vbus(struct musb *musb, int is_on) musb_readb(musb->mregs, MUSB_DEVCTL)); } -static int omap2430_musb_set_mode(struct musb *musb, u8 musb_mode) -{ - u8 devctl = musb_readb(musb->mregs, MUSB_DEVCTL); - - devctl |= MUSB_DEVCTL_SESSION; - musb_writeb(musb->mregs, MUSB_DEVCTL, devctl); - - return 0; -} - static inline void omap2430_low_level_exit(struct musb *musb) { u32 l; @@ -472,7 +462,6 @@ static const struct musb_platform_ops omap2430_ops = { .init = omap2430_musb_init, .exit = omap2430_musb_exit, - .set_mode = omap2430_musb_set_mode, .set_vbus = omap2430_musb_set_vbus, .enable = omap2430_musb_enable, From 12b7db2bf8b88938798c60416172b53225207b1f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:19 -0500 Subject: [PATCH 0148/1050] usb: musb: Return error value from musb_mailbox At least on n900 we have phy-twl4030-usb only generating cable interrupts, and then have a separate USB PHY. In order for musb to know the real cable status, we need to clear any cached state until musb is ready. Otherwise the cable status interrupts will get just ignored if the status does not change from the initial state. To do this, let's add a return value to musb_mailbox(), and reset cached linkstat to MUSB_UNKNOWN on error. Sorry to cause a bit of churn here, I should have added that already last time patching musb_mailbox(). Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/phy/phy-twl4030-usb.c | 14 ++++++++++---- drivers/usb/musb/musb_core.c | 7 ++++--- drivers/usb/musb/musb_core.h | 2 +- drivers/usb/musb/omap2430.c | 8 +++++--- drivers/usb/phy/phy-twl6030-usb.c | 12 +++++++++--- include/linux/usb/musb.h | 5 +++-- 6 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index 6b6af6cba454..d9b10a39a2cf 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -463,7 +463,8 @@ static int twl4030_phy_power_on(struct phy *phy) twl4030_usb_set_mode(twl, twl->usb_mode); if (twl->usb_mode == T2_USB_MODE_ULPI) twl4030_i2c_access(twl, 0); - schedule_delayed_work(&twl->id_workaround_work, 0); + twl->linkstat = MUSB_UNKNOWN; + schedule_delayed_work(&twl->id_workaround_work, HZ); return 0; } @@ -537,6 +538,7 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl) struct twl4030_usb *twl = _twl; enum musb_vbus_id_status status; bool status_changed = false; + int err; status = twl4030_usb_linkstat(twl); @@ -567,7 +569,9 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl) pm_runtime_mark_last_busy(twl->dev); pm_runtime_put_autosuspend(twl->dev); } - musb_mailbox(status); + err = musb_mailbox(status); + if (err) + twl->linkstat = MUSB_UNKNOWN; } /* don't schedule during sleep - irq works right then */ @@ -595,7 +599,8 @@ static int twl4030_phy_init(struct phy *phy) struct twl4030_usb *twl = phy_get_drvdata(phy); pm_runtime_get_sync(twl->dev); - schedule_delayed_work(&twl->id_workaround_work, 0); + twl->linkstat = MUSB_UNKNOWN; + schedule_delayed_work(&twl->id_workaround_work, HZ); pm_runtime_mark_last_busy(twl->dev); pm_runtime_put_autosuspend(twl->dev); @@ -763,7 +768,8 @@ static int twl4030_usb_remove(struct platform_device *pdev) if (cable_present(twl->linkstat)) pm_runtime_put_noidle(twl->dev); pm_runtime_mark_last_busy(twl->dev); - pm_runtime_put_sync_suspend(twl->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(twl->dev); pm_runtime_disable(twl->dev); /* autogate 60MHz ULPI clock, diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 23888d579e8b..6469eff4fc30 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1679,7 +1679,7 @@ EXPORT_SYMBOL_GPL(musb_dma_completion); #define use_dma 0 #endif -static void (*musb_phy_callback)(enum musb_vbus_id_status status); +static int (*musb_phy_callback)(enum musb_vbus_id_status status); /* * musb_mailbox - optional phy notifier function @@ -1688,11 +1688,12 @@ static void (*musb_phy_callback)(enum musb_vbus_id_status status); * Optionally gets called from the USB PHY. Note that the USB PHY must be * disabled at the point the phy_callback is registered or unregistered. */ -void musb_mailbox(enum musb_vbus_id_status status) +int musb_mailbox(enum musb_vbus_id_status status) { if (musb_phy_callback) - musb_phy_callback(status); + return musb_phy_callback(status); + return -ENODEV; }; EXPORT_SYMBOL_GPL(musb_mailbox); diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 29473846b098..b55a776b03eb 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -215,7 +215,7 @@ struct musb_platform_ops { dma_addr_t *dma_addr, u32 *len); void (*pre_root_reset_end)(struct musb *musb); void (*post_root_reset_end)(struct musb *musb); - void (*phy_callback)(enum musb_vbus_id_status status); + int (*phy_callback)(enum musb_vbus_id_status status); }; /* diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index d312d42592d6..2c54f52ae386 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -180,22 +180,24 @@ static void omap2430_set_power(struct musb *musb, bool enabled, bool cable) } } -static void omap2430_musb_mailbox(enum musb_vbus_id_status status) +static int omap2430_musb_mailbox(enum musb_vbus_id_status status) { struct omap2430_glue *glue = _glue; if (!glue) { pr_err("%s: musb core is not yet initialized\n", __func__); - return; + return -EPROBE_DEFER; } glue->status = status; if (!glue_to_musb(glue)) { pr_err("%s: musb core is not yet ready\n", __func__); - return; + return -EPROBE_DEFER; } schedule_work(&glue->omap_musb_mailbox_work); + + return 0; } static void omap_musb_set_mailbox(struct omap2430_glue *glue) diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c index 24e2b3cf1867..c66a447c3dfe 100644 --- a/drivers/usb/phy/phy-twl6030-usb.c +++ b/drivers/usb/phy/phy-twl6030-usb.c @@ -227,12 +227,16 @@ static irqreturn_t twl6030_usb_irq(int irq, void *_twl) twl->asleep = 1; status = MUSB_VBUS_VALID; twl->linkstat = status; - musb_mailbox(status); + ret = musb_mailbox(status); + if (ret) + twl->linkstat = MUSB_UNKNOWN; } else { if (twl->linkstat != MUSB_UNKNOWN) { status = MUSB_VBUS_OFF; twl->linkstat = status; - musb_mailbox(status); + ret = musb_mailbox(status); + if (ret) + twl->linkstat = MUSB_UNKNOWN; if (twl->asleep) { regulator_disable(twl->usb3v3); twl->asleep = 0; @@ -264,7 +268,9 @@ static irqreturn_t twl6030_usbotg_irq(int irq, void *_twl) twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_SET); status = MUSB_ID_GROUND; twl->linkstat = status; - musb_mailbox(status); + ret = musb_mailbox(status); + if (ret) + twl->linkstat = MUSB_UNKNOWN; } else { twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_CLR); twl6030_writeb(twl, TWL_MODULE_USB, 0x1, USB_ID_INT_EN_HI_SET); diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 0b3da40a525e..d315c8907869 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -142,10 +142,11 @@ enum musb_vbus_id_status { }; #if IS_ENABLED(CONFIG_USB_MUSB_HDRC) -void musb_mailbox(enum musb_vbus_id_status status); +int musb_mailbox(enum musb_vbus_id_status status); #else -static inline void musb_mailbox(enum musb_vbus_id_status status) +static inline int musb_mailbox(enum musb_vbus_id_status status) { + return 0; } #endif From 87326e858448c40e32f142c0b8dcc59d7b27c641 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:20 -0500 Subject: [PATCH 0149/1050] usb: musb: Remove extra PM runtime calls from 2430 glue layer With PM runtime behaving, these are all now unnecessary. Doing pm_runtime_get(musb->controller) will keep the parent glue layer also active. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 2c54f52ae386..22f15b026ab9 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -268,13 +268,8 @@ static void omap_musb_mailbox_work(struct work_struct *mailbox_work) { struct omap2430_glue *glue = container_of(mailbox_work, struct omap2430_glue, omap_musb_mailbox_work); - struct musb *musb = glue_to_musb(glue); - struct device *dev = musb->controller; - pm_runtime_get_sync(dev); omap_musb_set_mailbox(glue); - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); } static irqreturn_t omap2430_musb_interrupt(int irq, void *__hci) @@ -343,16 +338,6 @@ static int omap2430_musb_init(struct musb *musb) musb->isr = omap2430_musb_interrupt; phy_init(musb->phy); - /* - * Enable runtime PM for musb parent (this driver). We can't - * do it earlier as struct musb is not yet allocated and we - * need to touch the musb registers for runtime PM. - */ - pm_runtime_enable(glue->dev); - status = pm_runtime_get_sync(glue->dev); - if (status < 0) - goto err1; - l = musb_readl(musb->mregs, OTG_INTERFSEL); if (data->interface_type == MUSB_INTERFACE_UTMI) { @@ -376,11 +361,7 @@ static int omap2430_musb_init(struct musb *musb) if (glue->status != MUSB_UNKNOWN) omap_musb_set_mailbox(glue); - pm_runtime_put(glue->dev); return 0; - -err1: - return status; } static void omap2430_musb_enable(struct musb *musb) @@ -588,11 +569,9 @@ static int omap2430_probe(struct platform_device *pdev) goto err2; } - /* - * Note that we cannot enable PM runtime yet for this - * driver as we need struct musb initialized first. - * See omap2430_musb_init above. - */ + pm_runtime_enable(glue->dev); + pm_runtime_use_autosuspend(glue->dev); + pm_runtime_set_autosuspend_delay(glue->dev, 500); ret = platform_device_add(musb); if (ret) { @@ -618,6 +597,7 @@ static int omap2430_remove(struct platform_device *pdev) platform_device_unregister(glue->musb); omap2430_set_power(musb, false, false); pm_runtime_put_sync(glue->dev); + pm_runtime_dont_use_autosuspend(glue->dev); pm_runtime_disable(glue->dev); return 0; From 1c4d0b4e18068b49e0eb9bf5125ff25c6d1452cd Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:21 -0500 Subject: [PATCH 0150/1050] usb: musb: Remove pm_runtime_set_irq_safe With the pull up being handled with delayed work, we can now finally remove pm_runtime_set_irq_safe that blocks the MUSB glue from idling. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 6469eff4fc30..0286a39ea01d 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2223,12 +2223,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) pm_runtime_mark_last_busy(musb->controller); pm_runtime_put_autosuspend(musb->controller); - /* - * For why this is currently needed, see commit 3e43a0725637 - * ("usb: musb: core: add pm_runtime_irq_safe()") - */ - pm_runtime_irq_safe(musb->controller); - return 0; fail5: From aec373c1e5d87d791525a675fe726c11d64b84a8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:22 -0500 Subject: [PATCH 0151/1050] usb: musb: Use normal module_init for 2430 glue There's no longer any need for custom initcall level for musb. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 22f15b026ab9..0b4cec940386 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -669,18 +669,8 @@ static struct platform_driver omap2430_driver = { }, }; +module_platform_driver(omap2430_driver); + MODULE_DESCRIPTION("OMAP2PLUS MUSB Glue Layer"); MODULE_AUTHOR("Felipe Balbi "); MODULE_LICENSE("GPL v2"); - -static int __init omap2430_init(void) -{ - return platform_driver_register(&omap2430_driver); -} -subsys_initcall(omap2430_init); - -static void __exit omap2430_exit(void) -{ - platform_driver_unregister(&omap2430_driver); -} -module_exit(omap2430_exit); From b6a619a883c38e3f01749afe13dd23395c94a058 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:23 -0500 Subject: [PATCH 0152/1050] usb: phy: Check initial state for twl6030 We need to check the state for the PHY with delayed_work as otherwise MUSB will get confused and idles immediately. Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-twl6030-usb.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c index c66a447c3dfe..a72e8d670adc 100644 --- a/drivers/usb/phy/phy-twl6030-usb.c +++ b/drivers/usb/phy/phy-twl6030-usb.c @@ -97,6 +97,9 @@ struct twl6030_usb { struct regulator *usb3v3; + /* used to check initial cable status after probe */ + struct delayed_work get_status_work; + /* used to set vbus, in atomic path */ struct work_struct set_vbus_work; @@ -280,6 +283,15 @@ static irqreturn_t twl6030_usbotg_irq(int irq, void *_twl) return IRQ_HANDLED; } +static void twl6030_status_work(struct work_struct *work) +{ + struct twl6030_usb *twl = container_of(work, struct twl6030_usb, + get_status_work.work); + + twl6030_usb_irq(twl->irq2, twl); + twl6030_usbotg_irq(twl->irq1, twl); +} + static int twl6030_enable_irq(struct twl6030_usb *twl) { twl6030_writeb(twl, TWL_MODULE_USB, 0x1, USB_ID_INT_EN_HI_SET); @@ -290,8 +302,6 @@ static int twl6030_enable_irq(struct twl6030_usb *twl) REG_INT_MSK_LINE_C); twl6030_interrupt_unmask(TWL6030_CHARGER_CTRL_INT_MASK, REG_INT_MSK_STS_C); - twl6030_usb_irq(twl->irq2, twl); - twl6030_usbotg_irq(twl->irq1, twl); return 0; } @@ -377,6 +387,7 @@ static int twl6030_usb_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "could not create sysfs file\n"); INIT_WORK(&twl->set_vbus_work, otg_set_vbus_work); + INIT_DELAYED_WORK(&twl->get_status_work, twl6030_status_work); status = request_threaded_irq(twl->irq1, NULL, twl6030_usbotg_irq, IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING | IRQF_ONESHOT, @@ -401,6 +412,7 @@ static int twl6030_usb_probe(struct platform_device *pdev) twl->asleep = 0; twl6030_enable_irq(twl); + schedule_delayed_work(&twl->get_status_work, HZ); dev_info(&pdev->dev, "Initialized TWL6030 USB module\n"); return 0; @@ -410,6 +422,7 @@ static int twl6030_usb_remove(struct platform_device *pdev) { struct twl6030_usb *twl = platform_get_drvdata(pdev); + cancel_delayed_work(&twl->get_status_work); twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK, REG_INT_MSK_LINE_C); twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK, From 84ac5d1140f716a616522f952734e850448d2556 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 31 May 2016 10:05:24 -0500 Subject: [PATCH 0153/1050] usb: musb: only restore devctl when session was set in backup If the session bit was not set in the backup of devctl register, restoring devctl would clear the session bit. Therefor, only restore devctl register when the session bit was set in the backup. This solves the device enumeration failure in otg mode exposed by commit 56f487c (PM / Runtime: Update last_busy in rpm_resume). Cc: # v4.2+ Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 0286a39ea01d..f824336def5c 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2418,7 +2418,8 @@ static void musb_restore_context(struct musb *musb) musb_writew(musb_base, MUSB_INTRTXE, musb->intrtxe); musb_writew(musb_base, MUSB_INTRRXE, musb->intrrxe); musb_writeb(musb_base, MUSB_INTRUSBE, musb->context.intrusbe); - musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl); + if (musb->context.devctl & MUSB_DEVCTL_SESSION) + musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl); for (i = 0; i < musb->config->num_eps; ++i) { struct musb_hw_ep *hw_ep; From 04471eb8c3158c0ad9df4b24da845a63b2e8f23a Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 31 May 2016 10:05:25 -0500 Subject: [PATCH 0154/1050] usb: musb: host: correct cppi dma channel for isoch transfer Incorrect cppi dma channel is referenced in musb_rx_dma_iso_cppi41(), which causes kernel NULL pointer reference oops later when calling cppi41_dma_channel_program(). Fixes: 069a3fd (usb: musb: Remove ifdefs for musb_host_rx in musb_host.c part1) Cc: # v4.2+ Reported-by: Matwey V. Kornilov Acked-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index f02361dbfd52..f0931c418f83 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -1549,7 +1549,7 @@ static int musb_rx_dma_iso_cppi41(struct dma_controller *dma, struct urb *urb, size_t len) { - struct dma_channel *channel = hw_ep->tx_channel; + struct dma_channel *channel = hw_ep->rx_channel; void __iomem *epio = hw_ep->regs; dma_addr_t *buf; u32 length, res; From f3eec0cf784e0d6c47822ca6b66df3d5812af7e6 Mon Sep 17 00:00:00 2001 From: Andrew Goodbody Date: Tue, 31 May 2016 10:05:26 -0500 Subject: [PATCH 0155/1050] usb: musb: Ensure rx reinit occurs for shared_fifo endpoints shared_fifo endpoints would only get a previous tx state cleared out, the rx state was only cleared for non shared_fifo endpoints Change this so that the rx state is cleared for all endpoints. This addresses an issue that resulted in rx packets being dropped silently. Signed-off-by: Andrew Goodbody Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index f0931c418f83..327f39c8c174 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -600,14 +600,13 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, u8 epnum) musb_writew(ep->regs, MUSB_TXCSR, 0); /* scrub all previous state, clearing toggle */ - } else { - csr = musb_readw(ep->regs, MUSB_RXCSR); - if (csr & MUSB_RXCSR_RXPKTRDY) - WARNING("rx%d, packet/%d ready?\n", ep->epnum, - musb_readw(ep->regs, MUSB_RXCOUNT)); - - musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG); } + csr = musb_readw(ep->regs, MUSB_RXCSR); + if (csr & MUSB_RXCSR_RXPKTRDY) + WARNING("rx%d, packet/%d ready?\n", ep->epnum, + musb_readw(ep->regs, MUSB_RXCOUNT)); + + musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG); /* target addr and (for multipoint) hub addr/port */ if (musb->is_multipoint) { From 7b2c17f829545df27a910e8d82e133c21c9a8c9c Mon Sep 17 00:00:00 2001 From: Andrew Goodbody Date: Tue, 31 May 2016 10:05:27 -0500 Subject: [PATCH 0156/1050] usb: musb: Stop bulk endpoint while queue is rotated Ensure that the endpoint is stopped by clearing REQPKT before clearing DATAERR_NAKTIMEOUT before rotating the queue on the dedicated bulk endpoint. This addresses an issue where a race could result in the endpoint receiving data before it was reprogrammed resulting in a warning about such data from musb_rx_reinit before it was thrown away. The data thrown away was a valid packet that had been correctly ACKed which meant the host and device got out of sync. Signed-off-by: Andrew Goodbody Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 327f39c8c174..d227a71d85e1 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -992,9 +992,15 @@ static void musb_bulk_nak_timeout(struct musb *musb, struct musb_hw_ep *ep, if (is_in) { dma = is_dma_capable() ? ep->rx_channel : NULL; - /* clear nak timeout bit */ + /* + * Need to stop the transaction by clearing REQPKT first + * then the NAK Timeout bit ref MUSBMHDRC USB 2.0 HIGH-SPEED + * DUAL-ROLE CONTROLLER Programmer's Guide, section 9.2.2 + */ rx_csr = musb_readw(epio, MUSB_RXCSR); rx_csr |= MUSB_RXCSR_H_WZC_BITS; + rx_csr &= ~MUSB_RXCSR_H_REQPKT; + musb_writew(epio, MUSB_RXCSR, rx_csr); rx_csr &= ~MUSB_RXCSR_DATAERROR; musb_writew(epio, MUSB_RXCSR, rx_csr); From ae4185cd13a3017259d30149e606f8c83857acf2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:05:48 +0200 Subject: [PATCH 0157/1050] MAINTAINERS: Add file patterns for clock device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Michael Turquette Cc: Stephen Boyd Cc: linux-clk@vger.kernel.org Signed-off-by: Stephen Boyd --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7304d2e37a98..2334995860b8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3086,6 +3086,7 @@ M: Stephen Boyd L: linux-clk@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git S: Maintained +F: Documentation/devicetree/bindings/clock/ F: drivers/clk/ X: drivers/clk/clkdev.c F: include/linux/clk-pr* From 72ad679aa7182d23d269cbe4d655f7e129d3b057 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 16 May 2016 12:45:36 -0300 Subject: [PATCH 0158/1050] clk: nxp: Select MFD_SYSCON for creg driver Commit 378523d15003 ("clk: add lpc18xx creg clk driver") added a new clock driver but missed the proper MFD_SYSCON select. Fix it. Fixes: 378523d15003 ("clk: add lpc18xx creg clk driver") Signed-off-by: Ezequiel Garcia Acked-by: Joachim Eastwood Signed-off-by: Stephen Boyd --- drivers/clk/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 53ddba26578c..98efbfcdb503 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -175,6 +175,7 @@ config COMMON_CLK_KEYSTONE config COMMON_CLK_NXP def_bool COMMON_CLK && (ARCH_LPC18XX || ARCH_LPC32XX) select REGMAP_MMIO if ARCH_LPC32XX + select MFD_SYSCON if ARCH_LPC18XX ---help--- Support for clock providers on NXP platforms. From 6b7e9cde49691e04314342b7dce90c67ad567fcc Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 12 May 2016 22:17:34 -0400 Subject: [PATCH 0159/1050] sd: Fix rw_max for devices that report an optimal xfer size For historic reasons, io_opt is in bytes and max_sectors in block layer sectors. This interface inconsistency is error prone and should be fixed. But for 4.4--4.7 let's make the unit difference explicit via a wrapper function. Fixes: d0eb20a863ba ("sd: Optimal I/O size is in bytes, not sectors") Cc: stable@vger.kernel.org # 4.4+ Reported-by: Fam Zheng Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Tested-by: Andrew Patterson Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 8 ++++---- drivers/scsi/sd.h | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f459dff30512..60bff78e9ead 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2867,10 +2867,10 @@ static int sd_revalidate_disk(struct gendisk *disk) if (sdkp->opt_xfer_blocks && sdkp->opt_xfer_blocks <= dev_max && sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS && - sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) - rw_max = q->limits.io_opt = - sdkp->opt_xfer_blocks * sdp->sector_size; - else + logical_to_bytes(sdp, sdkp->opt_xfer_blocks) >= PAGE_SIZE) { + q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); + rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks); + } else rw_max = BLK_DEF_MAX_SECTORS; /* Combine with controller limits */ diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 654630bb7d0e..765a6f1ac1b7 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -151,6 +151,11 @@ static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blo return blocks << (ilog2(sdev->sector_size) - 9); } +static inline unsigned int logical_to_bytes(struct scsi_device *sdev, sector_t blocks) +{ + return blocks * sdev->sector_size; +} + /* * A DIF-capable target device can be formatted with different * protection schemes. Currently 0 through 3 are defined: From bc9139d23f6b038e32bcd2dffdee70a8d76b3976 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 25 May 2016 10:56:24 +1000 Subject: [PATCH 0160/1050] drm/nouveau/bios/disp: fix handling of "match any protocol" entries As it turns out, a value of 0xff means "any protocol" and not "VGA". Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- .../gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h | 5 +++-- drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c | 13 +++++-------- drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c | 12 ++++-------- drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c | 8 +++++--- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h index db10c11f0595..c5a6ebd5a478 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h @@ -25,7 +25,8 @@ u16 nvbios_outp_match(struct nvkm_bios *, u16 type, u16 mask, u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_outp *); struct nvbios_ocfg { - u16 match; + u8 proto; + u8 flags; u16 clkcmp[2]; }; @@ -33,7 +34,7 @@ u16 nvbios_ocfg_entry(struct nvkm_bios *, u16 outp, u8 idx, u8 *ver, u8 *hdr, u8 *cnt, u8 *len); u16 nvbios_ocfg_parse(struct nvkm_bios *, u16 outp, u8 idx, u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *); -u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u16 type, +u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u8 proto, u8 flags, u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *); u16 nvbios_oclk_match(struct nvkm_bios *, u16 cmp, u32 khz); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c index f0314664349c..5dd34382f55a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c @@ -76,6 +76,7 @@ exec_lookup(struct nv50_disp *disp, int head, int or, u32 ctrl, mask |= 0x0001 << or; mask |= 0x0100 << head; + list_for_each_entry(outp, &disp->base.outp, head) { if ((outp->info.hasht & 0xff) == type && (outp->info.hashm & mask) == mask) { @@ -155,25 +156,21 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) if (!outp) return NULL; + *conf = (ctrl & 0x00000f00) >> 8; switch (outp->info.type) { case DCB_OUTPUT_TMDS: - *conf = (ctrl & 0x00000f00) >> 8; if (*conf == 5) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: - *conf = disp->sor.lvdsconf; + *conf |= disp->sor.lvdsconf; break; - case DCB_OUTPUT_DP: - *conf = (ctrl & 0x00000f00) >> 8; - break; - case DCB_OUTPUT_ANALOG: default: - *conf = 0x00ff; break; } - data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2); + data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8, + &ver, &hdr, &cnt, &len, &info2); if (data && id < 0xff) { data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk); if (data) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c index 4226d2153b9c..fcb1b0c46d64 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c @@ -387,22 +387,17 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) if (!outp) return NULL; + *conf = (ctrl & 0x00000f00) >> 8; if (outp->info.location == 0) { switch (outp->info.type) { case DCB_OUTPUT_TMDS: - *conf = (ctrl & 0x00000f00) >> 8; if (*conf == 5) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: - *conf = disp->sor.lvdsconf; + *conf |= disp->sor.lvdsconf; break; - case DCB_OUTPUT_DP: - *conf = (ctrl & 0x00000f00) >> 8; - break; - case DCB_OUTPUT_ANALOG: default: - *conf = 0x00ff; break; } } else { @@ -410,7 +405,8 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) pclk = pclk / 2; } - data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2); + data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8, + &ver, &hdr, &cnt, &len, &info2); if (data && id < 0xff) { data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk); if (data) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c index a5e92135cd77..9efb1b48cd54 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c @@ -141,7 +141,8 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx, { u16 data = nvbios_ocfg_entry(bios, outp, idx, ver, hdr, cnt, len); if (data) { - info->match = nvbios_rd16(bios, data + 0x00); + info->proto = nvbios_rd08(bios, data + 0x00); + info->flags = nvbios_rd16(bios, data + 0x01); info->clkcmp[0] = nvbios_rd16(bios, data + 0x02); info->clkcmp[1] = nvbios_rd16(bios, data + 0x04); } @@ -149,12 +150,13 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx, } u16 -nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u16 type, +nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u8 proto, u8 flags, u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *info) { u16 data, idx = 0; while ((data = nvbios_ocfg_parse(bios, outp, idx++, ver, hdr, cnt, len, info))) { - if (info->match == type) + if ((info->proto == proto || info->proto == 0xff) && + (info->flags == flags)) break; } return data; From 9057c8d75018f05bbc769d7b4602de3b8b20f8aa Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 27 May 2016 12:01:27 +1000 Subject: [PATCH 0161/1050] drm/nouveau/ltc/gm107-: fix typo in the address of NV_PLTCG_LTC0_LTS0_INTR Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c | 6 +++--- drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c index e292f5679418..389fb13a1998 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c @@ -69,11 +69,11 @@ gm107_ltc_zbc_clear_depth(struct nvkm_ltc *ltc, int i, const u32 depth) } static void -gm107_ltc_lts_isr(struct nvkm_ltc *ltc, int c, int s) +gm107_ltc_intr_lts(struct nvkm_ltc *ltc, int c, int s) { struct nvkm_subdev *subdev = <c->subdev; struct nvkm_device *device = subdev->device; - u32 base = 0x140000 + (c * 0x2000) + (s * 0x200); + u32 base = 0x140400 + (c * 0x2000) + (s * 0x200); u32 stat = nvkm_rd32(device, base + 0x00c); if (stat) { @@ -92,7 +92,7 @@ gm107_ltc_intr(struct nvkm_ltc *ltc) while (mask) { u32 s, c = __ffs(mask); for (s = 0; s < ltc->lts_nr; s++) - gm107_ltc_lts_isr(ltc, c, s); + gm107_ltc_intr_lts(ltc, c, s); mask &= ~(1 << c); } } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c index 2a29bfd5125a..e18e0dc19ec8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c @@ -46,7 +46,7 @@ static const struct nvkm_ltc_func gm200_ltc = { .oneinit = gm200_ltc_oneinit, .init = gm200_ltc_init, - .intr = gm107_ltc_intr, /*XXX: not validated */ + .intr = gm107_ltc_intr, .cbc_clear = gm107_ltc_cbc_clear, .cbc_wait = gm107_ltc_cbc_wait, .zbc = 16, From 383d0a419f8e63e3d65e706c3c515fa9505ce364 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 1 Jun 2016 16:20:10 +1000 Subject: [PATCH 0162/1050] drm/nouveau/gr/gf100-: update sm error decoding from gk20a nvgpu headers Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 9513badb8220..ae9ab5b1ab97 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -949,22 +949,41 @@ gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc) } static const struct nvkm_enum gf100_mp_warp_error[] = { - { 0x00, "NO_ERROR" }, - { 0x01, "STACK_MISMATCH" }, + { 0x01, "STACK_ERROR" }, + { 0x02, "API_STACK_ERROR" }, + { 0x03, "RET_EMPTY_STACK_ERROR" }, + { 0x04, "PC_WRAP" }, { 0x05, "MISALIGNED_PC" }, - { 0x08, "MISALIGNED_GPR" }, - { 0x09, "INVALID_OPCODE" }, - { 0x0d, "GPR_OUT_OF_BOUNDS" }, - { 0x0e, "MEM_OUT_OF_BOUNDS" }, - { 0x0f, "UNALIGNED_MEM_ACCESS" }, + { 0x06, "PC_OVERFLOW" }, + { 0x07, "MISALIGNED_IMMC_ADDR" }, + { 0x08, "MISALIGNED_REG" }, + { 0x09, "ILLEGAL_INSTR_ENCODING" }, + { 0x0a, "ILLEGAL_SPH_INSTR_COMBO" }, + { 0x0b, "ILLEGAL_INSTR_PARAM" }, + { 0x0c, "INVALID_CONST_ADDR" }, + { 0x0d, "OOR_REG" }, + { 0x0e, "OOR_ADDR" }, + { 0x0f, "MISALIGNED_ADDR" }, { 0x10, "INVALID_ADDR_SPACE" }, - { 0x11, "INVALID_PARAM" }, + { 0x11, "ILLEGAL_INSTR_PARAM2" }, + { 0x12, "INVALID_CONST_ADDR_LDC" }, + { 0x13, "GEOMETRY_SM_ERROR" }, + { 0x14, "DIVERGENT" }, + { 0x15, "WARP_EXIT" }, {} }; static const struct nvkm_bitfield gf100_mp_global_error[] = { + { 0x00000001, "SM_TO_SM_FAULT" }, + { 0x00000002, "L1_ERROR" }, { 0x00000004, "MULTIPLE_WARP_ERRORS" }, - { 0x00000008, "OUT_OF_STACK_SPACE" }, + { 0x00000008, "PHYSICAL_STACK_OVERFLOW" }, + { 0x00000010, "BPT_INT" }, + { 0x00000020, "BPT_PAUSE" }, + { 0x00000040, "SINGLE_STEP_COMPLETE" }, + { 0x20000000, "ECC_SEC_ERROR" }, + { 0x40000000, "ECC_DED_ERROR" }, + { 0x80000000, "TIMEOUT" }, {} }; From f045f459d925138fe7d6193a8c86406bda7e49da Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 2 Jun 2016 12:23:31 +1000 Subject: [PATCH 0163/1050] drm/nouveau/fbcon: fix out-of-bounds memory accesses Reported by KASAN. Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 1 + drivers/gpu/drm/nouveau/nv04_fbcon.c | 7 ++----- drivers/gpu/drm/nouveau/nv50_fbcon.c | 6 ++---- drivers/gpu/drm/nouveau/nvc0_fbcon.c | 6 ++---- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 57aaf98a26f9..300ea03be8f0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -552,6 +552,7 @@ nouveau_fbcon_init(struct drm_device *dev) if (ret) goto fini; + fbcon->helper.fbdev->pixmap.buf_align = 4; return 0; fini: diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c index 0f3e4bb411cc..7d9248b8c664 100644 --- a/drivers/gpu/drm/nouveau/nv04_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c @@ -82,7 +82,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) uint32_t fg; uint32_t bg; uint32_t dsize; - uint32_t width; uint32_t *data = (uint32_t *)image->data; int ret; @@ -93,9 +92,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) if (ret) return ret; - width = ALIGN(image->width, 8); - dsize = ALIGN(width * image->height, 32) >> 5; - if (info->fix.visual == FB_VISUAL_TRUECOLOR || info->fix.visual == FB_VISUAL_DIRECTCOLOR) { fg = ((uint32_t *) info->pseudo_palette)[image->fg_color]; @@ -111,10 +107,11 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) ((image->dx + image->width) & 0xffff)); OUT_RING(chan, bg); OUT_RING(chan, fg); - OUT_RING(chan, (image->height << 16) | width); + OUT_RING(chan, (image->height << 16) | image->width); OUT_RING(chan, (image->height << 16) | image->width); OUT_RING(chan, (image->dy << 16) | (image->dx & 0xffff)); + dsize = ALIGN(image->width * image->height, 32) >> 5; while (dsize) { int iter_len = dsize > 128 ? 128 : dsize; diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c index 33d9ee0fac40..1aeb698e9707 100644 --- a/drivers/gpu/drm/nouveau/nv50_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c @@ -95,7 +95,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) struct nouveau_fbdev *nfbdev = info->par; struct nouveau_drm *drm = nouveau_drm(nfbdev->dev); struct nouveau_channel *chan = drm->channel; - uint32_t width, dwords, *data = (uint32_t *)image->data; + uint32_t dwords, *data = (uint32_t *)image->data; uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel)); uint32_t *palette = info->pseudo_palette; int ret; @@ -107,9 +107,6 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) if (ret) return ret; - width = ALIGN(image->width, 32); - dwords = (width * image->height) >> 5; - BEGIN_NV04(chan, NvSub2D, 0x0814, 2); if (info->fix.visual == FB_VISUAL_TRUECOLOR || info->fix.visual == FB_VISUAL_DIRECTCOLOR) { @@ -128,6 +125,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) OUT_RING(chan, 0); OUT_RING(chan, image->dy); + dwords = ALIGN(image->width * image->height, 32) >> 5; while (dwords) { int push = dwords > 2047 ? 2047 : dwords; diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c index a0913359ac05..839f4c8c1805 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c +++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c @@ -95,7 +95,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) struct nouveau_fbdev *nfbdev = info->par; struct nouveau_drm *drm = nouveau_drm(nfbdev->dev); struct nouveau_channel *chan = drm->channel; - uint32_t width, dwords, *data = (uint32_t *)image->data; + uint32_t dwords, *data = (uint32_t *)image->data; uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel)); uint32_t *palette = info->pseudo_palette; int ret; @@ -107,9 +107,6 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) if (ret) return ret; - width = ALIGN(image->width, 32); - dwords = (width * image->height) >> 5; - BEGIN_NVC0(chan, NvSub2D, 0x0814, 2); if (info->fix.visual == FB_VISUAL_TRUECOLOR || info->fix.visual == FB_VISUAL_DIRECTCOLOR) { @@ -128,6 +125,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) OUT_RING (chan, 0); OUT_RING (chan, image->dy); + dwords = ALIGN(image->width * image->height, 32) >> 5; while (dwords) { int push = dwords > 2047 ? 2047 : dwords; From 77154fd969df749f5bf83f639af19fca199de033 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 2 Jun 2016 12:42:32 +1000 Subject: [PATCH 0164/1050] drm/nouveau/core: swap the order of imem/fb Fixes a use-after-free reported by valgrind and KASAN. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/core/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index c612dc1f1eb4..126a85cc81bc 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -16,9 +16,9 @@ enum nvkm_devidx { NVKM_SUBDEV_MC, NVKM_SUBDEV_BUS, NVKM_SUBDEV_TIMER, + NVKM_SUBDEV_INSTMEM, NVKM_SUBDEV_FB, NVKM_SUBDEV_LTC, - NVKM_SUBDEV_INSTMEM, NVKM_SUBDEV_MMU, NVKM_SUBDEV_BAR, NVKM_SUBDEV_PMU, From bfa49cfc526201119623de6593d284c96563bede Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Jun 2016 14:16:50 +0100 Subject: [PATCH 0165/1050] net/ethoc: fix null dereference on error exit path priv is assigned to NULL however some of the early error exit paths to label 'free' dereference priv, causing a null pointer dereference. Move the label 'free' to just the free_netdev statement, and add a new exit path 'free2' for the error cases were clk_disable_unprepare needs calling before the final free. Fixes issue found by CoverityScan, CID#113260 Signed-off-by: Colin Ian King Reviewed-by: Max Filippov Signed-off-by: David S. Miller --- drivers/net/ethernet/ethoc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 41b010645100..4edb98c3c6c7 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1195,7 +1195,7 @@ static int ethoc_probe(struct platform_device *pdev) priv->mdio = mdiobus_alloc(); if (!priv->mdio) { ret = -ENOMEM; - goto free; + goto free2; } priv->mdio->name = "ethoc-mdio"; @@ -1208,7 +1208,7 @@ static int ethoc_probe(struct platform_device *pdev) ret = mdiobus_register(priv->mdio); if (ret) { dev_err(&netdev->dev, "failed to register MDIO bus\n"); - goto free; + goto free2; } ret = ethoc_mdio_probe(netdev); @@ -1241,9 +1241,10 @@ error2: error: mdiobus_unregister(priv->mdio); mdiobus_free(priv->mdio); -free: +free2: if (priv->clk) clk_disable_unprepare(priv->clk); +free: free_netdev(netdev); out: return ret; From 14b84e8654c89ed59f433654e6bb64c886d095cd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Jun 2016 15:29:13 +0200 Subject: [PATCH 0166/1050] qed: fix qed_fill_link() error handling gcc warns about qed_fill_link possibly accessing uninitialized data: drivers/net/ethernet/qlogic/qed/qed_main.c: In function 'qed_fill_link': drivers/net/ethernet/qlogic/qed/qed_main.c:1170:35: error: 'link_caps' may be used uninitialized in this function [-Werror=maybe-uninitialized] While this warning is only about the specific case of CONFIG_QED_SRIOV being disabled but the function getting called for a VF (which should never happen), another possibility is that qed_mcp_get_*() fails without returning data. This rearranges the code so we bail out in either of the two cases and print a warning instead of accessing the uninitialized data. The qed_link_output structure remains untouched in this case, but all callers first call memset() on it, so at least we are not leaking stack data then. As discussed, we also use a compile-time check to ensure we never use any of the VF code if CONFIG_QED_SRIOV is disabled, and the PCI device table is updated to no longer bind to virtual functions in that configuration. Signed-off-by: Arnd Bergmann Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 45 ++++++++++++++++---- drivers/net/ethernet/qlogic/qed/qed_sriov.h | 4 +- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 + 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 753064679bde..61cc6869fa65 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1105,6 +1105,39 @@ static int qed_get_port_type(u32 media_type) return port_type; } +static int qed_get_link_data(struct qed_hwfn *hwfn, + struct qed_mcp_link_params *params, + struct qed_mcp_link_state *link, + struct qed_mcp_link_capabilities *link_caps) +{ + void *p; + + if (!IS_PF(hwfn->cdev)) { + qed_vf_get_link_params(hwfn, params); + qed_vf_get_link_state(hwfn, link); + qed_vf_get_link_caps(hwfn, link_caps); + + return 0; + } + + p = qed_mcp_get_link_params(hwfn); + if (!p) + return -ENXIO; + memcpy(params, p, sizeof(*params)); + + p = qed_mcp_get_link_state(hwfn); + if (!p) + return -ENXIO; + memcpy(link, p, sizeof(*link)); + + p = qed_mcp_get_link_capabilities(hwfn); + if (!p) + return -ENXIO; + memcpy(link_caps, p, sizeof(*link_caps)); + + return 0; +} + static void qed_fill_link(struct qed_hwfn *hwfn, struct qed_link_output *if_link) { @@ -1116,15 +1149,9 @@ static void qed_fill_link(struct qed_hwfn *hwfn, memset(if_link, 0, sizeof(*if_link)); /* Prepare source inputs */ - if (IS_PF(hwfn->cdev)) { - memcpy(¶ms, qed_mcp_get_link_params(hwfn), sizeof(params)); - memcpy(&link, qed_mcp_get_link_state(hwfn), sizeof(link)); - memcpy(&link_caps, qed_mcp_get_link_capabilities(hwfn), - sizeof(link_caps)); - } else { - qed_vf_get_link_params(hwfn, ¶ms); - qed_vf_get_link_state(hwfn, &link); - qed_vf_get_link_caps(hwfn, &link_caps); + if (qed_get_link_data(hwfn, ¶ms, &link, &link_caps)) { + dev_warn(&hwfn->cdev->pdev->dev, "no link data available\n"); + return; } /* Set the link parameters to pass to protocol driver */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index c8667c65e685..c90b2b6ad969 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -12,11 +12,13 @@ #include "qed_vf.h" #define QED_VF_ARRAY_LENGTH (3) +#ifdef CONFIG_QED_SRIOV #define IS_VF(cdev) ((cdev)->b_is_vf) #define IS_PF(cdev) (!((cdev)->b_is_vf)) -#ifdef CONFIG_QED_SRIOV #define IS_PF_SRIOV(p_hwfn) (!!((p_hwfn)->cdev->p_iov_info)) #else +#define IS_VF(cdev) (0) +#define IS_PF(cdev) (1) #define IS_PF_SRIOV(p_hwfn) (0) #endif #define IS_PF_SRIOV_ALLOC(p_hwfn) (!!((p_hwfn)->pf_iov_info)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5d00d1404bfc..5733d1888223 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -87,7 +87,9 @@ static const struct pci_device_id qede_pci_tbl[] = { {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_100), QEDE_PRIVATE_PF}, {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_50), QEDE_PRIVATE_PF}, {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_25), QEDE_PRIVATE_PF}, +#ifdef CONFIG_QED_SRIOV {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_IOV), QEDE_PRIVATE_VF}, +#endif { 0 } }; From 42316a201a60be38b07db1ebc3a1633107ed7209 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:31:33 +0530 Subject: [PATCH 0167/1050] Revert "ARCv2: spinlock/rwlock/atomics: reduce 1 instruction in exponential backoff" This reverts commit 10971638701dedadb58c88ce4d31c9375b224ed6. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL - so revert thw whole delayed retry series ! Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic.h | 3 ++- arch/arc/include/asm/spinlock.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 5f3dcbbc0cc9..75c8226317f4 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -36,7 +36,8 @@ " mov %[tmp], %[delay] \n" /* tmp = delay */ \ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " mov.z %[delay], 1 \n" /* handle overflow */ \ " b 1b \n" /* start over */ \ "4: ; --- success --- \n" \ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 800e7c430ca5..a86cb84fad2a 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -260,7 +260,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) " mov %[tmp], %[delay] \n" /* tmp = delay */ \ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " mov.z %[delay], 1 \n" /* handle overflow */ \ " b 1b \n" /* start over */ \ " \n" \ "4: ; --- done --- \n" \ From 819f3602dcbd6b021cd50e18f5d05da30bca5b07 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:33:29 +0530 Subject: [PATCH 0168/1050] Revert "ARCv2: spinlock/rwlock: Reset retry delay when starting a new spin-wait cycle" This reverts commit b89aa12c177477e34caa722818536fb5d0bffd76. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL so revert the whole delayed retry series ! Signed-off-by: Vineet Gupta --- arch/arc/include/asm/spinlock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index a86cb84fad2a..5e01bdf968ea 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -279,7 +279,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 0b \n" /* spin while LOCKED */ + " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ " scond %[LOCKED], [%[slock]] \n" /* acquire */ " bz 4f \n" /* done */ " \n" @@ -358,7 +358,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 0b\n" /* <= 0: spin while write locked */ + " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ " sub %[val], %[val], 1 \n" /* reader lock */ " scond %[val], [%[rwlock]] \n" " bz 4f \n" /* done */ @@ -427,7 +427,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 0b \n" /* while !UNLOCKED spin */ + " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ " mov %[val], %[WR_LOCKED] \n" " scond %[val], [%[rwlock]] \n" " bz 4f \n" From ed6aefed726a305bd36344e230d2a9e9301226fc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:35:09 +0530 Subject: [PATCH 0169/1050] Revert "ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff" This reverts commit e78fdfef84be13a5c2b8276e12203cdf24778596. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL so revert the whole delayed retry series ! Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 5 - arch/arc/include/asm/atomic.h | 46 +---- arch/arc/include/asm/spinlock.h | 293 -------------------------------- 3 files changed, 4 insertions(+), 340 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index be9d0b5ae0cc..0d3e59f56974 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -389,11 +389,6 @@ config ARC_HAS_LLSC default y depends on !ARC_CANT_LLSC -config ARC_STAR_9000923308 - bool "Workaround for llock/scond livelock" - default n - depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC - config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" default y diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 75c8226317f4..dd683995bc9d 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -25,51 +25,17 @@ #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) -#ifdef CONFIG_ARC_STAR_9000923308 - -#define SCOND_FAIL_RETRY_VAR_DEF \ - unsigned int delay = 1, tmp; \ - -#define SCOND_FAIL_RETRY_ASM \ - " bz 4f \n" \ - " ; --- scond fail delay --- \n" \ - " mov %[tmp], %[delay] \n" /* tmp = delay */ \ - "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ - " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ - " mov.z %[delay], 1 \n" /* handle overflow */ \ - " b 1b \n" /* start over */ \ - "4: ; --- success --- \n" \ - -#define SCOND_FAIL_RETRY_VARS \ - ,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \ - -#else /* !CONFIG_ARC_STAR_9000923308 */ - -#define SCOND_FAIL_RETRY_VAR_DEF - -#define SCOND_FAIL_RETRY_ASM \ - " bnz 1b \n" \ - -#define SCOND_FAIL_RETRY_VARS - -#endif - #define ATOMIC_OP(op, c_op, asm_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - unsigned int val; \ - SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int val; \ \ __asm__ __volatile__( \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ + " bnz 1b \n" \ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ [i] "ir" (i) \ : "cc"); \ @@ -78,8 +44,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ - unsigned int val; \ - SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int val; \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -91,11 +56,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ + " bnz 1b \n" \ : [val] "=&r" (val) \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), \ [i] "ir" (i) \ : "cc"); \ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 5e01bdf968ea..cded4a9b5438 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -20,11 +20,6 @@ #ifdef CONFIG_ARC_HAS_LLSC -/* - * A normal LLOCK/SCOND based system, w/o need for livelock workaround - */ -#ifndef CONFIG_ARC_STAR_9000923308 - static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int val; @@ -238,294 +233,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) smp_mb(); } -#else /* CONFIG_ARC_STAR_9000923308 */ - -/* - * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping - * coherency transactions in the SCU. The exclusive line state keeps rotating - * among contenting cores leading to a never ending cycle. So break the cycle - * by deferring the retry of failed exclusive access (SCOND). The actual delay - * needed is function of number of contending cores as well as the unrelated - * coherency traffic from other cores. To keep the code simple, start off with - * small delay of 1 which would suffice most cases and in case of contention - * double the delay. Eventually the delay is sufficient such that the coherency - * pipeline is drained, thus a subsequent exclusive access would succeed. - */ - -#define SCOND_FAIL_RETRY_VAR_DEF \ - unsigned int delay, tmp; \ - -#define SCOND_FAIL_RETRY_ASM \ - " ; --- scond fail delay --- \n" \ - " mov %[tmp], %[delay] \n" /* tmp = delay */ \ - "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ - " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ - " mov.z %[delay], 1 \n" /* handle overflow */ \ - " b 1b \n" /* start over */ \ - " \n" \ - "4: ; --- done --- \n" \ - -#define SCOND_FAIL_RETRY_VARS \ - ,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \ - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ - " scond %[LOCKED], [%[slock]] \n" /* acquire */ - " bz 4f \n" /* done */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [slock] "r" (&(lock->slock)), - [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ - " scond %[LOCKED], [%[slock]] \n" /* acquire */ - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [slock] "r" (&(lock->slock)), - [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - smp_mb(); - - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; - - smp_mb(); -} - -/* - * Read-write spinlocks, allowing multiple readers but only one writer. - * Unfair locking as Writers could be starved indefinitely by Reader(s) - */ - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - /* - * zero means writer holds the lock exclusively, deny Reader. - * Otherwise grant lock to first/subseq reader - * - * if (rw->counter > 0) { - * rw->counter--; - * ret = 1; - * } - */ - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ - " sub %[val], %[val], 1 \n" /* reader lock */ - " scond %[val], [%[rwlock]] \n" - " bz 4f \n" /* done */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ - " sub %[val], %[val], 1 \n" /* counter-- */ - " scond %[val], [%[rwlock]] \n" - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - /* - * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), - * deny writer. Otherwise if unlocked grant to writer - * Hence the claim that Linux rwlocks are unfair to writers. - * (can be starved for an indefinite time by readers). - * - * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { - * rw->counter = 0; - * ret = 1; - * } - */ - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ - " mov %[val], %[WR_LOCKED] \n" - " scond %[val], [%[rwlock]] \n" - " bz 4f \n" - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_write_trylock(arch_rwlock_t *rw) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ - " mov %[val], %[WR_LOCKED] \n" - " scond %[val], [%[rwlock]] \n" - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - unsigned int val; - - smp_mb(); - - /* - * rw->counter++; - */ - __asm__ __volatile__( - "1: llock %[val], [%[rwlock]] \n" - " add %[val], %[val], 1 \n" - " scond %[val], [%[rwlock]] \n" - " bnz 1b \n" - " \n" - : [val] "=&r" (val) - : [rwlock] "r" (&(rw->counter)) - : "memory", "cc"); - - smp_mb(); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - unsigned int val; - - smp_mb(); - - /* - * rw->counter = __ARCH_RW_LOCK_UNLOCKED__; - */ - __asm__ __volatile__( - "1: llock %[val], [%[rwlock]] \n" - " scond %[UNLOCKED], [%[rwlock]]\n" - " bnz 1b \n" - " \n" - : [val] "=&r" (val) - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__) - : "memory", "cc"); - - smp_mb(); -} - -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS - -#endif /* CONFIG_ARC_STAR_9000923308 */ - #else /* !CONFIG_ARC_HAS_LLSC */ static inline void arch_spin_lock(arch_spinlock_t *lock) From 7b7eba0f3515fca3296b8881d583f7c1042f5226 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Jun 2016 02:04:44 +0200 Subject: [PATCH 0170/1050] netfilter: x_tables: don't reject valid target size on some architectures Quoting John Stultz: In updating a 32bit arm device from 4.6 to Linus' current HEAD, I noticed I was having some trouble with networking, and realized that /proc/net/ip_tables_names was suddenly empty. Digging through the registration process, it seems we're catching on the: if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && target_offset + sizeof(struct xt_standard_target) != next_offset) return -EINVAL; Where next_offset seems to be 4 bytes larger then the offset + standard_target struct size. next_offset needs to be aligned via XT_ALIGN (so we can access all members of ip(6)t_entry struct). This problem didn't show up on i686 as it only needs 4-byte alignment for u64, but iptables userspace on other 32bit arches does insert extra padding. Reported-by: John Stultz Tested-by: John Stultz Fixes: 7ed2abddd20cf ("netfilter: x_tables: check standard target size too") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c69c892231d7..2675d580c490 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -612,7 +612,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) return -EINVAL; /* compat_xt_entry match has less strict aligment requirements, @@ -694,7 +694,7 @@ int xt_check_entry_offsets(const void *base, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct xt_standard_target) != next_offset) + XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset) return -EINVAL; return xt_check_entry_match(elems, base + target_offset, From 5473e0c426ffaeaa19734987b153c2a7f33b8706 Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Wed, 1 Jun 2016 19:18:23 +0800 Subject: [PATCH 0171/1050] Btrfs: test_check_exists: Fix infinite loop when searching for free space entries On a ppc64 machine using 64K as the block size, assume that the RB tree at btrfs_free_space_ctl->free_space_offset contains following two entries: 1. A bitmap entry having an offset value of 0 and having the bits corresponding to the address range [128M+512K, 128M+768K] set. 2. An extent entry corresponding to the address range [128M-256K, 128M-128K] In such a scenario, test_check_exists() invoked for checking the existence of address range [128M+768K, 256M] can lead to an infinite loop as explained below: - Checking for the extent entry fails. - Checking for a bitmap entry results in the free space info in range [128M+512K, 128M+768K] beng returned. - rb_prev(info) returns NULL because the bitmap entry starting from offset 0 comes first in the RB tree. - current_node = bitmap node. - while (current_node) tmp = rb_next(bitmap_node);/*tmp is extent based free space entry*/ Since extent based free space entry's last address is smaller than the address being searched for (i.e. 128M+768K) we incorrectly again obtain the extent node as the "next right node" of the RB tree and thus end up looping infinitely. This patch fixes the issue by checking the "tmp" variable which point to the most recently searched free space node. Reviewed-by: Josef Bacik Reviewed-by: Chandan Rajendra Signed-off-by: Feifei Xu Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index c6dc1183f542..fa623359d5b8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3662,7 +3662,7 @@ have_info: if (tmp->offset + tmp->bytes < offset) break; if (offset + bytes < tmp->offset) { - n = rb_prev(&info->offset_index); + n = rb_prev(&tmp->offset_index); continue; } info = tmp; @@ -3676,7 +3676,7 @@ have_info: if (offset + bytes < tmp->offset) break; if (tmp->offset + tmp->bytes < offset) { - n = rb_next(&info->offset_index); + n = rb_next(&tmp->offset_index); continue; } info = tmp; From 0ef6447a3d2f014e49069c4da33f905ed803aa2a Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Wed, 1 Jun 2016 19:18:24 +0800 Subject: [PATCH 0172/1050] Btrfs: Fix integer overflow when calculating bytes_per_bitmap On ppc64, bytes_per_bitmap will be (65536*8*65536). Hence append UL to fix integer overflow. Reviewed-by: Josef Bacik Reviewed-by: Chandan Rajendra Signed-off-by: Feifei Xu Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 12 ++++++------ fs/btrfs/tests/free-space-tests.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index fa623359d5b8..2813ef0718a2 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -29,7 +29,7 @@ #include "inode-map.h" #include "volumes.h" -#define BITS_PER_BITMAP (PAGE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8UL) #define MAX_CACHE_BYTES_PER_GIG SZ_32K struct btrfs_trim_range { @@ -1415,11 +1415,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) { u64 bitmap_start; - u32 bytes_per_bitmap; + u64 bytes_per_bitmap; bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; bitmap_start = offset - ctl->start; - bitmap_start = div_u64(bitmap_start, bytes_per_bitmap); + bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); bitmap_start *= bytes_per_bitmap; bitmap_start += ctl->start; @@ -1638,10 +1638,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) u64 bitmap_bytes; u64 extent_bytes; u64 size = block_group->key.offset; - u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; - u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg); + u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; + u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); - max_bitmaps = max_t(u32, max_bitmaps, 1); + max_bitmaps = max_t(u64, max_bitmaps, 1); ASSERT(ctl->total_bitmaps <= max_bitmaps); diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 0eeb8f3d6b67..f3756d6b9ba2 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -22,7 +22,7 @@ #include "../disk-io.h" #include "../free-space-cache.h" -#define BITS_PER_BITMAP (PAGE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8UL) /* * This test just does basic sanity checking, making sure we can add an extent From b9ef22dedde08ab1b4ccd5f53344984c4dcb89f4 Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Wed, 1 Jun 2016 19:18:25 +0800 Subject: [PATCH 0173/1050] Btrfs: self-tests: Support non-4k page size self-tests code assumes 4k as the sectorsize and nodesize. This commit fix hardcoded 4K. Enables the self-tests code to be executed on non-4k page sized systems (e.g. ppc64). Reviewed-by: Josef Bacik Signed-off-by: Feifei Xu Signed-off-by: Chandan Rajendra Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 6 +- fs/btrfs/disk-io.c | 8 +- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent_io.c | 10 +- fs/btrfs/extent_io.h | 4 +- fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/super.c | 16 +- fs/btrfs/tests/btrfs-tests.c | 6 +- fs/btrfs/tests/btrfs-tests.h | 27 +- fs/btrfs/tests/extent-buffer-tests.c | 11 +- fs/btrfs/tests/extent-io-tests.c | 27 +- fs/btrfs/tests/free-space-tests.c | 68 ++--- fs/btrfs/tests/free-space-tree-tests.c | 30 ++- fs/btrfs/tests/inode-tests.c | 348 ++++++++++++++----------- fs/btrfs/tests/qgroup-tests.c | 85 +++--- 15 files changed, 357 insertions(+), 293 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 427c36b430a6..46025688f1d0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1373,7 +1373,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); - eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start); + eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start, + eb->len); if (!eb_rewin) { btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); @@ -1454,7 +1455,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } else if (old_root) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - eb = alloc_dummy_extent_buffer(root->fs_info, logical); + eb = alloc_dummy_extent_buffer(root->fs_info, logical, + root->nodesize); } else { btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6628fca9f4ed..06ef433552b3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1147,7 +1147,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr) { if (btrfs_test_is_dummy_root(root)) - return alloc_test_extent_buffer(root->fs_info, bytenr); + return alloc_test_extent_buffer(root->fs_info, bytenr, + root->nodesize); return alloc_extent_buffer(root->fs_info, bytenr); } @@ -1314,14 +1315,15 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* Should only be used by the testing infrastructure */ -struct btrfs_root *btrfs_alloc_dummy_root(void) +struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize) { struct btrfs_root *root; root = btrfs_alloc_root(NULL, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); - __setup_root(4096, 4096, 4096, root, NULL, 1); + /* We don't use the stripesize in selftest, set it as sectorsize */ + __setup_root(nodesize, sectorsize, sectorsize, root, NULL, 1); set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); root->alloc_bytenr = 0; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8e79d0070bcf..acba821499a9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -90,7 +90,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, void btrfs_free_fs_root(struct btrfs_root *root); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -struct btrfs_root *btrfs_alloc_dummy_root(void); +struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize); #endif /* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3cd57825c75f..b0a554d4204a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4718,16 +4718,16 @@ err: } struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { unsigned long len; if (!fs_info) { /* * Called only from tests that don't always have a fs_info - * available, but we know that nodesize is 4096 + * available */ - len = 4096; + len = nodesize; } else { len = fs_info->tree_root->nodesize; } @@ -4823,7 +4823,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { struct extent_buffer *eb, *exists = NULL; int ret; @@ -4831,7 +4831,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, eb = find_extent_buffer(fs_info, start); if (eb) return eb; - eb = alloc_dummy_extent_buffer(fs_info, start); + eb = alloc_dummy_extent_buffer(fs_info, start, nodesize); if (!eb) return NULL; eb->fs_info = fs_info; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1baf19c9b79d..c0c1c4fef6ce 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -348,7 +348,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, unsigned long len); struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start); + u64 start, u32 nodesize); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); @@ -468,5 +468,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode, u64 *end, u64 max_bytes); #endif struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start); + u64 start, u32 nodesize); #endif diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 2813ef0718a2..69d270f6602c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1660,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as * we add more bitmaps. */ - bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE; + bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; if (bitmap_bytes >= max_bytes) { ctl->extents_thresh = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4e59a91a11e0..5b0b354ca594 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2319,27 +2319,31 @@ static void btrfs_print_mod_info(void) static int btrfs_run_sanity_tests(void) { int ret; + u32 sectorsize, nodesize; + sectorsize = PAGE_SIZE; + nodesize = PAGE_SIZE; ret = btrfs_init_test_fs(); if (ret) return ret; - ret = btrfs_test_free_space_cache(); + ret = btrfs_test_free_space_cache(sectorsize, nodesize); if (ret) goto out; - ret = btrfs_test_extent_buffer_operations(); + ret = btrfs_test_extent_buffer_operations(sectorsize, + nodesize); if (ret) goto out; - ret = btrfs_test_extent_io(); + ret = btrfs_test_extent_io(sectorsize, nodesize); if (ret) goto out; - ret = btrfs_test_inodes(); + ret = btrfs_test_inodes(sectorsize, nodesize); if (ret) goto out; - ret = btrfs_test_qgroups(); + ret = btrfs_test_qgroups(sectorsize, nodesize); if (ret) goto out; - ret = btrfs_test_free_space_tree(); + ret = btrfs_test_free_space_tree(sectorsize, nodesize); out: btrfs_destroy_test_fs(); return ret; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index f54bf450bad3..10eb249ef891 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -175,7 +175,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root) } struct btrfs_block_group_cache * -btrfs_alloc_dummy_block_group(unsigned long length) +btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize) { struct btrfs_block_group_cache *cache; @@ -192,8 +192,8 @@ btrfs_alloc_dummy_block_group(unsigned long length) cache->key.objectid = 0; cache->key.offset = length; cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - cache->sectorsize = 4096; - cache->full_stripe_len = 4096; + cache->sectorsize = sectorsize; + cache->full_stripe_len = sectorsize; INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index 054b8c73c951..66fb6b701eb7 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -26,27 +26,28 @@ struct btrfs_root; struct btrfs_trans_handle; -int btrfs_test_free_space_cache(void); -int btrfs_test_extent_buffer_operations(void); -int btrfs_test_extent_io(void); -int btrfs_test_inodes(void); -int btrfs_test_qgroups(void); -int btrfs_test_free_space_tree(void); +int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize); +int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize); +int btrfs_test_extent_io(u32 sectorsize, u32 nodesize); +int btrfs_test_inodes(u32 sectorsize, u32 nodesize); +int btrfs_test_qgroups(u32 sectorsize, u32 nodesize); +int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize); int btrfs_init_test_fs(void); void btrfs_destroy_test_fs(void); struct inode *btrfs_new_test_inode(void); struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void); void btrfs_free_dummy_root(struct btrfs_root *root); struct btrfs_block_group_cache * -btrfs_alloc_dummy_block_group(unsigned long length); +btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize); void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache); void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans); #else -static inline int btrfs_test_free_space_cache(void) +static inline int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_extent_buffer_operations(void) +static inline int btrfs_test_extent_buffer_operations(u32 sectorsize, + u32 nodesize) { return 0; } @@ -57,19 +58,19 @@ static inline int btrfs_init_test_fs(void) static inline void btrfs_destroy_test_fs(void) { } -static inline int btrfs_test_extent_io(void) +static inline int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_inodes(void) +static inline int btrfs_test_inodes(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_qgroups(void) +static inline int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_free_space_tree(void) +static inline int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize) { return 0; } diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index f51963a8f929..17b110f39f06 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -22,7 +22,7 @@ #include "../extent_io.h" #include "../disk-io.h" -static int test_btrfs_split_item(void) +static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) { struct btrfs_path *path; struct btrfs_root *root; @@ -40,7 +40,7 @@ static int test_btrfs_split_item(void) test_msg("Running btrfs_split_item tests\n"); - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Could not allocate root\n"); return PTR_ERR(root); @@ -53,7 +53,8 @@ static int test_btrfs_split_item(void) return -ENOMEM; } - path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096); + path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, nodesize, + nodesize); if (!eb) { test_msg("Could not allocate dummy buffer\n"); ret = -ENOMEM; @@ -222,8 +223,8 @@ out: return ret; } -int btrfs_test_extent_buffer_operations(void) +int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize) { test_msg("Running extent buffer operation tests"); - return test_btrfs_split_item(); + return test_btrfs_split_item(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 55724607f79b..755fceb2e5f5 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -65,7 +65,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, return count; } -static int test_find_delalloc(void) +static int test_find_delalloc(u32 sectorsize) { struct inode *inode; struct extent_io_tree tmp; @@ -113,7 +113,7 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - set_extent_delalloc(&tmp, 0, 4095, NULL); + set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL); start = 0; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -122,9 +122,9 @@ static int test_find_delalloc(void) test_msg("Should have found at least one delalloc\n"); goto out_bits; } - if (start != 0 || end != 4095) { - test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n", - start, end); + if (start != 0 || end != (sectorsize - 1)) { + test_msg("Expected start 0 end %u, got start %llu end %llu\n", + sectorsize - 1, start, end); goto out_bits; } unlock_extent(&tmp, start, end); @@ -144,7 +144,7 @@ static int test_find_delalloc(void) test_msg("Couldn't find the locked page\n"); goto out_bits; } - set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL); + set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -172,7 +172,7 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - test_start = max_bytes + 4096; + test_start = max_bytes + sectorsize; locked_page = find_lock_page(inode->i_mapping, test_start >> PAGE_SHIFT); if (!locked_page) { @@ -351,14 +351,15 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, return 0; } -static int test_eb_bitmaps(void) +static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) { - unsigned long len = PAGE_SIZE * 4; + unsigned long len; unsigned long *bitmap; struct extent_buffer *eb; int ret; test_msg("Running extent buffer bitmap tests\n"); + len = sectorsize * 4; bitmap = kmalloc(len, GFP_KERNEL); if (!bitmap) { @@ -379,7 +380,7 @@ static int test_eb_bitmaps(void) /* Do it over again with an extent buffer which isn't page-aligned. */ free_extent_buffer(eb); - eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len); + eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len); if (!eb) { test_msg("Couldn't allocate test extent buffer\n"); kfree(bitmap); @@ -393,17 +394,17 @@ out: return ret; } -int btrfs_test_extent_io(void) +int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) { int ret; test_msg("Running extent I/O tests\n"); - ret = test_find_delalloc(); + ret = test_find_delalloc(sectorsize); if (ret) goto out; - ret = test_eb_bitmaps(); + ret = test_eb_bitmaps(sectorsize, nodesize); out: test_msg("Extent I/O tests finished\n"); return ret; diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index f3756d6b9ba2..029343b3e854 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -99,7 +99,8 @@ static int test_extents(struct btrfs_block_group_cache *cache) return 0; } -static int test_bitmaps(struct btrfs_block_group_cache *cache) +static int test_bitmaps(struct btrfs_block_group_cache *cache, + u32 sectorsize) { u64 next_bitmap_offset; int ret; @@ -139,7 +140,7 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) * The first bitmap we have starts at offset 0 so the next one is just * at the end of the first bitmap. */ - next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + next_bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize); /* Test a bit straddling two bitmaps */ ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M, @@ -167,9 +168,10 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) } /* This is the high grade jackassery */ -static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) +static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache, + u32 sectorsize) { - u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize); int ret; test_msg("Running bitmap and extent tests\n"); @@ -401,7 +403,8 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache) * requests. */ static int -test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) +test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache, + u32 sectorsize) { int ret; u64 offset; @@ -539,7 +542,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. */ - ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, 4096); + ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -597,8 +600,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -ENOENT; } - if (cache->free_space_ctl->free_space != (SZ_1M + 4096)) { - test_msg("Cache free space is not 1Mb + 4Kb\n"); + if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) { + test_msg("Cache free space is not 1Mb + %u\n", sectorsize); return -EINVAL; } @@ -611,22 +614,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -EINVAL; } - /* All that remains is a 4Kb free space region in a bitmap. Confirm. */ + /* + * All that remains is a sectorsize free space region in a bitmap. + * Confirm. + */ ret = check_num_extents_and_bitmaps(cache, 1, 1); if (ret) return ret; - if (cache->free_space_ctl->free_space != 4096) { - test_msg("Cache free space is not 4Kb\n"); + if (cache->free_space_ctl->free_space != sectorsize) { + test_msg("Cache free space is not %u\n", sectorsize); return -EINVAL; } offset = btrfs_find_space_for_alloc(cache, - 0, 4096, 0, + 0, sectorsize, 0, &max_extent_size); if (offset != (SZ_128M + SZ_16M)) { - test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n", - offset); + test_msg("Failed to allocate %u, returned offset : %llu\n", + sectorsize, offset); return -EINVAL; } @@ -733,7 +739,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. */ - ret = btrfs_add_free_space(cache, SZ_32M, 8192); + ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -757,7 +763,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) /* * Confirm that our extent entry didn't stole all free space from the - * bitmap, because of the small 8Kb free space region. + * bitmap, because of the small 2 * sectorsize free space region. */ ret = check_num_extents_and_bitmaps(cache, 2, 1); if (ret) @@ -783,8 +789,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -ENOENT; } - if (cache->free_space_ctl->free_space != (SZ_1M + 8192)) { - test_msg("Cache free space is not 1Mb + 8Kb\n"); + if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) { + test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize); return -EINVAL; } @@ -796,21 +802,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -EINVAL; } - /* All that remains is a 8Kb free space region in a bitmap. Confirm. */ + /* + * All that remains is 2 * sectorsize free space region + * in a bitmap. Confirm. + */ ret = check_num_extents_and_bitmaps(cache, 1, 1); if (ret) return ret; - if (cache->free_space_ctl->free_space != 8192) { - test_msg("Cache free space is not 8Kb\n"); + if (cache->free_space_ctl->free_space != 2 * sectorsize) { + test_msg("Cache free space is not %u\n", 2 * sectorsize); return -EINVAL; } offset = btrfs_find_space_for_alloc(cache, - 0, 8192, 0, + 0, 2 * sectorsize, 0, &max_extent_size); if (offset != SZ_32M) { - test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n", + test_msg("Failed to allocate %u, offset: %llu\n", + 2 * sectorsize, offset); return -EINVAL; } @@ -825,7 +835,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return 0; } -int btrfs_test_free_space_cache(void) +int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) { struct btrfs_block_group_cache *cache; struct btrfs_root *root = NULL; @@ -833,13 +843,13 @@ int btrfs_test_free_space_cache(void) test_msg("Running btrfs free space cache tests\n"); - cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024); + cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024, sectorsize); if (!cache) { test_msg("Couldn't run the tests\n"); return 0; } - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { ret = PTR_ERR(root); goto out; @@ -855,14 +865,14 @@ int btrfs_test_free_space_cache(void) ret = test_extents(cache); if (ret) goto out; - ret = test_bitmaps(cache); + ret = test_bitmaps(cache, sectorsize); if (ret) goto out; - ret = test_bitmaps_and_extents(cache); + ret = test_bitmaps_and_extents(cache, sectorsize); if (ret) goto out; - ret = test_steal_space_from_bitmap_to_extent(cache); + ret = test_steal_space_from_bitmap_to_extent(cache, sectorsize); out: btrfs_free_dummy_block_group(cache); btrfs_free_dummy_root(root); diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 7cea4462acd5..aac507085ab0 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include "btrfs-tests.h" #include "../ctree.h" #include "../disk-io.h" @@ -30,7 +31,7 @@ struct free_space_extent { * The test cases align their operations to this in order to hit some of the * edge cases in the bitmap code. */ -#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096) +#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE) static int __check_free_space_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, @@ -439,7 +440,8 @@ typedef int (*test_func_t)(struct btrfs_trans_handle *, struct btrfs_block_group_cache *, struct btrfs_path *); -static int run_test(test_func_t test_func, int bitmaps) +static int run_test(test_func_t test_func, int bitmaps, + u32 sectorsize, u32 nodesize) { struct btrfs_root *root = NULL; struct btrfs_block_group_cache *cache = NULL; @@ -447,7 +449,7 @@ static int run_test(test_func_t test_func, int bitmaps) struct btrfs_path *path = NULL; int ret; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate dummy root\n"); ret = PTR_ERR(root); @@ -466,7 +468,8 @@ static int run_test(test_func_t test_func, int bitmaps) root->fs_info->free_space_root = root; root->fs_info->tree_root = root; - root->node = alloc_test_extent_buffer(root->fs_info, 4096); + root->node = alloc_test_extent_buffer(root->fs_info, + nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); ret = -ENOMEM; @@ -474,9 +477,9 @@ static int run_test(test_func_t test_func, int bitmaps) } btrfs_set_header_level(root->node, 0); btrfs_set_header_nritems(root->node, 0); - root->alloc_bytenr += 8192; + root->alloc_bytenr += 2 * nodesize; - cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE); + cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE, sectorsize); if (!cache) { test_msg("Couldn't allocate dummy block group cache\n"); ret = -ENOMEM; @@ -534,17 +537,18 @@ out: return ret; } -static int run_test_both_formats(test_func_t test_func) +static int run_test_both_formats(test_func_t test_func, + u32 sectorsize, u32 nodesize) { int ret; - ret = run_test(test_func, 0); + ret = run_test(test_func, 0, sectorsize, nodesize); if (ret) return ret; - return run_test(test_func, 1); + return run_test(test_func, 1, sectorsize, nodesize); } -int btrfs_test_free_space_tree(void) +int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize) { test_func_t tests[] = { test_empty_block_group, @@ -561,9 +565,11 @@ int btrfs_test_free_space_tree(void) test_msg("Running free space tree tests\n"); for (i = 0; i < ARRAY_SIZE(tests); i++) { - int ret = run_test_both_formats(tests[i]); + int ret = run_test_both_formats(tests[i], sectorsize, + nodesize); if (ret) { - test_msg("%pf failed\n", tests[i]); + test_msg("%pf : sectorsize %u failed\n", + tests[i], sectorsize); return ret; } } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 8a25fe8b7c45..29648c0a39f1 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include "btrfs-tests.h" #include "../ctree.h" #include "../btrfs_inode.h" @@ -86,19 +87,19 @@ static void insert_inode_item_key(struct btrfs_root *root) * diagram of how the extents will look though this may not be possible we still * want to make sure everything acts normally (the last number is not inclusive) * - * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288] - * [hole ][inline][ hole ][ regular ][regular1 split][ hole ] + * [0 - 5][5 - 6][ 6 - 4096 ][ 4096 - 4100][4100 - 8195][8195 - 12291] + * [hole ][inline][hole but no extent][ hole ][ regular ][regular1 split] * - * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056] - * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ] + * [12291 - 16387][16387 - 24579][24579 - 28675][ 28675 - 32771][32771 - 36867 ] + * [ hole ][regular1 split][ prealloc ][ prealloc1 ][prealloc1 written] * - * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ] - * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent] + * [36867 - 45059][45059 - 53251][53251 - 57347][57347 - 61443][61443- 69635] + * [ prealloc1 ][ compressed ][ compressed1 ][ regular ][ compressed1] * - * [81920-86016] - * [ regular ] + * [69635-73731][ 73731 - 86019 ][86019-90115] + * [ regular ][ hole but no extent][ regular ] */ -static void setup_file_extents(struct btrfs_root *root) +static void setup_file_extents(struct btrfs_root *root, u32 sectorsize) { int slot = 0; u64 disk_bytenr = SZ_1M; @@ -119,7 +120,7 @@ static void setup_file_extents(struct btrfs_root *root) insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0, slot); slot++; - offset = 4096; + offset = sectorsize; /* Now another hole */ insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0, @@ -128,99 +129,106 @@ static void setup_file_extents(struct btrfs_root *root) offset += 4; /* Now for a regular extent */ - insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0, + disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - disk_bytenr += 4096; - offset += 4095; + disk_bytenr += sectorsize; + offset += sectorsize - 1; /* * Now for 3 extents that were split from a hole punch so we test * offsets properly. */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); + slot++; + offset += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG, - 0, slot); - slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 8192; - disk_bytenr += 16384; + offset += 2 * sectorsize; + disk_bytenr += 4 * sectorsize; /* Now for a unwritten prealloc extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 4096; + offset += sectorsize; /* * We want to jack up disk_bytenr a little more so the em stuff doesn't * merge our records. */ - disk_bytenr += 8192; + disk_bytenr += 2 * sectorsize; /* * Now for a partially written prealloc extent, basically the same as * the hole punch example above. Ram_bytes never changes when you mark * extents written btw. */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + slot++; + offset += sectorsize; + insert_extent(root, offset, sectorsize, 4 * sectorsize, sectorsize, + disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, + slot); + slot++; + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); - slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); - slot++; - offset += 8192; - disk_bytenr += 16384; + offset += 2 * sectorsize; + disk_bytenr += 4 * sectorsize; /* Now a normal compressed extent */ - insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + insert_extent(root, offset, 2 * sectorsize, 2 * sectorsize, 0, + disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, + BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 8192; + offset += 2 * sectorsize; /* No merges */ - disk_bytenr += 8192; + disk_bytenr += 2 * sectorsize; /* Now a split compressed extent */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, + BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096, + offset += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, + disk_bytenr + sectorsize, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 8192; - disk_bytenr += 8192; + offset += 2 * sectorsize; + disk_bytenr += 2 * sectorsize; /* Now extents that have a hole but no hole extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 16384; - disk_bytenr += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + offset += 4 * sectorsize; + disk_bytenr += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); } static unsigned long prealloc_only = 0; static unsigned long compressed_only = 0; static unsigned long vacancy_only = 0; -static noinline int test_btrfs_get_extent(void) +static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) { struct inode *inode = NULL; struct btrfs_root *root = NULL; @@ -240,7 +248,7 @@ static noinline int test_btrfs_get_extent(void) BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); goto out; @@ -256,7 +264,7 @@ static noinline int test_btrfs_get_extent(void) goto out; } - root->node = alloc_dummy_extent_buffer(NULL, 4096); + root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; @@ -273,7 +281,7 @@ static noinline int test_btrfs_get_extent(void) /* First with no extents */ BTRFS_I(inode)->root = root; - em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0); if (IS_ERR(em)) { em = NULL; test_msg("Got an error when we shouldn't have\n"); @@ -295,7 +303,7 @@ static noinline int test_btrfs_get_extent(void) * setup_file_extents, so if you change anything there you need to * update the comment and update the expected values below. */ - setup_file_extents(root); + setup_file_extents(root, sectorsize); em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0); if (IS_ERR(em)) { @@ -318,7 +326,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -327,7 +335,8 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected an inline, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4091) { + + if (em->start != offset || em->len != (sectorsize - 5)) { test_msg("Unexpected extent wanted start %llu len 1, got start " "%llu len %llu\n", offset, em->start, em->len); goto out; @@ -344,7 +353,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -366,7 +375,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Regular extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -375,7 +384,7 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4095) { + if (em->start != offset || em->len != sectorsize - 1) { test_msg("Unexpected extent wanted start %llu len 4095, got " "start %llu len %llu\n", offset, em->start, em->len); goto out; @@ -393,7 +402,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* The next 3 are split extents */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -402,9 +411,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -421,7 +431,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -430,9 +440,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a hole, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -442,7 +453,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -451,9 +462,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -475,7 +487,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -484,9 +496,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -503,7 +516,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* The next 3 are a half written prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -512,9 +525,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -532,7 +546,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -541,9 +555,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -564,7 +579,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -573,9 +588,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -598,7 +614,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Now for the compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -607,9 +623,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -631,7 +648,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Split compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -640,9 +657,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -665,7 +683,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -674,9 +692,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -691,7 +710,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -701,9 +720,10 @@ static noinline int test_btrfs_get_extent(void) disk_bytenr, em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -725,7 +745,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* A hole between regular extents but no hole extent */ - em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -734,9 +754,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -765,9 +786,10 @@ static noinline int test_btrfs_get_extent(void) * length of the actual hole, if this changes we'll have to change this * test. */ - if (em->start != offset || em->len != 12288) { - test_msg("Unexpected extent wanted start %llu len 12288, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 3 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 3 * sectorsize, em->start, em->len); goto out; } if (em->flags != vacancy_only) { @@ -783,7 +805,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -792,9 +814,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -815,7 +838,7 @@ out: return ret; } -static int test_hole_first(void) +static int test_hole_first(u32 sectorsize, u32 nodesize) { struct inode *inode = NULL; struct btrfs_root *root = NULL; @@ -832,7 +855,7 @@ static int test_hole_first(void) BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); goto out; @@ -844,7 +867,7 @@ static int test_hole_first(void) goto out; } - root->node = alloc_dummy_extent_buffer(NULL, 4096); + root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; @@ -861,9 +884,9 @@ static int test_hole_first(void) * btrfs_get_extent. */ insert_inode_item_key(root); - insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096, - BTRFS_FILE_EXTENT_REG, 0, 1); - em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0); + insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1); + em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -872,9 +895,10 @@ static int test_hole_first(void) test_msg("Expected a hole, got %llu\n", em->block_start); goto out; } - if (em->start != 0 || em->len != 4096) { - test_msg("Unexpected extent wanted start 0 len 4096, got start " - "%llu len %llu\n", em->start, em->len); + if (em->start != 0 || em->len != sectorsize) { + test_msg("Unexpected extent wanted start 0 len %u, " + "got start %llu len %llu\n", + sectorsize, em->start, em->len); goto out; } if (em->flags != vacancy_only) { @@ -884,18 +908,19 @@ static int test_hole_first(void) } free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0); + em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; } - if (em->block_start != 4096) { + if (em->block_start != sectorsize) { test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != 4096 || em->len != 4096) { - test_msg("Unexpected extent wanted start 4096 len 4096, got " - "start %llu len %llu\n", em->start, em->len); + if (em->start != sectorsize || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %u len %u, " + "got start %llu len %llu\n", + sectorsize, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -912,7 +937,7 @@ out: return ret; } -static int test_extent_accounting(void) +static int test_extent_accounting(u32 sectorsize, u32 nodesize) { struct inode *inode = NULL; struct btrfs_root *root = NULL; @@ -924,7 +949,7 @@ static int test_extent_accounting(void) return ret; } - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); goto out; @@ -954,10 +979,11 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE][4k] */ + /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ BTRFS_I(inode)->outstanding_extents++; ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, - BTRFS_MAX_EXTENT_SIZE + 4095, NULL); + BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, + NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -969,10 +995,10 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */ + /* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, BTRFS_MAX_EXTENT_SIZE >> 1, - (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_KERNEL); @@ -987,10 +1013,11 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE][4K] */ + /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ BTRFS_I(inode)->outstanding_extents++; ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, - (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + (BTRFS_MAX_EXTENT_SIZE >> 1) + + sectorsize - 1, NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); @@ -1004,16 +1031,17 @@ static int test_extent_accounting(void) } /* - * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K] + * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize] * * I'm artificially adding 2 to outstanding_extents because in the * buffered IO case we'd add things up as we go, but I don't feel like * doing that here, this isn't the interesting case we want to test. */ BTRFS_I(inode)->outstanding_extents += 2; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192, - (BTRFS_MAX_EXTENT_SIZE << 1) + 12287, - NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize, + (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, + NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1025,10 +1053,13 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */ + /* + * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize] + */ BTRFS_I(inode)->outstanding_extents++; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1042,8 +1073,8 @@ static int test_extent_accounting(void) /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, - BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, NULL, GFP_KERNEL); @@ -1063,8 +1094,9 @@ static int test_extent_accounting(void) * might fail and I'd rather satisfy my paranoia at this point. */ BTRFS_I(inode)->outstanding_extents++; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1103,7 +1135,7 @@ out: return ret; } -int btrfs_test_inodes(void) +int btrfs_test_inodes(u32 sectorsize, u32 nodesize) { int ret; @@ -1112,13 +1144,13 @@ int btrfs_test_inodes(void) set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); test_msg("Running btrfs_get_extent tests\n"); - ret = test_btrfs_get_extent(); + ret = test_btrfs_get_extent(sectorsize, nodesize); if (ret) return ret; test_msg("Running hole first btrfs_get_extent test\n"); - ret = test_hole_first(); + ret = test_hole_first(sectorsize, nodesize); if (ret) return ret; test_msg("Running outstanding_extents tests\n"); - return test_extent_accounting(); + return test_extent_accounting(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 8aa4ded31326..9c1d27352f19 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include "btrfs-tests.h" #include "../ctree.h" #include "../transaction.h" @@ -216,7 +217,8 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, return ret; } -static int test_no_shared_qgroup(struct btrfs_root *root) +static int test_no_shared_qgroup(struct btrfs_root *root, + u32 sectorsize, u32 nodesize) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; @@ -238,18 +240,18 @@ static int test_no_shared_qgroup(struct btrfs_root *root) * we can only call btrfs_qgroup_account_extent() directly to test * quota. */ - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, 5); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -257,32 +259,32 @@ static int test_no_shared_qgroup(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, 5, nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } old_roots = NULL; new_roots = NULL; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = remove_extent_item(root, 4096, 4096); + ret = remove_extent_item(root, nodesize, nodesize); if (ret) return -EINVAL; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -290,8 +292,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return -EINVAL; @@ -310,7 +312,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root) * right, also remove one of the roots and make sure the exclusive count is * adjusted properly. */ -static int test_multiple_refs(struct btrfs_root *root) +static int test_multiple_refs(struct btrfs_root *root, + u32 sectorsize, u32 nodesize) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; @@ -329,18 +332,18 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, 5); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -348,30 +351,31 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, 5, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = add_tree_ref(root, 4096, 4096, 0, 256); + ret = add_tree_ref(root, nodesize, nodesize, 0, 256); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -379,35 +383,35 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, 5, nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, 256, nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = remove_extent_ref(root, 4096, 4096, 0, 256); + ret = remove_extent_ref(root, nodesize, nodesize, 0, 256); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -415,8 +419,8 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; @@ -427,7 +431,7 @@ static int test_multiple_refs(struct btrfs_root *root) return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, 5, nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -435,13 +439,13 @@ static int test_multiple_refs(struct btrfs_root *root) return 0; } -int btrfs_test_qgroups(void) +int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) { struct btrfs_root *root; struct btrfs_root *tmp_root; int ret = 0; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); return PTR_ERR(root); @@ -468,7 +472,8 @@ int btrfs_test_qgroups(void) * Can't use bytenr 0, some things freak out * *cough*backref walking code*cough* */ - root->node = alloc_test_extent_buffer(root->fs_info, 4096); + root->node = alloc_test_extent_buffer(root->fs_info, nodesize, + nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); ret = -ENOMEM; @@ -476,9 +481,9 @@ int btrfs_test_qgroups(void) } btrfs_set_header_level(root->node, 0); btrfs_set_header_nritems(root->node, 0); - root->alloc_bytenr += 8192; + root->alloc_bytenr += 2 * nodesize; - tmp_root = btrfs_alloc_dummy_root(); + tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(tmp_root)) { test_msg("Couldn't allocate a fs root\n"); ret = PTR_ERR(tmp_root); @@ -493,7 +498,7 @@ int btrfs_test_qgroups(void) goto out; } - tmp_root = btrfs_alloc_dummy_root(); + tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(tmp_root)) { test_msg("Couldn't allocate a fs root\n"); ret = PTR_ERR(tmp_root); @@ -508,10 +513,10 @@ int btrfs_test_qgroups(void) } test_msg("Running qgroup tests\n"); - ret = test_no_shared_qgroup(root); + ret = test_no_shared_qgroup(root, sectorsize, nodesize); if (ret) goto out; - ret = test_multiple_refs(root); + ret = test_multiple_refs(root, sectorsize, nodesize); out: btrfs_free_dummy_root(root); return ret; From 87c279e613f848c691111b29d49de8df3f4f56da Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Jun 2016 22:18:48 -0700 Subject: [PATCH 0174/1050] blk-mq: really fix plug list flushing for nomerge queues Commit 0809e3ac6231 ("block: fix plug list flushing for nomerge queues") updated blk_mq_make_request() to set request_count even when blk_queue_nomerges() returns true. However, blk_mq_make_request() only does limited plugging and doesn't use request_count; blk_sq_make_request() is the one that should have been fixed. Do that and get rid of the unnecessary work in the mq version. Fixes: 0809e3ac6231 ("block: fix plug list flushing for nomerge queues") Signed-off-by: Omar Sandoval Reviewed-by: Ming Lei Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/blk-mq.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 29cbc1b5fbdb..f9b9049b1284 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1262,12 +1262,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio, q->bio_split); - if (!is_flush_fua && !blk_queue_nomerges(q)) { - if (blk_attempt_plug_merge(q, bio, &request_count, - &same_queue_rq)) - return BLK_QC_T_NONE; - } else - request_count = blk_plug_queued_count(q); + if (!is_flush_fua && !blk_queue_nomerges(q) && + blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) + return BLK_QC_T_NONE; rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) @@ -1358,9 +1355,11 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio, q->bio_split); - if (!is_flush_fua && !blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &request_count, NULL)) - return BLK_QC_T_NONE; + if (!is_flush_fua && !blk_queue_nomerges(q)) { + if (blk_attempt_plug_merge(q, bio, &request_count, NULL)) + return BLK_QC_T_NONE; + } else + request_count = blk_plug_queued_count(q); rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) From f55d84b07c4e7340473a25dc82b462607578402c Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Wed, 1 Jun 2016 08:53:48 -0700 Subject: [PATCH 0175/1050] stmmac: do not sleep in atomic context for mdio_reset stmmac_mdio_reset() has been updated to use msleep rather udelay (as some PHY requires a one second delay there). It called from stmmac_resume() within the spin_lock_irqsave block atomic context triggering 'scheduling while atomic'. The stmmac_priv lock usage is not fully documented, but it seems to protect the access to the MAC registers / DMA structures rather than the MDIO bus or the PHY (which have separate locking), so we can push the spin_lock after the stmmac_mdio_reset call. Signed-off-by: Vincent Palatin Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index eac45d0c75e2..a473c182c91d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3450,8 +3450,6 @@ int stmmac_resume(struct device *dev) if (!netif_running(ndev)) return 0; - spin_lock_irqsave(&priv->lock, flags); - /* Power Down bit, into the PM register, is cleared * automatically as soon as a magic packet or a Wake-up frame * is received. Anyway, it's better to manually clear @@ -3459,7 +3457,9 @@ int stmmac_resume(struct device *dev) * from another devices (e.g. serial console). */ if (device_may_wakeup(priv->device)) { + spin_lock_irqsave(&priv->lock, flags); priv->hw->mac->pmt(priv->hw, 0); + spin_unlock_irqrestore(&priv->lock, flags); priv->irq_wake = 0; } else { pinctrl_pm_select_default_state(priv->device); @@ -3473,6 +3473,8 @@ int stmmac_resume(struct device *dev) netif_device_attach(ndev); + spin_lock_irqsave(&priv->lock, flags); + priv->cur_rx = 0; priv->dirty_rx = 0; priv->dirty_tx = 0; From 9bd616e3dbedfc103f158197c8ad93678849b1ed Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 Jun 2016 18:52:16 +0100 Subject: [PATCH 0176/1050] cpuidle: Do not access cpuidle_devices when !CONFIG_CPU_IDLE The cpuidle_devices per-CPU variable is only defined when CPU_IDLE is enabled. Commit c8cc7d4de7a4 ("sched/idle: Reorganize the idle loop") removed the #ifdef CONFIG_CPU_IDLE around cpuidle_idle_call() with the compiler optimising away __this_cpu_read(cpuidle_devices). However, with CONFIG_UBSAN && !CONFIG_CPU_IDLE, this optimisation no longer happens and the kernel fails to link since cpuidle_devices is not defined. This patch introduces an accessor function for the current CPU cpuidle device (returning NULL when !CONFIG_CPU_IDLE) and uses it in cpuidle_idle_call(). Signed-off-by: Catalin Marinas Cc: 4.5+ # 4.5+ Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 3 +++ kernel/sched/idle.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 786ad32631a6..07b83d32f66c 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -152,6 +152,8 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev); extern int cpuidle_play_dead(void); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); +static inline struct cpuidle_device *cpuidle_get_device(void) +{return __this_cpu_read(cpuidle_devices); } #else static inline void disable_cpuidle(void) { } static inline bool cpuidle_not_available(struct cpuidle_driver *drv, @@ -187,6 +189,7 @@ static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } +static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } #endif #if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index bd12c6c714ec..c5aeedf4e93a 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -127,7 +127,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ static void cpuidle_idle_call(void) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_device *dev = cpuidle_get_device(); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; From fcdec35e54dc8a7659f59482e23aad1fe9c3ef5a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 24 Mar 2016 15:40:25 -0500 Subject: [PATCH 0177/1050] MAINTAINERS: DeviceTree maintainer updates Grant stepped down as kernel DT maintainer and his linaro.org email will be bouncing soon, so remove him now. Pawel, Ian and Kumar either said they don't want to remain maintainers or didn't reply, so removing them as binding maintainers. Update the DT git tree to mine. Grant's has not been active for a while now. I'm actively using patchwork for binding review tracking, so add its URL. Cc: Grant Likely Cc: Pawel Moll Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Cc: Frank Rowand Signed-off-by: Rob Herring --- MAINTAINERS | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7304d2e37a98..e60872e38d13 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8404,10 +8404,9 @@ F: drivers/i2c/busses/i2c-ocores.c OPEN FIRMWARE AND FLATTENED DEVICE TREE M: Rob Herring M: Frank Rowand -M: Grant Likely L: devicetree@vger.kernel.org W: http://www.devicetree.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/glikely/linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git S: Maintained F: drivers/of/ F: include/linux/of*.h @@ -8415,12 +8414,10 @@ F: scripts/dtc/ OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS M: Rob Herring -M: Pawel Moll M: Mark Rutland -M: Ian Campbell -M: Kumar Gala L: devicetree@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git +Q: http://patchwork.ozlabs.org/project/devicetree-bindings/list/ S: Maintained F: Documentation/devicetree/ F: arch/*/boot/dts/ From ce25d66ad5f8d921bac5fe2d32d62fa30c0f9a70 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jun 2016 14:52:43 -0700 Subject: [PATCH 0178/1050] Possible problem with e6afc8ac ("udp: remove headers from UDP packets before queueing") Paul Moore tracked a regression caused by a recent commit, which mistakenly assumed that sk_filter() could be avoided if socket had no current BPF filter. The intent was to avoid udp_lib_checksum_complete() overhead. But sk_filter() also checks skb_pfmemalloc() and security_sock_rcv_skb(), so better call it. Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Signed-off-by: Eric Dumazet Reported-by: Paul Moore Tested-by: Paul Moore Tested-by: Stephen Smalley Cc: samanthakumar Signed-off-by: David S. Miller --- net/ipv4/udp.c | 10 +++++----- net/ipv6/udp.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d56c0559b477..0ff31d97d485 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1618,12 +1618,12 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter(sk, skb)) - goto drop; - } + + if (sk_filter(sk, skb)) + goto drop; udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2da1896af934..f421c9f23c5b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -653,12 +653,12 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) - goto csum_error; - if (sk_filter(sk, skb)) - goto drop; - } + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) + goto csum_error; + + if (sk_filter(sk, skb)) + goto drop; udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { From 5d2be1422e02ccd697ccfcd45c85b4a26e6178e2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 2 Jun 2016 04:04:56 -0400 Subject: [PATCH 0179/1050] tipc: fix an infoleak in tipc_nl_compat_link_dump link_info.str is a char array of size 60. Memory after the NULL byte is not initialized. Sending the whole object out can cause a leak. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f795b1dd0ccd..3ad9fab1985f 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -604,7 +604,8 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME])); + nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]), + TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); From 4116def2337991b39919f3b448326e21c40e0dbb Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 2 Jun 2016 04:11:20 -0400 Subject: [PATCH 0180/1050] rds: fix an infoleak in rds_inc_info_copy The last field "flags" of object "minfo" is not initialized. Copying this object out may leak kernel stack data. Assign 0 to it to avoid leak. Signed-off-by: Kangjie Lu Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/recv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rds/recv.c b/net/rds/recv.c index c0be1ecd11c9..8413f6c99e13 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -561,5 +561,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, minfo.fport = inc->i_hdr.h_dport; } + minfo.flags = 0; + rds_info_copy(iter, &minfo, sizeof(minfo)); } From 121b78e679ee3ffab780115e260b2775d0cc1f73 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 26 May 2016 08:41:08 +0300 Subject: [PATCH 0181/1050] drm/amdkfd: unbind only existing processes When unbinding a process from a device (initiated by amd_iommu_v2), the driver needs to make sure that process still exists in the process table. There is a possibility that amdkfd's own notifier handler - kfd_process_notifier_release() - was called before the unbind function and it already removed the process from the process table. v2: Because there can be only one process with the specified pasid, and because *p can't be NULL inside the hash_for_each_rcu macro, it is more reasonable to just put the whole code inside the if statement that compares the pasid value. That way, when we exit hash_for_each_rcu, we simply exit the function as well. Signed-off-by: Oded Gabbay CC: Stable --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 76 ++++++++++++++---------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ac005796b71c..a64bc619ed82 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -404,42 +404,52 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) idx = srcu_read_lock(&kfd_processes_srcu); + /* + * Look for the process that matches the pasid. If there is no such + * process, we either released it in amdkfd's own notifier, or there + * is a bug. Unfortunately, there is no way to tell... + */ hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes) - if (p->pasid == pasid) - break; + if (p->pasid == pasid) { + + srcu_read_unlock(&kfd_processes_srcu, idx); + + pr_debug("Unbinding process %d from IOMMU\n", pasid); + + mutex_lock(&p->mutex); + + if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) + kfd_dbgmgr_destroy(dev->dbgmgr); + + pqm_uninit(&p->pqm); + + pdd = kfd_get_process_device_data(dev, p); + + if (!pdd) { + mutex_unlock(&p->mutex); + return; + } + + if (pdd->reset_wavefronts) { + dbgdev_wave_reset_wavefronts(pdd->dev, p); + pdd->reset_wavefronts = false; + } + + /* + * Just mark pdd as unbound, because we still need it + * to call amd_iommu_unbind_pasid() in when the + * process exits. + * We don't call amd_iommu_unbind_pasid() here + * because the IOMMU called us. + */ + pdd->bound = false; + + mutex_unlock(&p->mutex); + + return; + } srcu_read_unlock(&kfd_processes_srcu, idx); - - BUG_ON(p->pasid != pasid); - - mutex_lock(&p->mutex); - - if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) - kfd_dbgmgr_destroy(dev->dbgmgr); - - pqm_uninit(&p->pqm); - - pdd = kfd_get_process_device_data(dev, p); - - if (!pdd) { - mutex_unlock(&p->mutex); - return; - } - - if (pdd->reset_wavefronts) { - dbgdev_wave_reset_wavefronts(pdd->dev, p); - pdd->reset_wavefronts = false; - } - - /* - * Just mark pdd as unbound, because we still need it to call - * amd_iommu_unbind_pasid() in when the process exits. - * We don't call amd_iommu_unbind_pasid() here - * because the IOMMU called us. - */ - pdd->bound = false; - - mutex_unlock(&p->mutex); } struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) From bc4755a4bd1845ef6e88ac8c62f12e05bb530256 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 26 May 2016 08:41:48 +0300 Subject: [PATCH 0182/1050] drm/amdkfd: destroy dbgmgr in notifier release amdkfd need to destroy the debug manager in case amdkfd's notifier function is called before the unbind function, because in that case, the unbind function will exit without destroying debug manager. Signed-off-by: Oded Gabbay CC: Stable --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a64bc619ed82..7708d90b9da9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -242,13 +242,19 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, pqm_uninit(&p->pqm); /* Iterate over all process device data structure and check - * if we should reset all wavefronts */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) + * if we should delete debug managers and reset all wavefronts + */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + if ((pdd->dev->dbgmgr) && + (pdd->dev->dbgmgr->pasid == p->pasid)) + kfd_dbgmgr_destroy(pdd->dev->dbgmgr); + if (pdd->reset_wavefronts) { pr_warn("amdkfd: Resetting all wave fronts\n"); dbgdev_wave_reset_wavefronts(pdd->dev, p); pdd->reset_wavefronts = false; } + } mutex_unlock(&p->mutex); From 0fbbbf8b599ff840ff1a3c0cc00dd67ba8a52c9c Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 29 May 2016 08:21:53 +0300 Subject: [PATCH 0183/1050] drm/amdkfd: print once about mem_banks truncation This print can really spam the kernel log in case we are truncating mem_banks, so just print this info once. It should also not be classified as warning. Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 74909e72a009..884c96f50c3d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -666,7 +666,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.simd_count); if (dev->mem_bank_count < dev->node_props.mem_banks_count) { - pr_warn("kfd: mem_banks_count truncated from %d to %d\n", + pr_info_once("kfd: mem_banks_count truncated from %d to %d\n", dev->node_props.mem_banks_count, dev->mem_bank_count); sysfs_show_32bit_prop(buffer, "mem_banks_count", From 31b2a32f708bb33b3f35b03ce3d2cb31f7d1e684 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 18 May 2016 15:28:06 +0300 Subject: [PATCH 0184/1050] phy: ti-pipe3: Program the DPLL even if it was already locked If bootloader has set a wrong DPLL then we must trash those values and re-program it anyways. This fixes USB3 devices not being enumerated on beagle-x15 if usb was started in u-boot. We don't re-program SATA DPLL if it is locked as it was causing SATA failures if device was hotpluged after boot. Reported-by: Robert Nelson Signed-off-by: Roger Quadros Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-ti-pipe3.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/phy/phy-ti-pipe3.c b/drivers/phy/phy-ti-pipe3.c index 0a477d24cf76..bf46844dc387 100644 --- a/drivers/phy/phy-ti-pipe3.c +++ b/drivers/phy/phy-ti-pipe3.c @@ -293,11 +293,18 @@ static int ti_pipe3_init(struct phy *x) ret = ti_pipe3_dpll_wait_lock(phy); } - /* Program the DPLL only if not locked */ + /* SATA has issues if re-programmed when locked */ val = ti_pipe3_readl(phy->pll_ctrl_base, PLL_STATUS); - if (!(val & PLL_LOCK)) - if (ti_pipe3_dpll_program(phy)) - return -EINVAL; + if ((val & PLL_LOCK) && of_device_is_compatible(phy->dev->of_node, + "ti,phy-pipe3-sata")) + return ret; + + /* Program the DPLL */ + ret = ti_pipe3_dpll_program(phy); + if (ret) { + ti_pipe3_disable_clocks(phy); + return -EINVAL; + } return ret; } From 55eed755c6e30a89be3a791a6b0ad208aadd9bdc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 May 2016 13:11:17 +0200 Subject: [PATCH 0185/1050] locking/seqcount: Re-fix raw_read_seqcount_latch() Commit 50755bc1c305 ("seqlock: fix raw_read_seqcount_latch()") broke raw_read_seqcount_latch(). If you look at the comment that was modified; the thing that changes is the seq count, not the latch pointer. * void latch_modify(struct latch_struct *latch, ...) * { * smp_wmb(); <- Ensure that the last data[1] update is visible * latch->seq++; * smp_wmb(); <- Ensure that the seqcount update is visible * * modify(latch->data[0], ...); * * smp_wmb(); <- Ensure that the data[0] update is visible * latch->seq++; * smp_wmb(); <- Ensure that the seqcount update is visible * * modify(latch->data[1], ...); * } * * The query will have a form like: * * struct entry *latch_query(struct latch_struct *latch, ...) * { * struct entry *entry; * unsigned seq, idx; * * do { * seq = lockless_dereference(latch->seq); So here we have: seq = READ_ONCE(latch->seq); smp_read_barrier_depends(); Which is exactly what we want; the new code: seq = ({ p = READ_ONCE(latch); smp_read_barrier_depends(); p })->seq; is just wrong; because it looses the volatile read on seq, which can now be torn or worse 'optimized'. And the read_depend barrier is also placed wrong, we want it after the load of seq, to match the above data[] up-to-date wmb()s. Such that when we dereference latch->data[] below, we're guaranteed to observe the right data. * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * * smp_rmb(); * } while (seq != latch->seq); * * return entry; * } So yes, not passing a pointer is not pretty, but the code was correct, and isn't anymore now. Change to explicit READ_ONCE()+smp_read_barrier_depends() to avoid confusion and allow strict lockless_dereference() checking. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Paul McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 50755bc1c305 ("seqlock: fix raw_read_seqcount_latch()") Link: http://lkml.kernel.org/r/20160527111117.GL3192@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/seqlock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 7973a821ac58..ead97654c4e9 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -277,7 +277,10 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) static inline int raw_read_seqcount_latch(seqcount_t *s) { - return lockless_dereference(s)->sequence; + int seq = READ_ONCE(s->sequence); + /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ + smp_read_barrier_depends(); + return seq; } /** @@ -331,7 +334,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * unsigned seq, idx; * * do { - * seq = lockless_dereference(latch)->seq; + * seq = raw_read_seqcount_latch(&latch->seq); * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); From 0422e83d84ae24b933e4b0d4c1e0f0b4ae8a0a3b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 May 2016 21:08:17 +0100 Subject: [PATCH 0186/1050] locking/ww_mutex: Report recursive ww_mutex locking early Recursive locking for ww_mutexes was originally conceived as an exception. However, it is heavily used by the DRM atomic modesetting code. Currently, the recursive deadlock is checked after we have queued up for a busy-spin and as we never release the lock, we spin until kicked, whereupon the deadlock is discovered and reported. A simple solution for the now common problem is to move the recursive deadlock discovery to the first action when taking the ww_mutex. Suggested-by: Maarten Lankhorst Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Maarten Lankhorst Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1464293297-19777-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- kernel/locking/mutex.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index e364b424b019..79d2d765a75f 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) if (!hold_ctx) return 0; - if (unlikely(ctx == hold_ctx)) - return -EALREADY; - if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { #ifdef CONFIG_DEBUG_MUTEXES @@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, unsigned long flags; int ret; + if (use_ww_ctx) { + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) + return -EALREADY; + } + preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); From 3b94a891667c30fb4624221497d77fc65d950345 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 27 May 2016 04:12:20 -0700 Subject: [PATCH 0187/1050] perf/x86/intel/uncore: Remove SBOX support for Broadwell server There was a report that on certain Broadwell-EP systems writing any bit of the SBOX PMU initialization MSR would #GP at boot. This did not happen on all systems. My test systems booted fine. Considering both DE and EP may have such issues, this patch removes SBOX support for all Broadwell platforms for now. Reported-and-tested-by: Mark van Dijk Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1464347540-5763-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b2625867ebd1..874e8bd64d1d 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2868,27 +2868,10 @@ static struct intel_uncore_type bdx_uncore_cbox = { .format_group = &hswep_uncore_cbox_format_group, }; -static struct intel_uncore_type bdx_uncore_sbox = { - .name = "sbox", - .num_counters = 4, - .num_boxes = 4, - .perf_ctr_bits = 48, - .event_ctl = HSWEP_S0_MSR_PMON_CTL0, - .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, - .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, - .msr_offset = HSWEP_SBOX_MSR_OFFSET, - .ops = &hswep_uncore_sbox_msr_ops, - .format_group = &hswep_uncore_sbox_format_group, -}; - -#define BDX_MSR_UNCORE_SBOX 3 - static struct intel_uncore_type *bdx_msr_uncores[] = { &bdx_uncore_ubox, &bdx_uncore_cbox, &hswep_uncore_pcu, - &bdx_uncore_sbox, NULL, }; @@ -2897,10 +2880,6 @@ void bdx_uncore_cpu_init(void) if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; - - /* BDX-DE doesn't have SBOX */ - if (boot_cpu_data.x86_model == 86) - uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; } static struct intel_uncore_type bdx_uncore_ha = { From f90d83b301701026b2e4c437a3613f377f63290e Mon Sep 17 00:00:00 2001 From: AceLan Kao Date: Fri, 3 Jun 2016 14:45:25 +0800 Subject: [PATCH 0188/1050] ALSA: hda - Fix headset mic detection problem for Dell machine Add the pin configuration value of this machine into the pin_quirk table to make DELL1_MIC_NO_PRESENCE apply to this machine. Signed-off-by: AceLan Kao Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7960316d767c..49d581aed7f9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5822,6 +5822,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60180}, {0x14, 0x90170130}, {0x21, 0x02211040}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell Inspiron 5565", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60180}, + {0x14, 0x90170120}, + {0x21, 0x02211030}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, From 55f1ea15216a5a14c96738bd5284100a00ffa9dc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 31 May 2016 11:23:43 +0100 Subject: [PATCH 0189/1050] efi: Fix for_each_efi_memory_desc_in_map() for empty memmaps Commit: 78ce248faa3c ("efi: Iterate over efi.memmap in for_each_efi_memory_desc()") introduced a regression for systems booted with the 'noefi' kernel option. In particular, I observed an early kernel hang in efi_find_mirror()'s for_each_efi_memory_desc() call. As we don't have efi memmap on this system we enter this iterator with the following parameters: efi.memmap.map = 0, efi.memmap.map_end = 0, efi.memmap.desc_size = 28 ... then for_each_efi_memory_desc_in_map() does the following comparison: (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); ... where md = 0, (m)->map_end = 0 and (m)->desc_size = 28 but when we subtract something from a NULL pointer wrap around happens and we end up returning invalid pointer and crash. Fix it by using the correct pointer arithmetics. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Mark Salter Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 78ce248faa3c ("efi: Iterate over efi.memmap in for_each_efi_memory_desc()") Link: http://lkml.kernel.org/r/1464690224-4503-2-git-send-email-matt@codeblueprint.co.uk [ Made the changelog more readable. ] Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index c2db3ca22217..f196dd0b0f2f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1005,7 +1005,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm, /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ - (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \ + ((void *)(md) + (m)->desc_size) <= (m)->map_end; \ (md) = (void *)(md) + (m)->desc_size) /** From c75343972b79ef5bd44c498a63b326e37470bbfc Mon Sep 17 00:00:00 2001 From: Dennis Chen Date: Tue, 31 May 2016 11:23:44 +0100 Subject: [PATCH 0190/1050] efi/arm: Fix the format of EFI debug messages When both EFI and memblock debugging is enabled on the kernel command line: 'efi=debug memblock=debug' .. the debug messages for early_con look the following way: [ 0.000000] efi: 0x0000e1050000-0x0000e105ffff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x0000e1300000-0x0000e1300fff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x0000e8200000-0x0000e827ffff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x008000000000-0x008001e7ffff [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] [ 0.000000] memblock_add: [0x00008000000000-0x00008001e7ffff] flags 0x0 early_init_dt_add_memory_arch+0x54/0x5c [ 0.000000] * ... Note the misplaced '*' line, which happened because the memblock debug message was printed while the EFI debug message was still being constructed.. This patch fixes the output to be the expected: [ 0.000000] efi: 0x0000e1050000-0x0000e105ffff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x0000e1300000-0x0000e1300fff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x0000e8200000-0x0000e827ffff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x008000000000-0x008001e7ffff [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC]* [ 0.000000] memblock_add: [0x00008000000000-0x00008001e7ffff] flags 0x0 early_init_dt_add_memory_arch+0x54/0x5c ... Note how the '*' is now in the proper EFI debug message line. Signed-off-by: Dennis Chen Signed-off-by: Matt Fleming Acked-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dan Williams Cc: Linus Torvalds Cc: Mark Salter Cc: Peter Zijlstra Cc: Steve Capper Cc: Steve McIntyre Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1464690224-4503-3-git-send-email-matt@codeblueprint.co.uk [ Made the changelog more readable. ] Signed-off-by: Ingo Molnar --- drivers/firmware/efi/arm-init.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index a850cbc48d8d..c49d50e68aee 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -174,6 +174,7 @@ static __init void reserve_regions(void) { efi_memory_desc_t *md; u64 paddr, npages, size; + int resv; if (efi_enabled(EFI_DBG)) pr_info("Processing EFI memory map:\n"); @@ -190,12 +191,14 @@ static __init void reserve_regions(void) paddr = md->phys_addr; npages = md->num_pages; + resv = is_reserve_region(md); if (efi_enabled(EFI_DBG)) { char buf[64]; - pr_info(" 0x%012llx-0x%012llx %s", + pr_info(" 0x%012llx-0x%012llx %s%s\n", paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, - efi_md_typeattr_format(buf, sizeof(buf), md)); + efi_md_typeattr_format(buf, sizeof(buf), md), + resv ? "*" : ""); } memrange_efi_to_native(&paddr, &npages); @@ -204,14 +207,9 @@ static __init void reserve_regions(void) if (is_normal_ram(md)) early_init_dt_add_memory_arch(paddr, size); - if (is_reserve_region(md)) { + if (resv) memblock_mark_nomap(paddr, size); - if (efi_enabled(EFI_DBG)) - pr_cont("*"); - } - if (efi_enabled(EFI_DBG)) - pr_cont("\n"); } set_bit(EFI_MEMMAP, &efi.flags); From c7103f650a11328f28b9fa1c95027db331b7774b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 31 May 2016 11:50:28 -0700 Subject: [PATCH 0191/1050] EDAC, sb_edac: Fix rank lookup on Broadwell Broadwell made a small change to the rank target register moving the target rank ID field up from bits 16:19 to bits 20:23. Also found that the offset field grew by one bit in the IVY_BRIDGE to HASWELL transition, so fix the RIR_OFFSET() macro too. Signed-off-by: Tony Luck Cc: stable@vger.kernel.org # v3.19+ Cc: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/2943fb819b1f7e396681165db9c12bb3df0e0b16.1464735623.git.tony.luck@intel.com Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index b4d0bf6534cf..ace662e8a7a8 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -239,8 +239,11 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = { { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc }, }; -#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19) -#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14) +#define RIR_RNK_TGT(type, reg) (((type) == BROADWELL) ? \ + GET_BITFIELD(reg, 20, 23) : GET_BITFIELD(reg, 16, 19)) + +#define RIR_OFFSET(type, reg) (((type) == HASWELL || (type) == BROADWELL) ? \ + GET_BITFIELD(reg, 2, 15) : GET_BITFIELD(reg, 2, 14)) /* Device 16, functions 2-7 */ @@ -1894,14 +1897,14 @@ static void get_memory_layout(const struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_tad[i], rir_offset[j][k], ®); - tmp_mb = RIR_OFFSET(reg) << 6; + tmp_mb = RIR_OFFSET(pvt->info.type, reg) << 6; gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", i, j, k, gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, - (u32)RIR_RNK_TGT(reg), + (u32)RIR_RNK_TGT(pvt->info.type, reg), reg); } } @@ -2234,7 +2237,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, pci_read_config_dword(pvt->pci_tad[ch_add + base_ch], rir_offset[n_rir][idx], ®); - *rank = RIR_RNK_TGT(reg); + *rank = RIR_RNK_TGT(pvt->info.type, reg); edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", n_rir, From 31143e2933d1675c4c1ba6ce125cdd95870edd85 Mon Sep 17 00:00:00 2001 From: Franky Lin Date: Thu, 2 Jun 2016 02:00:27 -0700 Subject: [PATCH 0192/1050] brcmfmac: add eth_type_trans back for PCIe full dongle A regression was introduced in commit 9c349892ccc9 ("brcmfmac: revise handling events in receive path") which moves eth_type_trans() call to brcmf_rx_frame(). Msgbuf layer doesn't use brcmf_rx_frame() but invokes brcmf_netif_rx() directly. In such case the Ethernet header was not stripped out resulting in null pointer dereference in the networking stack. BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 IP: [] enqueue_to_backlog+0x56/0x260 PGD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: fuse ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype [...] rtsx_pci scsi_mod usbcore usb_common i8042 serio nvme nvme_core CPU: 7 PID: 1340 Comm: irq/136-brcmf_p Not tainted 4.7.0-rc1-mainline #1 Hardware name: Dell Inc. XPS 15 9550/0N7TVV, BIOS 01.02.00 04/07/2016 task: ffff8804a0c5bd00 ti: ffff88049e124000 task.ti: ffff88049e124000 RIP: 0010:[] [] enqueue_to_backlog+0x56/0x260 RSP: 0018:ffff88049e127ca0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8804bddd7c40 RCX: 000000000000002f RDX: 0000000000000000 RSI: 0000000000000007 RDI: ffff8804bddd7d4c RBP: ffff88049e127ce8 R08: 0000000000000000 R09: 0000000000000000 R10: ffff8804bddd12c0 R11: 000000000000149e R12: 0000000000017c40 R13: ffff88049e127d08 R14: ffff8804a9bd6d00 R15: ffff8804bddd7d4c FS: 0000000000000000(0000) GS:ffff8804bddc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000048 CR3: 0000000001806000 CR4: 00000000003406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Stack: ffff8804bdddad00 ffff8804ad089e00 0000000000000000 0000000000000282 0000000000000000 ffff8804a9bd6d00 ffff8804a1b27e00 ffff8804a9bd6d00 ffff88002ee88000 ffff88049e127d28 ffffffff814c3f3b ffffffff81311fc3 Call Trace: [] netif_rx_internal+0x4b/0x170 [] ? swiotlb_tbl_unmap_single+0xf3/0x120 [] netif_rx_ni+0x27/0xc0 [] brcmf_netif_rx+0x49/0x70 [brcmfmac] [] brcmf_msgbuf_process_rx+0x2b4/0x570 [brcmfmac] [] ? __xen_set_pgd_hyper+0x57/0xd0 [] ? irq_forced_thread_fn+0x70/0x70 [] brcmf_proto_msgbuf_rx_trigger+0x31/0xe0 [brcmfmac] [] brcmf_pcie_isr_thread+0x7f/0x110 [brcmfmac] [] irq_thread_fn+0x20/0x50 [] irq_thread+0x12d/0x1c0 [] ? __schedule+0x2f5/0x7a0 [] ? wake_threads_waitq+0x30/0x30 [] ? irq_thread_dtor+0xb0/0xb0 [] kthread+0xd8/0xf0 [] ret_from_fork+0x1f/0x40 [] ? kthread_worker_fn+0x170/0x170 Code: 1c f5 60 9a 8e 81 9c 58 0f 1f 44 00 00 48 89 45 d0 fa 66 0f 1f 44 00 00 4c 8d bb 0c 01 00 00 4c 89 ff e8 5e 08 11 00 49 8b 56 20 <48> 8b 52 48 83 e2 01 74 10 8b 8b 08 01 00 00 8b 15 59 c5 42 00 RIP [] enqueue_to_backlog+0x56/0x260 RSP CR2: 0000000000000048 Fixes: 9c349892ccc9 ("brcmfmac: revise handling events in receive path") Reported-by: Rafal Milecki Reported-by: Grey Christoforo Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Arend Van Spriel Reviewed-by: Hante Meuleman Signed-off-by: Franky Lin [arend@broadcom.com: rephrased the commit message] Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c index 68f1ce02f4bf..2b9a2bc429d6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c @@ -1157,6 +1157,8 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf) brcmu_pkt_buf_free_skb(skb); return; } + + skb->protocol = eth_type_trans(skb, ifp->ndev); brcmf_netif_rx(ifp, skb); } From fbedcaf43fba35677c01a4ae51e6f79edf4049ba Mon Sep 17 00:00:00 2001 From: Nicholas Krause Date: Thu, 19 May 2016 18:45:58 -0400 Subject: [PATCH 0193/1050] EDAC: Fix workqueues poll period resetting After the workqueue cleanup, we're registering workqueues based on the presence of an ->edac_check function. When that is the case, we're setting OP_RUNNING_POLL. But we forgot to check that in edac_mc_reset_delay_period(), leading to: BUG: unable to handle kernel paging request at 0000000000015d10 IP: [ .. ] queued_spin_lock_slowpath PGD 3ffcc8067 PUD 3ffc56067 PMD 0 Oops: 0002 [#1] SMP Modules linked in: ... CPU: 1 PID: 2792 Comm: edactest Not tainted 4.6.0-dirty #1 Hardware name: HP ProLiant MicroServer, BIOS O41 10/01/2013 Stack: Call Trace: ? _raw_spin_lock_irqsave ? lock_timer_base.isra.34 ? del_timer ? try_to_grab_pending ? mod_delayed_work_on ? edac_mc_reset_delay_period ? edac_set_poll_msec ? param_attr_store ? module_attr_store ? kernfs_fop_write ? __vfs_write ? __vfs_read ? __alloc_fd ? vfs_write ? SyS_write ? entry_SYSCALL_64_fastpath Code: RIP [ .. ] queued_spin_lock_slowpath RSP <> CR2: 0000000000015d10 ---[ end trace 3f286bc71cca15d1 ]--- Kernel panic - not syncing: Fatal exception Fix it. Signed-off-by: Nicholas Krause Cc: # 4.5 Cc: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/1463697958-13406-1-git-send-email-xerofoify@gmail.com [ Rewrite commit message. ] Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 6aa256b0a1ed..c3ee3ad98a63 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -565,7 +565,8 @@ void edac_mc_reset_delay_period(unsigned long value) list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - edac_mod_work(&mci->work, value); + if (mci->op_state == OP_RUNNING_POLL) + edac_mod_work(&mci->work, value); } mutex_unlock(&mem_ctls_mutex); } From 99543823357966ac938d9a310947e731b67338e6 Mon Sep 17 00:00:00 2001 From: Crestez Dan Leonard Date: Tue, 3 May 2016 15:27:09 +0300 Subject: [PATCH 0194/1050] iio: Fix error handling in iio_trigger_attach_poll_func When attaching a pollfunc iio_trigger_attach_poll_func will allocate a virtual irq and call the driver's set_trigger_state function. Fix error handling to undo previous steps if any fails. In particular this fixes handling errors from a driver's set_trigger_state function. When using triggered buffers a failure to enable the trigger used to make the buffer unusable. Signed-off-by: Crestez Dan Leonard Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-trigger.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index ae2806aafb72..0c52dfe64977 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -210,22 +210,35 @@ static int iio_trigger_attach_poll_func(struct iio_trigger *trig, /* Prevent the module from being removed whilst attached to a trigger */ __module_get(pf->indio_dev->info->driver_module); + + /* Get irq number */ pf->irq = iio_trigger_get_irq(trig); + if (pf->irq < 0) + goto out_put_module; + + /* Request irq */ ret = request_threaded_irq(pf->irq, pf->h, pf->thread, pf->type, pf->name, pf); - if (ret < 0) { - module_put(pf->indio_dev->info->driver_module); - return ret; - } + if (ret < 0) + goto out_put_irq; + /* Enable trigger in driver */ if (trig->ops && trig->ops->set_trigger_state && notinuse) { ret = trig->ops->set_trigger_state(trig, true); if (ret < 0) - module_put(pf->indio_dev->info->driver_module); + goto out_free_irq; } return ret; + +out_free_irq: + free_irq(pf->irq, pf); +out_put_irq: + iio_trigger_put_irq(trig, pf->irq); +out_put_module: + module_put(pf->indio_dev->info->driver_module); + return ret; } static int iio_trigger_detach_poll_func(struct iio_trigger *trig, From 590b92a30242dd3f73de3d9a51d9924f1ab33e93 Mon Sep 17 00:00:00 2001 From: Yong Li Date: Thu, 5 May 2016 16:10:49 +0800 Subject: [PATCH 0195/1050] iio: light apds9960: Add the missing dev.parent Without this, the iio:deviceX is missing in the /sys/bus/i2c/devices/0-0039 Some userspace tools use this path to identify a specific instance of the device. Signed-off-by: Yong Li Reviewed-By: Matt Ranostay Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/apds9960.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c index b4dbb3912977..651d57b8abbf 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -1011,6 +1011,7 @@ static int apds9960_probe(struct i2c_client *client, iio_device_attach_buffer(indio_dev, buffer); + indio_dev->dev.parent = &client->dev; indio_dev->info = &apds9960_info; indio_dev->name = APDS9960_DRV_NAME; indio_dev->channels = apds9960_channels; From 3993546646baf1dab5f5c4f7d9bb58f2046fd1c1 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 28 May 2016 23:02:50 +0300 Subject: [PATCH 0196/1050] of: irq: fix of_irq_get[_byname]() kernel-doc The kernel-doc for the of_irq_get[_byname]() is clearly inadequate in describing the return values -- of_irq_get_byname() is documented better than of_irq_get() but it still doesn't mention that 0 is returned iff irq_create_of_mapping() fails (it doesn't return an error code in this case). Document all possible return value variants, making the writing of the word "IRQ" consistent, while at it... Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq") Fixes: ad69674e73a1 ("of/irq: do irq resolution in platform_get_irq_byname()") Signed-off-by: Sergei Shtylyov CC: stable@vger.kernel.org Signed-off-by: Rob Herring --- drivers/of/irq.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index e7bfc175b8e1..6ec743faabe8 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -386,13 +386,13 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r) EXPORT_SYMBOL_GPL(of_irq_to_resource); /** - * of_irq_get - Decode a node's IRQ and return it as a Linux irq number + * of_irq_get - Decode a node's IRQ and return it as a Linux IRQ number * @dev: pointer to device tree node - * @index: zero-based index of the irq - * - * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain - * is not yet created. + * @index: zero-based index of the IRQ * + * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or + * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case + * of any other failure. */ int of_irq_get(struct device_node *dev, int index) { @@ -413,12 +413,13 @@ int of_irq_get(struct device_node *dev, int index) EXPORT_SYMBOL_GPL(of_irq_get); /** - * of_irq_get_byname - Decode a node's IRQ and return it as a Linux irq number + * of_irq_get_byname - Decode a node's IRQ and return it as a Linux IRQ number * @dev: pointer to device tree node - * @name: irq name + * @name: IRQ name * - * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain - * is not yet created, or error code in case of any other failure. + * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or + * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case + * of any other failure. */ int of_irq_get_byname(struct device_node *dev, const char *name) { From 7d482813bb3518cbfeae1b987a5afd76a88c7eb3 Mon Sep 17 00:00:00 2001 From: Jaewon Date: Wed, 25 May 2016 13:29:50 +0900 Subject: [PATCH 0197/1050] drivers: of: of_reserved_mem: fixup the CMA alignment not to affect dma-coherent There was an alignment mismatch issue for CMA and it was fixed by commit 1cc8e3458b51 ("drivers: of: of_reserved_mem: fixup the alignment with CMA setup"). However the way of the commit considers not only dma-contiguous(CMA) but also dma-coherent which has no that requirement. This patch checks more to distinguish dma-contiguous(CMA) from dma-coherent. Signed-off-by: Jaewon Kim Acked-by: Jason Liu Acked-by: Marek Szyprowski [robh: remove erroneous opening bracket] Signed-off-by: Rob Herring --- drivers/of/of_reserved_mem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index ed01c0172e4a..ed7e681efcf0 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -127,7 +127,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, } /* Need adjust the alignment to satisfy the CMA requirement */ - if (IS_ENABLED(CONFIG_CMA) && of_flat_dt_is_compatible(node, "shared-dma-pool")) + if (IS_ENABLED(CONFIG_CMA) + && of_flat_dt_is_compatible(node, "shared-dma-pool") + && of_get_flat_dt_prop(node, "reusable", NULL) + && !of_get_flat_dt_prop(node, "no-map", NULL)) align = max(align, (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); prop = of_get_flat_dt_prop(node, "alloc-ranges", &len); From 94bef000f1d4aa111f4ddda1482cf3b30ad069ce Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sun, 29 May 2016 19:52:02 -0700 Subject: [PATCH 0198/1050] iio: hudmidity: hdc100x: fix incorrect shifting and scaling Shifting sensor data to the right 2 bits was incorrect and caused the scaling values + offsets to be invalid. Reported-by: Alison Schofield Signed-off-by: Matt Ranostay Tested-by: Alison Schofield Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hdc100x.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index 30709838dcdc..a03832a5fc95 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -164,14 +164,14 @@ static int hdc100x_get_measurement(struct hdc100x_data *data, dev_err(&client->dev, "cannot read high byte measurement"); return ret; } - val = ret << 6; + val = ret << 8; ret = i2c_smbus_read_byte(client); if (ret < 0) { dev_err(&client->dev, "cannot read low byte measurement"); return ret; } - val |= ret >> 2; + val |= ret; return val; } @@ -212,17 +212,17 @@ static int hdc100x_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: if (chan->type == IIO_TEMP) { *val = 165000; - *val2 = 65536 >> 2; + *val2 = 65536; return IIO_VAL_FRACTIONAL; } else { - *val = 0; - *val2 = 10000; - return IIO_VAL_INT_PLUS_MICRO; + *val = 100; + *val2 = 65536; + return IIO_VAL_FRACTIONAL; } break; case IIO_CHAN_INFO_OFFSET: - *val = -3971; - *val2 = 879096; + *val = -15887; + *val2 = 515151; return IIO_VAL_INT_PLUS_MICRO; default: return -EINVAL; From aaaab56dba9af4fe75461e0ee13231c1a6ea174d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 31 May 2016 09:38:56 +1000 Subject: [PATCH 0199/1050] of: silence warnings due to max() usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pageblock_order can be (at least) an unsigned int or an unsigned long depending on the kernel config and architecture, so use max_t(unsigned long ...) when comparing it. fixes these warnings: In file included from include/linux/list.h:8:0, from include/linux/kobject.h:20, from include/linux/of.h:21, from drivers/of/of_reserved_mem.c:17: drivers/of/of_reserved_mem.c: In function ‘__reserved_mem_alloc_size’: include/linux/kernel.h:748:17: warning: comparison of distinct pointer types lacks a cast (void) (&_max1 == &_max2); \ ^ include/linux/kernel.h:747:9: note: in definition of macro ‘max’ typeof(y) _max2 = (y); \ ^ drivers/of/of_reserved_mem.c:131:48: note: in expansion of macro ‘max’ align = max(align, (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_ord ^ include/linux/kernel.h:748:17: warning: comparison of distinct pointer types lacks a cast (void) (&_max1 == &_max2); \ ^ include/linux/kernel.h:747:21: note: in definition of macro ‘max’ typeof(y) _max2 = (y); \ ^ drivers/of/of_reserved_mem.c:131:48: note: in expansion of macro ‘max’ align = max(align, (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_ord ^ Fixes: 1cc8e3458b51 ("drivers: of: of_reserved_mem: fixup the alignment with CMA setup") Signed-off-by: Stephen Rothwell Signed-off-by: Rob Herring --- drivers/of/of_reserved_mem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index ed7e681efcf0..216648233874 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -130,8 +130,12 @@ static int __init __reserved_mem_alloc_size(unsigned long node, if (IS_ENABLED(CONFIG_CMA) && of_flat_dt_is_compatible(node, "shared-dma-pool") && of_get_flat_dt_prop(node, "reusable", NULL) - && !of_get_flat_dt_prop(node, "no-map", NULL)) - align = max(align, (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); + && !of_get_flat_dt_prop(node, "no-map", NULL)) { + unsigned long order = + max_t(unsigned long, MAX_ORDER - 1, pageblock_order); + + align = max(align, (phys_addr_t)PAGE_SIZE << order); + } prop = of_get_flat_dt_prop(node, "alloc-ranges", &len); if (prop) { From e93aeeae0bf8cba43e05484ae4ad17213ac6c3a7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 25 May 2016 01:15:04 +0000 Subject: [PATCH 0200/1050] of: add missing const for of_parse_phandle_with_args() in !CONFIG_OF commit 93c667ca2598bd84f1bd3f2fa176af69707699fe ("of: *node argument to of_parse_phandle_with_args should be const") changed to const for struct device node *np, but it cares CONFIG_OF case only, !CONFIG_OF case need it too. Signed-off-by: Kuninori Morimoto Signed-off-by: Rob Herring --- include/linux/of.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/of.h b/include/linux/of.h index c7292e8ea080..74eb28cadbef 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -614,7 +614,7 @@ static inline struct device_node *of_parse_phandle(const struct device_node *np, return NULL; } -static inline int of_parse_phandle_with_args(struct device_node *np, +static inline int of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int index, From 53e1941ce8a25543eef2016b8de69d155f5151a5 Mon Sep 17 00:00:00 2001 From: Wei-Ning Huang Date: Wed, 1 Jun 2016 11:09:24 +0800 Subject: [PATCH 0201/1050] dt: bindings: fix documentation for MARVELL's bt-sd8xxx wireless device The property marvell,wakeup-pin and marvell,wakeup-gap-ms are read as u16 in the driver. Fix documentation and example accordingly. Signed-off-by: Wei-Ning Huang Signed-off-by: Rob Herring --- .../devicetree/bindings/net/marvell-bt-sd8xxx.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt b/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt index 14aa6cf58201..6a9a63cb0543 100644 --- a/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt +++ b/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt @@ -13,10 +13,10 @@ Optional properties: initialization. This is an array of 28 values(u8). - marvell,wakeup-pin: It represents wakeup pin number of the bluetooth chip. - firmware will use the pin to wakeup host system. + firmware will use the pin to wakeup host system (u16). - marvell,wakeup-gap-ms: wakeup gap represents wakeup latency of the host platform. The value will be configured to firmware. This - is needed to work chip's sleep feature as expected. + is needed to work chip's sleep feature as expected (u16). - interrupt-parent: phandle of the parent interrupt controller - interrupts : interrupt pin number to the cpu. Driver will request an irq based on this interrupt number. During system suspend, the irq will be @@ -50,7 +50,7 @@ calibration data is also available in below example. 0x37 0x01 0x1c 0x00 0xff 0xff 0xff 0xff 0x01 0x7f 0x04 0x02 0x00 0x00 0xba 0xce 0xc0 0xc6 0x2d 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xf0 0x00>; - marvell,wakeup-pin = <0x0d>; - marvell,wakeup-gap-ms = <0x64>; + marvell,wakeup-pin = /bits/ 16 <0x0d>; + marvell,wakeup-gap-ms = /bits/ 16 <0x64>; }; }; From f83803305b008ca3d75b00092b6493bc19eda60b Mon Sep 17 00:00:00 2001 From: Joshua Clayton Date: Wed, 1 Jun 2016 08:55:00 -0700 Subject: [PATCH 0202/1050] of: add vendor prefix for UniWest United Western Technologies Corp, known primarily as UniWest, is a manufacturer of eddy current and ultrasonic testing equipment. Signed-off-by: Joshua Clayton Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index a7440bcd67ff..1387cd33064e 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -269,6 +269,7 @@ tronsmart Tronsmart truly Truly Semiconductors Limited tyan Tyan Computer Corporation upisemi uPI Semiconductor Corp. +uniwest United Western Technologies Corp (UniWest) urt United Radiant Technology Corporation usi Universal Scientific Industrial Co., Ltd. v3 V3 Semiconductor From 62d0e71df063101e4551327bd9fa9aaa3535c86b Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 3 Jun 2016 08:54:18 +0800 Subject: [PATCH 0203/1050] clk: rockchip: release io resource when failing to init clk on rk3399 We should call iounmap to relase reg_base since it's not going to be used any more if failing to init clk. This was missing on the newly added rk3399 clock tree. Signed-off-by: Shawn Lin Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3399.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 9f86bfef70f7..8059a8d3ea36 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -1510,6 +1510,7 @@ static void __init rk3399_clk_init(struct device_node *np) ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); if (IS_ERR(ctx)) { pr_err("%s: rockchip clk init failed\n", __func__); + iounmap(reg_base); return; } @@ -1555,6 +1556,7 @@ static void __init rk3399_pmu_clk_init(struct device_node *np) ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); if (IS_ERR(ctx)) { pr_err("%s: rockchip pmu clk init failed\n", __func__); + iounmap(reg_base); return; } From 665f05e0b836d46b22f0b712eb76d8b7f69a5ea0 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 2 Jun 2016 10:58:08 -0700 Subject: [PATCH 0204/1050] EDAC, sb_edac: Readd accidentally dropped Broadwell-D support In commit 2c1ea4c700af ("EDAC, sb_edac: Use cpu family/model in driver detection") we switched from using PCI ids to determine which platform we are running on to using CPU model instead. I forgot that Broadwell-DE has its own distinct model number different from Broadwell-EP or -EX. Fixing this isn't just adding a line to the array of cpuids - the exising code assumed a 1:1 mapping between entries in that array and the "enum type" values. Added the type to pci_id_table structure to remove this dependency and allows two Broadwell cpu models. Signed-off-by: Tony Luck Cc: Aristeu Rozanski Cc: Dave Hansen Cc: Mauro Carvalho Chehab Cc: linux-edac Fixes: 2c1ea4c700af ("EDAC, sb_edac: Use cpu family/model in driver detection") Link: http://lkml.kernel.org/r/b3cffe40dec6dfe0235a5d52a504f0ba86a07ce7.1464902605.git.tony.luck@intel.com Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index ace662e8a7a8..6744d88bdea8 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -329,6 +329,7 @@ struct pci_id_descr { struct pci_id_table { const struct pci_id_descr *descr; int n_devs; + enum type type; }; struct sbridge_dev { @@ -397,9 +398,14 @@ static const struct pci_id_descr pci_dev_descr_sbridge[] = { { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) }, }; -#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } +#define PCI_ID_TABLE_ENTRY(A, T) { \ + .descr = A, \ + .n_devs = ARRAY_SIZE(A), \ + .type = T \ +} + static const struct pci_id_table pci_dev_descr_sbridge_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge), + PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, SANDY_BRIDGE), {0,} /* 0 terminated list. */ }; @@ -466,7 +472,7 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = { }; static const struct pci_id_table pci_dev_descr_ibridge_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge), + PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, IVY_BRIDGE), {0,} /* 0 terminated list. */ }; @@ -539,7 +545,7 @@ static const struct pci_id_descr pci_dev_descr_haswell[] = { }; static const struct pci_id_table pci_dev_descr_haswell_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell), + PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, HASWELL), {0,} /* 0 terminated list. */ }; @@ -583,7 +589,7 @@ static const struct pci_id_descr pci_dev_descr_knl[] = { }; static const struct pci_id_table pci_dev_descr_knl_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_knl), + PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, KNIGHTS_LANDING), {0,} }; @@ -651,7 +657,7 @@ static const struct pci_id_descr pci_dev_descr_broadwell[] = { }; static const struct pci_id_table pci_dev_descr_broadwell_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell), + PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, BROADWELL), {0,} /* 0 terminated list. */ }; @@ -3360,12 +3366,12 @@ fail0: #define ICPU(model, table) \ { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table } -/* Order here must match "enum type" */ static const struct x86_cpu_id sbridge_cpuids[] = { ICPU(0x2d, pci_dev_descr_sbridge_table), /* SANDY_BRIDGE */ ICPU(0x3e, pci_dev_descr_ibridge_table), /* IVY_BRIDGE */ ICPU(0x3f, pci_dev_descr_haswell_table), /* HASWELL */ ICPU(0x4f, pci_dev_descr_broadwell_table), /* BROADWELL */ + ICPU(0x56, pci_dev_descr_broadwell_table), /* BROADWELL-DE */ ICPU(0x57, pci_dev_descr_knl_table), /* KNIGHTS_LANDING */ { } }; @@ -3401,7 +3407,7 @@ static int sbridge_probe(const struct x86_cpu_id *id) mc, mc + 1, num_mc); sbridge_dev->mc = mc++; - rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids); + rc = sbridge_register_mci(sbridge_dev, ptable->type); if (unlikely(rc < 0)) goto fail1; } From 15b7cc78f0951e418c940d8b3b6a7a3b962b7748 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 May 2016 11:12:33 +0900 Subject: [PATCH 0205/1050] arm64: dts: drop "arm,amba-bus" in favor of "simple-bus" part 2 Tree-wide replacement was done by commit 2ef7d5f342c1 (ARM, ARM64: dts: drop "arm,amba-bus" in favor of "simple-bus"), but we have some new users of "arm,amba-bus" at Linux 4.7-rc1. Eliminate them now. Signed-off-by: Masahiro Yamada Acked-by: Heiko Stuebner Acked-by: Chanho Min Signed-off-by: Olof Johansson --- arch/arm64/boot/dts/lg/lg1312.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 3a4e9a2ab313..fbafa24cd533 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -125,7 +125,7 @@ #size-cells = <1>; #interrupts-cells = <3>; - compatible = "arm,amba-bus"; + compatible = "simple-bus"; interrupt-parent = <&gic>; ranges; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 46f325a143b0..d7f8e06910bc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -163,7 +163,7 @@ }; amba { - compatible = "arm,amba-bus"; + compatible = "simple-bus"; #address-cells = <2>; #size-cells = <2>; ranges; From 5fdb884267890b26fdfd5ff5ddaf596745b9ab43 Mon Sep 17 00:00:00 2001 From: Olliver Schinagl Date: Fri, 13 May 2016 21:57:16 +0200 Subject: [PATCH 0206/1050] ARM: dts: sunxi: Add OLinuXino Lime2 eMMC to the Makefile commit 27dd9af6bc000ab21fd ("ARM: dts: sunxi: Add a olinuxino-lime2-emmc") added the new emmc equipped lime2 but forgot its Makefile. This patch adds an entry to the Makefile. Signed-off-by: Olliver Schinagl Acked-by: Maxime Ripard Signed-off-by: Olof Johansson --- arch/arm/boot/dts/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 06b6c2d695bf..414b42710a36 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -741,6 +741,7 @@ dtb-$(CONFIG_MACH_SUN7I) += \ sun7i-a20-olimex-som-evb.dtb \ sun7i-a20-olinuxino-lime.dtb \ sun7i-a20-olinuxino-lime2.dtb \ + sun7i-a20-olinuxino-lime2-emmc.dtb \ sun7i-a20-olinuxino-micro.dtb \ sun7i-a20-orangepi.dtb \ sun7i-a20-orangepi-mini.dtb \ From cb84f6c0a25eb76b1f838eb63e212705c0c06b5f Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 11 May 2016 13:23:13 +0800 Subject: [PATCH 0207/1050] ARM: dts: sun6i: primo81: Drop constraints on dc1sw regulator This is the same issue fixed in commit dcf5341f0150 ("ARM: dts: sun8i-q8-common: Do not set constraints on dc1sw regulator"). Commit message copied: dc1sw is an on/off only regulator and as such it cannot have constraints. This is a limitation of the kernel regulator implementation which resolves supplies on the first regulator_get(), which is done after applying constraints, and applying the constrains will fail because it calls _regulator_get_voltage() and _regulator_do_set_voltage() both of which will fail on a switch regulator when there is no supply (yet). This causes registering of all axp22x regulators to fail with the following errors: [ 1.395249] vcc-lcd: failed to get the current voltage(-22) [ 1.405131] axp20x-regulator axp20x-regulator: Failed to register dc1sw [ 1.412436] axp20x-regulator: probe of axp20x-regulator failed with error -22 This commit removes the constrains on dc1sw / vcc-lcd fixing this problem. Note that dcdc1 itself is contrained to the exact same values, so this does not change anything. Cc: Hans de Goede Signed-off-by: Chen-Yu Tsai Cc: # 4.6 Acked-by: Maxime Ripard Signed-off-by: Olof Johansson --- arch/arm/boot/dts/sun6i-a31s-primo81.dts | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/sun6i-a31s-primo81.dts b/arch/arm/boot/dts/sun6i-a31s-primo81.dts index 68b479b8772c..73c133f5e79c 100644 --- a/arch/arm/boot/dts/sun6i-a31s-primo81.dts +++ b/arch/arm/boot/dts/sun6i-a31s-primo81.dts @@ -176,8 +176,6 @@ }; ®_dc1sw { - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; regulator-name = "vcc-lcd"; }; From b223d6242c372a81cca3bb81998f53d3b3e3fb70 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 11 May 2016 13:23:14 +0800 Subject: [PATCH 0208/1050] ARM: dts: sun6i: yones-toptech-bs1078-v2: Drop constraints on dc1sw regulator This is the same issue fixed in commit dcf5341f0150 ("ARM: dts: sun8i-q8-common: Do not set constraints on dc1sw regulator"). Commit message copied: dc1sw is an on/off only regulator and as such it cannot have constraints. This is a limitation of the kernel regulator implementation which resolves supplies on the first regulator_get(), which is done after applying constraints, and applying the constrains will fail because it calls _regulator_get_voltage() and _regulator_do_set_voltage() both of which will fail on a switch regulator when there is no supply (yet). This causes registering of all axp22x regulators to fail with the following errors: [ 1.395249] vcc-lcd: failed to get the current voltage(-22) [ 1.405131] axp20x-regulator axp20x-regulator: Failed to register dc1sw [ 1.412436] axp20x-regulator: probe of axp20x-regulator failed with error -22 This commit removes the constrains on dc1sw / vcc-lcd fixing this problem. Note that dcdc1 itself is contrained to the exact same values, so this does not change anything. Cc: Hans de Goede Signed-off-by: Chen-Yu Tsai Cc: # 4.6 Acked-by: Maxime Ripard Signed-off-by: Olof Johansson --- arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts index 360adfb1e9ca..d6ad6196a768 100644 --- a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts +++ b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts @@ -135,8 +135,6 @@ ®_dc1sw { regulator-name = "vcc-lcd-usb2"; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; }; ®_dc5ldo { From 357cc9b4a8a7a0cd0e662537b76e6fa4670b6798 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:15 -0700 Subject: [PATCH 0209/1050] sch_hfsc: always keep backlog updated hfsc updates backlog lazily, that is only when we dump the stats. This is problematic after we begin to update backlog in qdisc_tree_reduce_backlog(). Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d783d7cc3348..1ac9f9f03fe3 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1529,6 +1529,7 @@ hfsc_reset_qdisc(struct Qdisc *sch) q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); + sch->qstats.backlog = 0; sch->q.qlen = 0; } @@ -1559,14 +1560,6 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) struct hfsc_sched *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; - struct hfsc_class *cl; - unsigned int i; - - sch->qstats.backlog = 0; - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) - sch->qstats.backlog += cl->qdisc->qstats.backlog; - } qopt.defcls = q->defcls; if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) @@ -1604,6 +1597,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (cl->qdisc->q.qlen == 1) set_active(cl, qdisc_pkt_len(skb)); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1672,6 +1666,7 @@ hfsc_dequeue(struct Qdisc *sch) qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; @@ -1695,6 +1690,7 @@ hfsc_drop(struct Qdisc *sch) } cl->qstats.drops++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } From 6529d75ad9228f4d8a8f6c5c5244ceb945ac9bc2 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:16 -0700 Subject: [PATCH 0210/1050] sch_prio: update backlog as well We need to update backlog too when we update qlen. Joint work with Stas. Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_prio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fee1b15506b2..4b0a82191bc4 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -85,6 +85,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -117,6 +118,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) struct sk_buff *skb = qdisc_dequeue_peeked(qdisc); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -135,6 +137,7 @@ static unsigned int prio_drop(struct Qdisc *sch) for (prio = q->bands-1; prio >= 0; prio--) { qdisc = q->queues[prio]; if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) { + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -151,6 +154,7 @@ prio_reset(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) qdisc_reset(q->queues[prio]); + sch->qstats.backlog = 0; sch->q.qlen = 0; } From 6a73b571b63075ef408c83f07c2565b5652f93cc Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:17 -0700 Subject: [PATCH 0211/1050] sch_drr: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_drr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a63e879e8975..bf8af2c43c2c 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -375,6 +375,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->deficit = cl->quantum; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return err; } @@ -407,6 +408,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -428,6 +430,7 @@ static unsigned int drr_drop(struct Qdisc *sch) if (cl->qdisc->ops->drop) { len = cl->qdisc->ops->drop(cl->qdisc); if (len > 0) { + sch->qstats.backlog -= len; sch->q.qlen--; if (cl->qdisc->q.qlen == 0) list_del(&cl->alist); @@ -463,6 +466,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } + sch->qstats.backlog = 0; sch->q.qlen = 0; } From d7f4f332f082c4d4ba53582f902ed6b44fd6f45e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:18 -0700 Subject: [PATCH 0212/1050] sch_red: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_red.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 8c0508c0e287..91578bdd378c 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -97,6 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -118,6 +119,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) skb = child->dequeue(child); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { if (!red_is_idling(&q->vars)) @@ -143,6 +145,7 @@ static unsigned int red_drop(struct Qdisc *sch) if (child->ops->drop && (len = child->ops->drop(child)) > 0) { q->stats.other++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -158,6 +161,7 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; red_restart(&q->vars); } From 8d5958f424b62060a8696b12c17dad198d5d386f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:19 -0700 Subject: [PATCH 0213/1050] sch_tbf: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 83b90b584fae..3161e491990b 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -207,6 +207,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -217,6 +218,7 @@ static unsigned int tbf_drop(struct Qdisc *sch) unsigned int len = 0; if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { + sch->qstats.backlog -= len; sch->q.qlen--; qdisc_qstats_drop(sch); } @@ -263,6 +265,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) q->t_c = now; q->tokens = toks; q->ptokens = ptoks; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); @@ -294,6 +297,7 @@ static void tbf_reset(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; q->t_c = ktime_get_ns(); q->tokens = q->buffer; From 9c8b0778e48e4bbdb77121c6c1b7dd48e5182e67 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Thu, 2 Jun 2016 17:36:28 +0800 Subject: [PATCH 0214/1050] gianfar: fix the last transmit buffer descriptor When the transmit hardware timestamping is enabled, an additional TxBD would be added and would be set as the last TxBD with TXBD_LAST and TXBD_INTERRUPT. However this has been broken by a patch recently. This made the software couldn't get transmit hardware timestamps and resulted in call trace. So, this patch is to fix this issue. Fixes: 48963b4492e9 ("gianfar: Remove redundant ops for do_tstamp from xmit()") Signed-off-by: Yangbo Lu Reviewed-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 7615e0668acb..2e6785b6e8be 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2440,7 +2440,8 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_queue->tx_ring_size); if (likely(!nr_frags)) { - lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT); + if (likely(!do_tstamp)) + lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT); } else { u32 lstatus_start = lstatus; From 8478b6cdc10e8a7735deeb9d9e46ad5b157c84d0 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 2 Jun 2016 16:14:52 +0300 Subject: [PATCH 0215/1050] net: ethernet: ti: cpsw: fix rx-usecs interrupt pacing consistency The rx-usecs shouldn't be changed while interface down/up. Currently, for instance, if it's set to 100us, after interface down/up it's 500us. It's a hidden bug that can lead to lavish interrupt pacing time increasing while "down/up" up to max value. Steps to reproduce: - set rx-usecs to be 100us - down/up interface - read new unexpected rx-usecs Signed-off-by: Ivan Khoronzhuk Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4b08a2f52b3e..e6bb0ecb12c7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1339,7 +1339,7 @@ static int cpsw_ndo_open(struct net_device *ndev) if (priv->coal_intvl != 0) { struct ethtool_coalesce coal; - coal.rx_coalesce_usecs = (priv->coal_intvl << 4); + coal.rx_coalesce_usecs = priv->coal_intvl; cpsw_set_coalesce(ndev, &coal); } From 207bdf1844286d2ba94523d30528f82aaf33d52b Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:06 +0200 Subject: [PATCH 0216/1050] net-next: mediatek: use mdiobus_free() in favour of kfree() The driver currently uses kfree() to clear the mii_bus. This is not the correct way to clear the memory and mdiobus_free() should be used instead. This patch fixes the two instances where this happens in the driver. Reviewed-by: Andrew Lunn Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c984462fad2a..28f169f489da 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -280,7 +280,7 @@ static int mtk_mdio_init(struct mtk_eth *eth) return 0; err_free_bus: - kfree(eth->mii_bus); + mdiobus_free(eth->mii_bus); err_put_node: of_node_put(mii_np); @@ -295,7 +295,7 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth) mdiobus_unregister(eth->mii_bus); of_node_put(eth->mii_bus->dev.of_node); - kfree(eth->mii_bus); + mdiobus_free(eth->mii_bus); } static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask) From 08ef55c6f257acf3bdc6940813f80e8f0f5d90ec Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:07 +0200 Subject: [PATCH 0217/1050] net-next: mediatek: fix gigabit and flow control advertisement The current code will not setup the PHYs advertisement features correctly. Fix this and properly advertise Gigabit features and properly handle asymmetric pause frames. Signed-off-by: Sean Wang Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 30 ++++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 28f169f489da..b0652f6182f4 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -133,6 +133,8 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg) static void mtk_phy_link_adjust(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); + u16 lcl_adv = 0, rmt_adv = 0; + u8 flowctrl; u32 mcr = MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | @@ -150,11 +152,30 @@ static void mtk_phy_link_adjust(struct net_device *dev) if (mac->phy_dev->link) mcr |= MAC_MCR_FORCE_LINK; - if (mac->phy_dev->duplex) + if (mac->phy_dev->duplex) { mcr |= MAC_MCR_FORCE_DPX; - if (mac->phy_dev->pause) - mcr |= MAC_MCR_FORCE_RX_FC | MAC_MCR_FORCE_TX_FC; + if (mac->phy_dev->pause) + rmt_adv = LPA_PAUSE_CAP; + if (mac->phy_dev->asym_pause) + rmt_adv |= LPA_PAUSE_ASYM; + + if (mac->phy_dev->advertising & ADVERTISED_Pause) + lcl_adv |= ADVERTISE_PAUSE_CAP; + if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause) + lcl_adv |= ADVERTISE_PAUSE_ASYM; + + flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); + + if (flowctrl & FLOW_CTRL_TX) + mcr |= MAC_MCR_FORCE_TX_FC; + if (flowctrl & FLOW_CTRL_RX) + mcr |= MAC_MCR_FORCE_RX_FC; + + netif_dbg(mac->hw, link, dev, "rx pause %s, tx pause %s\n", + flowctrl & FLOW_CTRL_RX ? "enabled" : "disabled", + flowctrl & FLOW_CTRL_TX ? "enabled" : "disabled"); + } mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); @@ -236,7 +257,8 @@ static int mtk_phy_connect(struct mtk_mac *mac) mac->phy_dev->autoneg = AUTONEG_ENABLE; mac->phy_dev->speed = 0; mac->phy_dev->duplex = 0; - mac->phy_dev->supported &= PHY_BASIC_FEATURES; + mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause; mac->phy_dev->advertising = mac->phy_dev->supported | ADVERTISED_Autoneg; phy_start_aneg(mac->phy_dev); From 0c72c50f6f93b0c3daa9ea35d89ab3a933c7b5a0 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:08 +0200 Subject: [PATCH 0218/1050] net-next: mediatek: add fixed-phy support The MT7623 SoC has a builtin gigabit switch. If we want to use it, GMAC1 needs to be configured using a fixed link speed and flow control settings. The easiest way to do this is to used the fixed-phy driver, allowing us to reuse the existing mdio polling code to setup the MAC. Reviewed-by: Andrew Lunn Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b0652f6182f4..231d28444a54 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -229,6 +229,9 @@ static int mtk_phy_connect(struct mtk_mac *mac) u32 val, ge_mode; np = of_parse_phandle(mac->of_node, "phy-handle", 0); + if (!np && of_phy_is_fixed_link(mac->of_node)) + if (!of_phy_register_fixed_link(mac->of_node)) + np = of_node_get(mac->of_node); if (!np) return -ENODEV; From 37920fce0fc10264410eb880d411968b7934b61d Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:09 +0200 Subject: [PATCH 0219/1050] net-next: mediatek: properly handle RGMII modes If an external Gigabit PHY is connected to either of the MACs we need to be able to tell the PHY to use a delay. Not doing so will result in heavy packet loss and/or data corruption when using PHYs such as the IC+ IP1001. We tell the PHY which MII delay mode to use via the devictree. The ethernet driver needs to be adapted to handle all 3 rgmii-*id modes in the same way as normal rgmii when setting up the MAC. Reviewed-by: Andrew Lunn Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 231d28444a54..4763252bbf85 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -236,6 +236,9 @@ static int mtk_phy_connect(struct mtk_mac *mac) return -ENODEV; switch (of_get_phy_mode(np)) { + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: ge_mode = 0; break; From 182fd9eecb287e696c82b30d06c6150d80a49c5b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:06:10 +0200 Subject: [PATCH 0220/1050] MAINTAINERS: Add file patterns for wireless device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Kalle Valo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d397157981c..7e4734623579 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7851,6 +7851,7 @@ Q: http://patchwork.kernel.org/project/linux-wireless/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git S: Maintained +F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ NETXEN (1/10) GbE SUPPORT From fac7d1917dfddfa53e98524f0abfbe60252740fe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Jun 2016 11:41:49 -0400 Subject: [PATCH 0221/1050] fix EOPENSTALE bug in do_last() EOPENSTALE occuring at the last component of a trailing symlink ends up with do_last() retrying its lookup. After the symlink body has been discarded. The thing is, all this retry_lookup logics in there is not needed at all - the upper layers will do the right thing if we simply return that -EOPENSTALE as we would with any other error. Trying to microoptimize in do_last() is a lot of headache for no good reason. Cc: stable@vger.kernel.org # v4.2+ Tested-by: Oleg Drokin Reviewed-and-Tested-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 43 ++++--------------------------------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 4c4f95ac8aa5..3d9511e656ab 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3166,9 +3166,7 @@ static int do_last(struct nameidata *nd, int acc_mode = op->acc_mode; unsigned seq; struct inode *inode; - struct path save_parent = { .dentry = NULL, .mnt = NULL }; struct path path; - bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -3211,7 +3209,6 @@ static int do_last(struct nameidata *nd, return -EISDIR; } -retry_lookup: if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { error = mnt_want_write(nd->path.mnt); if (!error) @@ -3292,23 +3289,14 @@ finish_lookup: if (unlikely(error)) return error; - if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { - path_to_nameidata(&path, nd); - } else { - save_parent.dentry = nd->path.dentry; - save_parent.mnt = mntget(path.mnt); - nd->path.dentry = path.dentry; - - } + path_to_nameidata(&path, nd); nd->inode = inode; nd->seq = seq; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ finish_open: error = complete_walk(nd); - if (error) { - path_put(&save_parent); + if (error) return error; - } audit_inode(nd->name, nd->path.dentry, 0); error = -EISDIR; if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) @@ -3331,13 +3319,9 @@ finish_open_created: goto out; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ error = vfs_open(&nd->path, file, current_cred()); - if (!error) { - *opened |= FILE_OPENED; - } else { - if (error == -EOPENSTALE) - goto stale_open; + if (error) goto out; - } + *opened |= FILE_OPENED; opened: error = open_check_o_direct(file); if (!error) @@ -3353,26 +3337,7 @@ out: } if (got_write) mnt_drop_write(nd->path.mnt); - path_put(&save_parent); return error; - -stale_open: - /* If no saved parent or already retried then can't retry */ - if (!save_parent.dentry || retried) - goto out; - - BUG_ON(save_parent.dentry != dir); - path_put(&nd->path); - nd->path = save_parent; - nd->inode = dir->d_inode; - save_parent.mnt = NULL; - save_parent.dentry = NULL; - if (got_write) { - mnt_drop_write(nd->path.mnt); - got_write = false; - } - retried = true; - goto retry_lookup; } static int do_tmpfile(struct nameidata *nd, unsigned flags, From 69a834c28fb514403eb91a4f0120da214a52e056 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 24 May 2016 18:29:38 -0400 Subject: [PATCH 0222/1050] drm/msm: deal with exhausted vmap space better Some, but not all, callers of obj->vmap() would check if return IS_ERR(). So let's actually return an error if vmap() fails. And fixup the call-sites that were not handling this properly. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- drivers/gpu/drm/msm/msm_fbdev.c | 4 ++++ drivers/gpu/drm/msm/msm_gem.c | 2 ++ drivers/gpu/drm/msm/msm_rd.c | 3 +++ drivers/gpu/drm/msm/msm_ringbuffer.c | 4 ++++ 5 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index fbe304ee6c80..2aec27dbb5bb 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -408,7 +408,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, } adreno_gpu->memptrs = msm_gem_vaddr(adreno_gpu->memptrs_bo); - if (!adreno_gpu->memptrs) { + if (IS_ERR(adreno_gpu->memptrs)) { dev_err(drm->dev, "could not vmap memptrs\n"); return -ENOMEM; } diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index d9759bf3482e..c6cf837c5193 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -159,6 +159,10 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, dev->mode_config.fb_base = paddr; fbi->screen_base = msm_gem_vaddr_locked(fbdev->bo); + if (IS_ERR(fbi->screen_base)) { + ret = PTR_ERR(fbi->screen_base); + goto fail_unlock; + } fbi->screen_size = fbdev->bo->size; fbi->fix.smem_start = paddr; fbi->fix.smem_len = fbdev->bo->size; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 7daf4054dd2b..69836f5685b1 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -398,6 +398,8 @@ void *msm_gem_vaddr_locked(struct drm_gem_object *obj) return ERR_CAST(pages); msm_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + if (msm_obj->vaddr == NULL) + return ERR_PTR(-ENOMEM); } return msm_obj->vaddr; } diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index b48f73ac6389..0857710c2ff2 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -312,6 +312,9 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit) struct msm_gem_object *obj = submit->bos[idx].obj; const char *buf = msm_gem_vaddr_locked(&obj->base); + if (IS_ERR(buf)) + continue; + buf += iova - submit->bos[idx].iova; rd_write_section(rd, RD_GPUADDR, diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 1f14b908b221..42f5359cf988 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -40,6 +40,10 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->start = msm_gem_vaddr_locked(ring->bo); + if (IS_ERR(ring->start)) { + ret = PTR_ERR(ring->start); + goto fail; + } ring->end = ring->start + (size / 4); ring->cur = ring->start; From ba344afd667a6f1c6bf7c53e08fc16bd84fa4df7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 24 May 2016 18:43:26 -0400 Subject: [PATCH 0223/1050] drm/msm: fix some crashes in submit fail path If submit fails, before fence is created or before submit is added to submit-list, then unitialized fields cause problems in the clean-up path. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index b89ca5174863..635eff17c3d1 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -40,12 +40,14 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->dev = dev; submit->gpu = gpu; + submit->fence = NULL; submit->pid = get_pid(task_pid(current)); /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; submit->nr_cmds = 0; + INIT_LIST_HEAD(&submit->node); INIT_LIST_HEAD(&submit->bo_list); ww_acquire_init(&submit->ticket, &reservation_ww_class); From a9e26cab40ecfd4a0d718f22fa30db4dd1edbf60 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 1 Jun 2016 14:02:51 -0400 Subject: [PATCH 0224/1050] drm/msm: fix potential submit error path issue Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 635eff17c3d1..eb4bb8b2f3a5 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -77,6 +77,11 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, void __user *userptr = u64_to_user_ptr(args->bos + (i * sizeof(submit_bo))); + /* make sure we don't have garbage flags, in case we hit + * error path before flags is initialized: + */ + submit->bos[i].flags = 0; + ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo)); if (ret) { ret = -EFAULT; From e6ec03a25f12b312b7e0c037fe4a6471c4ee5665 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 5 Jun 2016 00:23:09 -0400 Subject: [PATCH 0225/1050] autofs braino fix for do_last() It's an analogue of commit 7500c38a (fix the braino in "namei: massage lookup_slow() to be usable by lookup_one_len_unlocked()"). The same problem (->lookup()-returned unhashed negative dentry just might be an autofs one with ->d_manage() that would wait until the daemon makes it positive) applies in do_last() - we need to do follow_managed() first. Fortunately, remaining callers of follow_managed() are OK - only autofs has that weirdness (negative dentry that does not mean an instant -ENOENT)) and autofs never has its negative dentries hashed, so we can't pick one from a dcache lookup. ->d_manage() is a bloody mess ;-/ Cc: stable@vger.kernel.org # v4.6 Spotted-by: Ian Kent Signed-off-by: Al Viro --- fs/namei.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 3d9511e656ab..d7c0cac56d89 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3260,6 +3260,10 @@ static int do_last(struct nameidata *nd, got_write = false; } + error = follow_managed(&path, nd); + if (unlikely(error < 0)) + return error; + if (unlikely(d_is_negative(path.dentry))) { path_to_nameidata(&path, nd); return -ENOENT; @@ -3275,10 +3279,6 @@ static int do_last(struct nameidata *nd, return -EEXIST; } - error = follow_managed(&path, nd); - if (unlikely(error < 0)) - return error; - seq = 0; /* out of RCU mode, so the value doesn't matter */ inode = d_backing_inode(path.dentry); finish_lookup: From 3ec10d3a2ba591c87da94219c1e46b02ae97757a Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Mon, 16 May 2016 19:18:09 +0200 Subject: [PATCH 0226/1050] ipvs: update real-server binding of outgoing connections in SIP-pe Previous patch that introduced handling of outgoing packets in SIP persistent-engine did not call ip_vs_check_template() in case packet was matching a connection template. Assumption was that real-server was healthy, since it was sending a packet just in that moment. There are however real-server fault conditions requiring that association between call-id and real-server (represented by connection template) gets updated. Here is an example of the sequence of events: 1) RS1 is a back2back user agent that handled call-id1 and call-id2 2) RS1 is down and was marked as unavailable 3) new message from outside comes to IPVS with call-id1 4) IPVS reschedules the message to RS2, which becomes new call handler 5) RS2 forwards the message outside, translating call-id1 to call-id2 6) inside pe->conn_out() IPVS matches call-id2 with existing template 7) IPVS does not change association call-id2 <-> RS1 8) new message comes from client with call-id2 9) IPVS reschedules the message to a real-server potentially different from RS2, which is now the correct destination This patch introduces ip_vs_check_template() call in the handling of outgoing packets for SIP-pe. And also introduces a second optional argument for ip_vs_check_template() that allows to check if dest associated to a connection template is the same dest that was identified as the source of the packet. This is to change the real-server bound to a particular call-id independently from its availability status: the idea is that it's more reliable, for in->out direction (where internal network can be considered trusted), to always associate a call-id with the last real-server that used it in one of its messages. Think about above sequence of events where, just after step 5, RS1 returns instead to be available. Comparison of dests is done by simply comparing pointers to struct ip_vs_dest; there should be no cases where struct ip_vs_dest keeps its memory address, but represent a different real-server in terms of ip-address / port. Fixes: 39b972231536 ("ipvs: handle connections started by real-servers") Signed-off-by: Marco Angaroni Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 5 +++-- net/netfilter/ipvs/ip_vs_core.c | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index af4c10ebb241..cd6018a9ee24 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1232,7 +1232,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp); const char *ip_vs_state_name(__u16 proto, int state); void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); -int ip_vs_check_template(struct ip_vs_conn *ct); +int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest); void ip_vs_random_dropentry(struct netns_ipvs *ipvs); int ip_vs_conn_init(void); void ip_vs_conn_cleanup(void); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2cb3c626cd43..096a45103f14 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -762,7 +762,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs, * If available, return 1, otherwise invalidate this connection * template and return 0. */ -int ip_vs_check_template(struct ip_vs_conn *ct) +int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest) { struct ip_vs_dest *dest = ct->dest; struct netns_ipvs *ipvs = ct->ipvs; @@ -772,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct) */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - expire_quiescent_template(ipvs, dest)) { + expire_quiescent_template(ipvs, dest) || + (cdest && (dest != cdest))) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " "-> d:%s:%d\n", diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1207f20d24e4..2c1b498a7a27 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -321,7 +321,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Check if a template already exists */ ct = ip_vs_ct_in_get(¶m); - if (!ct || !ip_vs_check_template(ct)) { + if (!ct || !ip_vs_check_template(ct, NULL)) { struct ip_vs_scheduler *sched; /* @@ -1154,7 +1154,8 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, vport, ¶m) < 0) return NULL; ct = ip_vs_ct_in_get(¶m); - if (!ct) { + /* check if template exists and points to the same dest */ + if (!ct || !ip_vs_check_template(ct, dest)) { ct = ip_vs_conn_new(¶m, dest->af, daddr, dport, IP_VS_CONN_F_TEMPLATE, dest, 0); if (!ct) { From 1e407ee3b21f981140491d5b8a36422979ca246f Mon Sep 17 00:00:00 2001 From: Khem Raj Date: Mon, 25 Apr 2016 09:19:17 -0700 Subject: [PATCH 0227/1050] powerpc/ptrace: Fix out of bounds array access warning gcc-6 correctly warns about a out of bounds access arch/powerpc/kernel/ptrace.c:407:24: warning: index 32 denotes an offset greater than size of 'u64[32][1] {aka long long unsigned int[32][1]}' [-Warray-bounds] offsetof(struct thread_fp_state, fpr[32][0])); ^ check the end of array instead of beginning of next element to fix this Signed-off-by: Khem Raj Cc: Kees Cook Cc: Michael Ellerman Cc: Segher Boessenkool Tested-by: Aaro Koskinen Acked-by: Olof Johansson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 30a03c03fe73..060b140f03c6 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -377,7 +377,7 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, #else BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32][0])); + offsetof(struct thread_fp_state, fpr[32])); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fp_state, 0, -1); @@ -405,7 +405,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, return 0; #else BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32][0])); + offsetof(struct thread_fp_state, fpr[32])); return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fp_state, 0, -1); From 8a934efe94347eee843aeea65bdec8077a79e259 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 May 2016 09:56:07 +1000 Subject: [PATCH 0228/1050] powerpc/pseries: Fix PCI config address for DDW In commit 8445a87f7092 "powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism", the PE address was replaced with the PCI config address in order to remove dependency on EEH. According to PAPR spec, firmware (pHyp or QEMU) should accept "xxBBSSxx" format PCI config address, not "xxxxBBSS" provided by the patch. Note that "BB" is PCI bus number and "SS" is the combination of slot and function number. This fixes the PCI address passed to DDW RTAS calls. Fixes: 8445a87f7092 ("powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism") Cc: stable@vger.kernel.org # v3.4+ Reported-by: Guilherme G. Piccoli Signed-off-by: Gavin Shan Tested-by: Guilherme G. Piccoli Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index b7dfc1359d01..3e8865b187de 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -927,7 +927,7 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, dn = pci_device_to_OF_node(dev); pdn = PCI_DN(dn); buid = pdn->phb->buid; - cfg_addr = (pdn->busno << 8) | pdn->devfn; + cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, cfg_addr, BUID_HI(buid), BUID_LO(buid)); @@ -956,7 +956,7 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, dn = pci_device_to_OF_node(dev); pdn = PCI_DN(dn); buid = pdn->phb->buid; - cfg_addr = (pdn->busno << 8) | pdn->devfn; + cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ From a02cc9d3cc9f98905df214d4a57e5918473260ea Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Jun 2016 15:32:18 +0200 Subject: [PATCH 0229/1050] bnx2x: allow adding VLANs while interface is down Since implementing VLAN filtering in commit 05cc5a39ddb74 ("bnx2x: add vlan filtering offload") bnx2x refuses to add a VLAN while the interface is down: # ip link add link enp3s0f0 enp3s0f0_10 type vlan id 10 RTNETLINK answers: Bad address and in dmesg (with bnx2x.debug=0x20): bnx2x: [bnx2x_vlan_rx_add_vid:12941(enp3s0f0)]Ignoring VLAN configuration the interface is down Other drivers have no problem with this. Fix this peculiar behavior in the following way: - Accept requests to add/kill VID regardless of the device state. Maintain the requested list of VIDs in the bp->vlan_reg list. - If the device is up, try to configure the VID list into the hardware. If we run out of VLAN credits or encounter a failure configuring an entry, fall back to accepting all VLANs. If we successfully configure all entries from the list, turn the fallback off. - Use the same code for reconfiguring VLANs during NIC load. Signed-off-by: Michal Schmidt Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 151 +++++++----------- 1 file changed, 62 insertions(+), 89 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c5fe915870ad..a59d55e25d5f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12895,52 +12895,71 @@ static int __bnx2x_vlan_configure_vid(struct bnx2x *bp, u16 vid, bool add) return rc; } -int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp) +static int bnx2x_vlan_configure_vid_list(struct bnx2x *bp) { struct bnx2x_vlan_entry *vlan; int rc = 0; - if (!bp->vlan_cnt) { - DP(NETIF_MSG_IFUP, "No need to re-configure vlan filters\n"); - return 0; - } - + /* Configure all non-configured entries */ list_for_each_entry(vlan, &bp->vlan_reg, link) { - /* Prepare for cleanup in case of errors */ - if (rc) { - vlan->hw = false; - continue; - } - - if (!vlan->hw) + if (vlan->hw) continue; - DP(NETIF_MSG_IFUP, "Re-configuring vlan 0x%04x\n", vlan->vid); + if (bp->vlan_cnt >= bp->vlan_credit) + return -ENOBUFS; rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true); if (rc) { - BNX2X_ERR("Unable to configure VLAN %d\n", vlan->vid); - vlan->hw = false; - rc = -EINVAL; - continue; + BNX2X_ERR("Unable to config VLAN %d\n", vlan->vid); + return rc; } + + DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n", vlan->vid); + vlan->hw = true; + bp->vlan_cnt++; } - return rc; + return 0; +} + +static void bnx2x_vlan_configure(struct bnx2x *bp, bool set_rx_mode) +{ + bool need_accept_any_vlan; + + need_accept_any_vlan = !!bnx2x_vlan_configure_vid_list(bp); + + if (bp->accept_any_vlan != need_accept_any_vlan) { + bp->accept_any_vlan = need_accept_any_vlan; + DP(NETIF_MSG_IFUP, "Accept all VLAN %s\n", + bp->accept_any_vlan ? "raised" : "cleared"); + if (set_rx_mode) { + if (IS_PF(bp)) + bnx2x_set_rx_mode_inner(bp); + else + bnx2x_vfpf_storm_rx_mode(bp); + } + } +} + +int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp) +{ + struct bnx2x_vlan_entry *vlan; + + /* The hw forgot all entries after reload */ + list_for_each_entry(vlan, &bp->vlan_reg, link) + vlan->hw = false; + bp->vlan_cnt = 0; + + /* Don't set rx mode here. Our caller will do it. */ + bnx2x_vlan_configure(bp, false); + + return 0; } static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct bnx2x *bp = netdev_priv(dev); struct bnx2x_vlan_entry *vlan; - bool hw = false; - int rc = 0; - - if (!netif_running(bp->dev)) { - DP(NETIF_MSG_IFUP, - "Ignoring VLAN configuration the interface is down\n"); - return -EFAULT; - } DP(NETIF_MSG_IFUP, "Adding VLAN %d\n", vid); @@ -12948,93 +12967,47 @@ static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) if (!vlan) return -ENOMEM; - bp->vlan_cnt++; - if (bp->vlan_cnt > bp->vlan_credit && !bp->accept_any_vlan) { - DP(NETIF_MSG_IFUP, "Accept all VLAN raised\n"); - bp->accept_any_vlan = true; - if (IS_PF(bp)) - bnx2x_set_rx_mode_inner(bp); - else - bnx2x_vfpf_storm_rx_mode(bp); - } else if (bp->vlan_cnt <= bp->vlan_credit) { - rc = __bnx2x_vlan_configure_vid(bp, vid, true); - hw = true; - } - vlan->vid = vid; - vlan->hw = hw; + vlan->hw = false; + list_add_tail(&vlan->link, &bp->vlan_reg); - if (!rc) { - list_add(&vlan->link, &bp->vlan_reg); - } else { - bp->vlan_cnt--; - kfree(vlan); - } + if (netif_running(dev)) + bnx2x_vlan_configure(bp, true); - DP(NETIF_MSG_IFUP, "Adding VLAN result %d\n", rc); - - return rc; + return 0; } static int bnx2x_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct bnx2x *bp = netdev_priv(dev); struct bnx2x_vlan_entry *vlan; + bool found = false; int rc = 0; - if (!netif_running(bp->dev)) { - DP(NETIF_MSG_IFUP, - "Ignoring VLAN configuration the interface is down\n"); - return -EFAULT; - } - DP(NETIF_MSG_IFUP, "Removing VLAN %d\n", vid); - if (!bp->vlan_cnt) { - BNX2X_ERR("Unable to kill VLAN %d\n", vid); - return -EINVAL; - } - list_for_each_entry(vlan, &bp->vlan_reg, link) - if (vlan->vid == vid) + if (vlan->vid == vid) { + found = true; break; + } - if (vlan->vid != vid) { + if (!found) { BNX2X_ERR("Unable to kill VLAN %d - not found\n", vid); return -EINVAL; } - if (vlan->hw) + if (netif_running(dev) && vlan->hw) { rc = __bnx2x_vlan_configure_vid(bp, vid, false); + DP(NETIF_MSG_IFUP, "HW deconfigured for VLAN %d\n", vid); + bp->vlan_cnt--; + } list_del(&vlan->link); kfree(vlan); - bp->vlan_cnt--; - - if (bp->vlan_cnt <= bp->vlan_credit && bp->accept_any_vlan) { - /* Configure all non-configured entries */ - list_for_each_entry(vlan, &bp->vlan_reg, link) { - if (vlan->hw) - continue; - - rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true); - if (rc) { - BNX2X_ERR("Unable to config VLAN %d\n", - vlan->vid); - continue; - } - DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n", - vlan->vid); - vlan->hw = true; - } - DP(NETIF_MSG_IFUP, "Accept all VLAN Removed\n"); - bp->accept_any_vlan = false; - if (IS_PF(bp)) - bnx2x_set_rx_mode_inner(bp); - else - bnx2x_vfpf_storm_rx_mode(bp); - } + if (netif_running(dev)) + bnx2x_vlan_configure(bp, true); DP(NETIF_MSG_IFUP, "Removing VLAN result %d\n", rc); From d865177a5e749827f248f6363f5100d3a2f66b0f Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 3 Jun 2016 17:41:42 -0700 Subject: [PATCH 0230/1050] Btrfs: clear uptodate flags of pages in sys_array eb We set uptodate flag to pages in the temporary sys_array eb, but do not clear the flag after free eb. As the special btree inode may still hold a reference on those pages, the uptodate flag can remain alive in them. If btrfs_super_chunk_root has been intentionally changed to the offset of this sys_array eb, reading chunk_root will read content of sys_array and it will skip our beautiful checks in btree_readpage_end_io_hook() because of "pages of eb are uptodate => eb is uptodate" This adds the 'clear uptodate' part to force it to read from disk. Reviewed-by: Josef Bacik Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 673c72ab4fbe..42ccde43053b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6631,12 +6631,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) sb_array_offset += len; cur_offset += len; } + clear_extent_buffer_uptodate(sb); free_extent_buffer_stale(sb); return ret; out_short_read: printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n", len, cur_offset); + clear_extent_buffer_uptodate(sb); free_extent_buffer_stale(sb); return -EIO; } From 99e3ecfcb9f4ca35192d20a5bea158b81f600062 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 3 Jun 2016 12:05:14 -0700 Subject: [PATCH 0231/1050] Btrfs: add more validation checks for superblock This adds validation checks for super_total_bytes, super_bytes_used and super_stripesize, super_num_devices. Reported-by: Vegard Nossum Reported-by: Quentin Casasnovas Reviewed-by: David Sterba Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 11 +++++++++++ fs/btrfs/volumes.c | 24 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6628fca9f4ed..2bd5f5e975cc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4130,6 +4130,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later */ + if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { + btrfs_err(fs_info, "bytes_used is too small %llu", + btrfs_super_bytes_used(sb)); + ret = -EINVAL; + } + if (!is_power_of_2(btrfs_super_stripesize(sb)) || + btrfs_super_stripesize(sb) != sectorsize) { + btrfs_err(fs_info, "invalid stripesize %u", + btrfs_super_stripesize(sb)); + ret = -EINVAL; + } if (btrfs_super_num_devices(sb) > (1UL << 31)) printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", btrfs_super_num_devices(sb)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 42ccde43053b..fd5c9e69894a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6651,6 +6651,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) struct btrfs_key found_key; int ret; int slot; + u64 total_dev = 0; root = root->fs_info->chunk_root; @@ -6692,6 +6693,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) ret = read_one_dev(root, leaf, dev_item); if (ret) goto error; + total_dev++; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -6701,6 +6703,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) } path->slots[0]++; } + + /* + * After loading chunk tree, we've got all device information, + * do another round of validation checks. + */ + if (total_dev != root->fs_info->fs_devices->total_devices) { + btrfs_err(root->fs_info, + "super_num_devices %llu mismatch with num_devices %llu found here", + btrfs_super_num_devices(root->fs_info->super_copy), + total_dev); + ret = -EINVAL; + goto error; + } + if (btrfs_super_total_bytes(root->fs_info->super_copy) < + root->fs_info->fs_devices->total_rw_bytes) { + btrfs_err(root->fs_info, + "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu", + btrfs_super_total_bytes(root->fs_info->super_copy), + root->fs_info->fs_devices->total_rw_bytes); + ret = -EINVAL; + goto error; + } ret = 0; error: unlock_chunks(root); From e06cd3dd7cea50e87663a88acdfdb7ac1c53a5ca Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 3 Jun 2016 12:05:15 -0700 Subject: [PATCH 0232/1050] Btrfs: add validadtion checks for chunk loading To prevent fuzzed filesystem images from panic the whole system, we need various validation checks to refuse to mount such an image if btrfs finds any invalid value during loading chunks, including both sys_array and regular chunks. Note that these checks may not be sufficient to cover all corner cases, feel free to add more checks. Reported-by: Vegard Nossum Reported-by: Quentin Casasnovas Reviewed-by: David Sterba Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 110 +++++++++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 29 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fd5c9e69894a..74507b05061b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6251,6 +6251,73 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, return dev; } +/* Return -EIO if any error, otherwise return 0. */ +static int btrfs_check_chunk_valid(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical) +{ + u64 length; + u64 stripe_len; + u16 num_stripes; + u16 sub_stripes; + u64 type; + + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + + if (!num_stripes) { + btrfs_err(root->fs_info, "invalid chunk num_stripes: %u", + num_stripes); + return -EIO; + } + if (!IS_ALIGNED(logical, root->sectorsize)) { + btrfs_err(root->fs_info, + "invalid chunk logical %llu", logical); + return -EIO; + } + if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) { + btrfs_err(root->fs_info, "invalid chunk sectorsize %u", + btrfs_chunk_sector_size(leaf, chunk)); + return -EIO; + } + if (!length || !IS_ALIGNED(length, root->sectorsize)) { + btrfs_err(root->fs_info, + "invalid chunk length %llu", length); + return -EIO; + } + if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { + btrfs_err(root->fs_info, "invalid chunk stripe length: %llu", + stripe_len); + return -EIO; + } + if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & + type) { + btrfs_err(root->fs_info, "unrecognized chunk type: %llu", + ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK) & + btrfs_chunk_type(leaf, chunk)); + return -EIO; + } + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || + (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || + (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || + (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) || + ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && + num_stripes != 1)) { + btrfs_err(root->fs_info, + "invalid num_stripes:sub_stripes %u:%u for profile %llu", + num_stripes, sub_stripes, + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EIO; + } + + return 0; +} + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -6271,35 +6338,10 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, length = btrfs_chunk_length(leaf, chunk); stripe_len = btrfs_chunk_stripe_len(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - /* Validation check */ - if (!num_stripes) { - btrfs_err(root->fs_info, "invalid chunk num_stripes: %u", - num_stripes); - return -EIO; - } - if (!IS_ALIGNED(logical, root->sectorsize)) { - btrfs_err(root->fs_info, - "invalid chunk logical %llu", logical); - return -EIO; - } - if (!length || !IS_ALIGNED(length, root->sectorsize)) { - btrfs_err(root->fs_info, - "invalid chunk length %llu", length); - return -EIO; - } - if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { - btrfs_err(root->fs_info, "invalid chunk stripe length: %llu", - stripe_len); - return -EIO; - } - if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & - btrfs_chunk_type(leaf, chunk)) { - btrfs_err(root->fs_info, "unrecognized chunk type: %llu", - ~(BTRFS_BLOCK_GROUP_TYPE_MASK | - BTRFS_BLOCK_GROUP_PROFILE_MASK) & - btrfs_chunk_type(leaf, chunk)); - return -EIO; - } + + ret = btrfs_check_chunk_valid(root, leaf, chunk, logical); + if (ret) + return ret; read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); @@ -6547,6 +6589,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) u32 array_size; u32 len = 0; u32 cur_offset; + u64 type; struct btrfs_key key; ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize); @@ -6613,6 +6656,15 @@ int btrfs_read_sys_array(struct btrfs_root *root) break; } + type = btrfs_chunk_type(sb, chunk); + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { + btrfs_err(root->fs_info, + "invalid chunk type %llu in sys_array at offset %u", + type, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; From 0e4191fe6f2f1a6ac360bcca0066c536f1ff9a6e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:06:27 +0200 Subject: [PATCH 0233/1050] MAINTAINERS: Add file patterns for virtio device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Michael S. Tsirkin Cc: virtualization@lists.linux-foundation.org Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ed42cb65a19b..4cc3aaaf5712 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12131,6 +12131,7 @@ VIRTIO CORE, NET AND BLOCK DRIVERS M: "Michael S. Tsirkin" L: virtualization@lists.linux-foundation.org S: Maintained +F: Documentation/devicetree/bindings/virtio/ F: drivers/virtio/ F: tools/virtio/ F: drivers/net/virtio_net.c From 3b220cf867ce6881c7f671b27564c4cc13d8c4b8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 May 2016 09:12:55 +0300 Subject: [PATCH 0234/1050] tools/virtio/ringtest: add usage example to README Having typical usage example in the README file is more convinient than in the git history... Signed-off-by: Mike Rapoport Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/README | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README index 34e94c46104f..d83707a336c9 100644 --- a/tools/virtio/ringtest/README +++ b/tools/virtio/ringtest/README @@ -1,2 +1,6 @@ Partial implementation of various ring layouts, useful to tune virtio design. Uses shared memory heavily. + +Typical use: + +# sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring From ef1b144d23dbd745fd655b7d0a70212fcc8d0121 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 May 2016 13:15:50 +0300 Subject: [PATCH 0235/1050] tools/virtio/ringtest: fix run-on-all.sh to work without /dev/cpu /dev/cpu is only available on x86 with certain modules (e.g. msr) enabled. Using lscpu to get processors count is more portable. Signed-off-by: Mike Rapoport Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/run-on-all.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh index 52b0f71ffa8d..2e69ca812b4c 100755 --- a/tools/virtio/ringtest/run-on-all.sh +++ b/tools/virtio/ringtest/run-on-all.sh @@ -3,10 +3,10 @@ #use last CPU for host. Why not the first? #many devices tend to use cpu0 by default so #it tends to be busier -HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1) +HOST_AFFINITY=$(lscpu -p=cpu | tail -1) #run command on all cpus -for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n); +for cpu in $(seq 0 $HOST_AFFINITY) do #Don't run guest and host on same CPU #It actually works ok if using signalling From 139ab4d4e68b8cf2a611b06c006a2195dc6bedf1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 24 May 2016 15:21:05 +0300 Subject: [PATCH 0236/1050] tools/virtio: add noring tool Useful to measure testing framework overhead. Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/Makefile | 4 +- tools/virtio/ringtest/noring.c | 69 ++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 tools/virtio/ringtest/noring.c diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile index 6ba745529833..6173adae9f08 100644 --- a/tools/virtio/ringtest/Makefile +++ b/tools/virtio/ringtest/Makefile @@ -1,6 +1,6 @@ all: -all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder +all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder noring CFLAGS += -Wall CFLAGS += -pthread -O2 -ggdb @@ -15,11 +15,13 @@ ring: ring.o main.o virtio_ring_0_9: virtio_ring_0_9.o main.o virtio_ring_poll: virtio_ring_poll.o main.o virtio_ring_inorder: virtio_ring_inorder.o main.o +noring: noring.o main.o clean: -rm main.o -rm ring.o ring -rm virtio_ring_0_9.o virtio_ring_0_9 -rm virtio_ring_poll.o virtio_ring_poll -rm virtio_ring_inorder.o virtio_ring_inorder + -rm noring.o noring .PHONY: all clean diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c new file mode 100644 index 000000000000..eda2f4824130 --- /dev/null +++ b/tools/virtio/ringtest/noring.c @@ -0,0 +1,69 @@ +#define _GNU_SOURCE +#include "main.h" +#include + +/* stub implementation: useful for measuring overhead */ +void alloc_ring(void) +{ +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + return 0; +} + +/* + * skb_array API provides no way for producer to find out whether a given + * buffer was consumed. Our tests merely require that a successful get_buf + * implies that add_inbuf succeed in the past, and that add_inbuf will succeed, + * fake it accordingly. + */ +void *get_buf(unsigned *lenp, void **bufp) +{ + return "Buffer"; +} + +void poll_used(void) +{ +} + +void disable_call() +{ + assert(0); +} + +bool enable_call() +{ + assert(0); +} + +void kick_available(void) +{ + assert(0); +} + +/* host side */ +void disable_kick() +{ + assert(0); +} + +bool enable_kick() +{ + assert(0); +} + +void poll_avail(void) +{ +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + return true; +} + +void call_used(void) +{ + assert(0); +} From 5f9e1059d9347191b271bf7d13bd83db57594d2a Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 16 Sep 2015 15:34:53 +0200 Subject: [PATCH 0237/1050] btrfs: advertise which crc32c implementation is being used at module load Since several architectures support hardware-accelerated crc32c calculation, it would be nice to confirm that btrfs is actually using it. We can see an elevated use count for the module, but it doesn't actually show who the users are. This patch simply prints the name of the driver after successfully initializing the shash. Signed-off-by: Jeff Mahoney [ added a helper and used in module load-time message ] Signed-off-by: David Sterba --- fs/btrfs/hash.c | 5 +++++ fs/btrfs/hash.h | 1 + fs/btrfs/super.c | 5 +++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index aae520b2aee5..a97fdc156a03 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -24,6 +24,11 @@ int __init btrfs_hash_init(void) return PTR_ERR_OR_ZERO(tfm); } +const char* btrfs_crc32c_impl(void) +{ + return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)); +} + void btrfs_hash_exit(void) { crypto_free_shash(tfm); diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index 118a2316e5d3..c3a2ec554361 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -22,6 +22,7 @@ int __init btrfs_hash_init(void); void btrfs_hash_exit(void); +const char* btrfs_crc32c_impl(void); u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4e59a91a11e0..4397a303fc6c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2303,7 +2303,7 @@ static void btrfs_interface_exit(void) static void btrfs_print_mod_info(void) { - printk(KERN_INFO "Btrfs loaded" + printk(KERN_INFO "Btrfs loaded, crc32c=%s" #ifdef CONFIG_BTRFS_DEBUG ", debug=on" #endif @@ -2313,7 +2313,8 @@ static void btrfs_print_mod_info(void) #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY ", integrity-checker=on" #endif - "\n"); + "\n", + btrfs_crc32c_impl()); } static int btrfs_run_sanity_tests(void) From 08dd8cd06ed95625b9e2fac43c78fcb45b7eaf94 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 3 Jun 2016 19:00:59 +0100 Subject: [PATCH 0238/1050] x86/msr: Use the proper trace point conditional for writes The msr tracing for writes is incorrectly conditional on the read trace. Fixes: 7f47d8cc039f "x86, tracing, perf: Add trace point for MSR accesses" Signed-off-by: Dr. David Alan Gilbert Cc: stable@vger.kernel.org Cc: ak@linux.intel.com Link: http://lkml.kernel.org/r/1464976859-21850-1-git-send-email-dgilbert@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/msr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7dc1d8fef7fd..b5fee97813cd 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -122,7 +122,7 @@ notrace static inline void native_write_msr(unsigned int msr, "2:\n" _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) : : "c" (msr), "a"(low), "d" (high) : "memory"); - if (msr_tracepoint_active(__tracepoint_read_msr)) + if (msr_tracepoint_active(__tracepoint_write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } @@ -141,7 +141,7 @@ notrace static inline int native_write_msr_safe(unsigned int msr, : "c" (msr), "0" (low), "d" (high), [fault] "i" (-EIO) : "memory"); - if (msr_tracepoint_active(__tracepoint_read_msr)) + if (msr_tracepoint_active(__tracepoint_write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), err); return err; } From b581755b1c565391c72d03b157ba2dd0b18e9d15 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 6 Jun 2016 16:21:37 +0200 Subject: [PATCH 0239/1050] ovl: xattr filter fix a) ovl_need_xattr_filter() is wrong, we can have multiple lower layers overlaid, all of which (except the lowest one) honouring the "trusted.overlay.opaque" xattr. So need to filter everything except the bottom and the pure-upper layer. b) we no longer can assume that inode is attached to dentry in get/setxattr. This patch unconditionally filters private xattrs to fix both of the above. Performance impact for get/removexattrs is likely in the noise. For listxattrs it might be measurable in pathological cases, but I very much hope nobody cares. If they do, we'll fix it then. Reported-by: Vivek Goyal Signed-off-by: Miklos Szeredi Fixes: b96809173e94 ("security_d_instantiate(): move to the point prior to attaching dentry to inode") --- fs/overlayfs/inode.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 0ed7c4012437..1dbeab6cf96e 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -238,41 +238,27 @@ out: return err; } -static bool ovl_need_xattr_filter(struct dentry *dentry, - enum ovl_path_type type) -{ - if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER) - return S_ISDIR(dentry->d_inode->i_mode); - else - return false; -} - ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *realdentry = ovl_dentry_real(dentry); - if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + if (ovl_is_private_xattr(name)) return -ENODATA; - return vfs_getxattr(realpath.dentry, name, value, size); + return vfs_getxattr(realdentry, name, value, size); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; int off; - res = vfs_listxattr(realpath.dentry, list, size); + res = vfs_listxattr(realdentry, list, size); if (res <= 0 || size == 0) return res; - if (!ovl_need_xattr_filter(dentry, type)) - return res; - /* filter out private xattrs */ for (off = 0; off < res;) { char *s = list + off; @@ -302,7 +288,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name) goto out; err = -ENODATA; - if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + if (ovl_is_private_xattr(name)) goto out_drop_write; if (!OVL_TYPE_UPPER(type)) { From ed9e4afdb0551e3ef4ee8433fe664433a20ef73a Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Wed, 1 Jun 2016 19:18:26 +0800 Subject: [PATCH 0240/1050] Btrfs: self-tests: Execute page straddling test only when nodesize < PAGE_SIZE On ppc64, PAGE_SIZE is 64k which is same as BTRFS_MAX_METADATA_BLOCKSIZE. In such a scenario, we will never be able to have an extent buffer containing more than one page. Hence in such cases this commit does not execute the page straddling tests. Reviewed-by: Josef Bacik Signed-off-by: Feifei Xu Signed-off-by: Chandan Rajendra Signed-off-by: David Sterba --- fs/btrfs/tests/extent-io-tests.c | 49 +++++++++++++++++++------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 755fceb2e5f5..2794fed71fa4 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -21,6 +21,7 @@ #include #include #include "btrfs-tests.h" +#include "../ctree.h" #include "../extent_io.h" #define PROCESS_UNLOCK (1 << 0) @@ -298,25 +299,29 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, return -EINVAL; } - bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, - sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, - sizeof(long) * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { - test_msg("Setting straddling pages failed\n"); - return -EINVAL; - } + /* Straddling pages test */ + if (len > PAGE_SIZE) { + bitmap_set(bitmap, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Setting straddling pages failed\n"); + return -EINVAL; + } - bitmap_set(bitmap, 0, len * BITS_PER_BYTE); - bitmap_clear(bitmap, - (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, - sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); - extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, - sizeof(long) * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { - test_msg("Clearing straddling pages failed\n"); - return -EINVAL; + bitmap_set(bitmap, 0, len * BITS_PER_BYTE); + bitmap_clear(bitmap, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); + extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Clearing straddling pages failed\n"); + return -EINVAL; + } } /* @@ -359,7 +364,13 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) int ret; test_msg("Running extent buffer bitmap tests\n"); - len = sectorsize * 4; + + /* + * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than + * BTRFS_MAX_METADATA_BLOCKSIZE. + */ + len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE) + ? sectorsize * 4 : sectorsize; bitmap = kmalloc(len, GFP_KERNEL); if (!bitmap) { From d94f43b4c6b88b9393422868d8dfde14c5923858 Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Wed, 1 Jun 2016 19:18:27 +0800 Subject: [PATCH 0241/1050] Btrfs: self-tests: Support testing all possible sectorsizes and nodesizes To test all possible sectorsizes, this commit adds a sectorsize array. This commit executes the tests for all possible sectorsizes and nodesizes. Reviewed-by: Josef Bacik Signed-off-by: Chandan Rajendra Signed-off-by: Feifei Xu Signed-off-by: David Sterba --- fs/btrfs/super.c | 54 ++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5b0b354ca594..c49d7ae69617 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2318,32 +2318,42 @@ static void btrfs_print_mod_info(void) static int btrfs_run_sanity_tests(void) { - int ret; + int ret, i; u32 sectorsize, nodesize; - - sectorsize = PAGE_SIZE; - nodesize = PAGE_SIZE; + u32 test_sectorsize[] = { + PAGE_SIZE, + }; ret = btrfs_init_test_fs(); if (ret) return ret; - - ret = btrfs_test_free_space_cache(sectorsize, nodesize); - if (ret) - goto out; - ret = btrfs_test_extent_buffer_operations(sectorsize, - nodesize); - if (ret) - goto out; - ret = btrfs_test_extent_io(sectorsize, nodesize); - if (ret) - goto out; - ret = btrfs_test_inodes(sectorsize, nodesize); - if (ret) - goto out; - ret = btrfs_test_qgroups(sectorsize, nodesize); - if (ret) - goto out; - ret = btrfs_test_free_space_tree(sectorsize, nodesize); + for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) { + sectorsize = test_sectorsize[i]; + for (nodesize = sectorsize; + nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE; + nodesize <<= 1) { + pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n", + sectorsize, nodesize); + ret = btrfs_test_free_space_cache(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_extent_buffer_operations(sectorsize, + nodesize); + if (ret) + goto out; + ret = btrfs_test_extent_io(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_inodes(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_qgroups(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_free_space_tree(sectorsize, nodesize); + if (ret) + goto out; + } + } out: btrfs_destroy_test_fs(); return ret; From ef9f2db365c31433e52b0c5863793273bb632666 Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Wed, 1 Jun 2016 19:18:28 +0800 Subject: [PATCH 0242/1050] Btrfs: self-tests: Use macros instead of constants and add missing newline This commit replaces numerical constants with appropriate preprocessor macros. Reviewed-by: Josef Bacik Signed-off-by: Chandan Rajendra Signed-off-by: Feifei Xu Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +- fs/btrfs/tests/extent-buffer-tests.c | 2 +- fs/btrfs/tests/qgroup-tests.c | 44 ++++++++++++++++++---------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 06ef433552b3..c3764ddb2a46 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1323,7 +1323,8 @@ struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize) if (!root) return ERR_PTR(-ENOMEM); /* We don't use the stripesize in selftest, set it as sectorsize */ - __setup_root(nodesize, sectorsize, sectorsize, root, NULL, 1); + __setup_root(nodesize, sectorsize, sectorsize, root, NULL, + BTRFS_ROOT_TREE_OBJECTID); set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); root->alloc_bytenr = 0; diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index 17b110f39f06..4f8cbd1ec5ee 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -225,6 +225,6 @@ out: int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize) { - test_msg("Running extent buffer operation tests"); + test_msg("Running extent buffer operation tests\n"); return test_btrfs_split_item(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 9c1d27352f19..57a12c0d680b 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -229,7 +229,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, btrfs_init_dummy_trans(&trans); test_msg("Qgroup basic add\n"); - ret = btrfs_create_qgroup(NULL, fs_info, 5); + ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID); if (ret) { test_msg("Couldn't create a qgroup %d\n", ret); return ret; @@ -247,7 +247,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } - ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); if (ret) return ret; @@ -266,7 +267,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, nodesize, nodesize)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -299,7 +301,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -325,8 +327,11 @@ static int test_multiple_refs(struct btrfs_root *root, test_msg("Qgroup multiple refs test\n"); - /* We have 5 created already from the previous test */ - ret = btrfs_create_qgroup(NULL, fs_info, 256); + /* + * We have BTRFS_FS_TREE_OBJECTID created already from the + * previous test. + */ + ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID); if (ret) { test_msg("Couldn't create a qgroup %d\n", ret); return ret; @@ -339,7 +344,8 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); if (ret) return ret; @@ -358,7 +364,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; @@ -371,7 +377,8 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = add_tree_ref(root, nodesize, nodesize, 0, 256); + ret = add_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); if (ret) return ret; @@ -390,12 +397,14 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, nodesize, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 256, nodesize, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID, + nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -407,7 +416,8 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = remove_extent_ref(root, nodesize, nodesize, 0, 256); + ret = remove_extent_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); if (ret) return ret; @@ -426,12 +436,14 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID, + 0, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 5, nodesize, nodesize)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -490,7 +502,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) goto out; } - tmp_root->root_key.objectid = 5; + tmp_root->root_key.objectid = BTRFS_FS_TREE_OBJECTID; root->fs_info->fs_root = tmp_root; ret = btrfs_insert_fs_root(root->fs_info, tmp_root); if (ret) { @@ -505,7 +517,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) goto out; } - tmp_root->root_key.objectid = 256; + tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID; ret = btrfs_insert_fs_root(root->fs_info, tmp_root); if (ret) { test_msg("Couldn't insert fs root %d\n", ret); From 36b3dc05b4650e81eca7d60d548a92b014595eb1 Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Wed, 1 Jun 2016 19:18:29 +0800 Subject: [PATCH 0243/1050] Btrfs: self-tests: Fix test_bitmaps fail on 64k sectorsize With 64K sectorsize, 1G sized block group cannot span across bitmaps. To execute test_bitmaps() function, this commit allocates "BITS_PER_BITMAP * sectorsize + PAGE_SIZE" sized block group. Reviewed-by: Josef Bacik Reviewed-by: Chandan Rajendra Signed-off-by: Feifei Xu Signed-off-by: David Sterba --- fs/btrfs/tests/free-space-tests.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 029343b3e854..3956bb2ff84c 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -843,7 +843,13 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) test_msg("Running btrfs free space cache tests\n"); - cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024, sectorsize); + /* + * For ppc64 (with 64k page size), bytes per bitmap might be + * larger than 1G. To make bitmap test available in ppc64, + * alloc dummy block group whose size cross bitmaps. + */ + cache = btrfs_alloc_dummy_block_group(BITS_PER_BITMAP * sectorsize + + PAGE_SIZE, sectorsize); if (!cache) { test_msg("Couldn't run the tests\n"); return 0; From 34b3e6c92af1fa3f7067e4fa05ffa9d8bd41c96c Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Wed, 1 Jun 2016 19:18:30 +0800 Subject: [PATCH 0244/1050] Btrfs: self-tests: Fix extent buffer bitmap test fail on BE system In __test_eb_bitmaps(), we write random data to a bitmap. Then copy the bitmap to another bitmap that resides inside an extent buffer. Later we verify the values of corresponding bits in the bitmap and the bitmap inside the extent buffer. However, extent_buffer_test_bit() reads in byte granularity while test_bit() reads in unsigned long granularity. Hence we end up comparing wrong bits on big-endian systems such as ppc64. This commit fixes the issue by reading the bitmap in byte granularity. Reviewed-by: Josef Bacik Reviewed-by: Chandan Rajendra Signed-off-by: Feifei Xu Signed-off-by: David Sterba --- fs/btrfs/tests/extent-io-tests.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 2794fed71fa4..d19ab0317283 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -273,6 +273,16 @@ out: return ret; } +/** + * test_bit_in_byte - Determine whether a bit is set in a byte + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit_in_byte(int nr, const u8 *addr) +{ + return 1UL & (addr[nr / BITS_PER_BYTE] >> (nr & (BITS_PER_BYTE - 1))); +} + static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, unsigned long len) { @@ -338,7 +348,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, for (i = 0; i < len * BITS_PER_BYTE; i++) { int bit, bit1; - bit = !!test_bit(i, bitmap); + bit = !!test_bit_in_byte(i, (u8 *)bitmap); bit1 = !!extent_buffer_test_bit(eb, 0, i); if (bit1 != bit) { test_msg("Testing bit pattern failed\n"); From fee48cf8374569a3888fd8c8536283e6067f0cfb Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 1 Apr 2016 14:12:12 -0700 Subject: [PATCH 0245/1050] ath10k: fix deadlock when peer cannot be created We must not attempt to send WMI packets while holding the data-lock, as it may deadlock: BUG: sleeping function called from invalid context at drivers/net/wireless/ath/ath10k/wmi.c:1824 in_atomic(): 1, irqs_disabled(): 0, pid: 2878, name: wpa_supplicant ============================================= [ INFO: possible recursive locking detected ] 4.4.6+ #21 Tainted: G W O --------------------------------------------- wpa_supplicant/2878 is trying to acquire lock: (&(&ar->data_lock)->rlock){+.-...}, at: [] ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core] but task is already holding lock: (&(&ar->data_lock)->rlock){+.-...}, at: [] ath10k_peer_create+0x122/0x1ae [ath10k_core] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&ar->data_lock)->rlock); lock(&(&ar->data_lock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by wpa_supplicant/2878: #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x12/0x14 #1: (&ar->conf_mutex){+.+.+.}, at: [] ath10k_add_interface+0x3b/0xbda [ath10k_core] #2: (&(&ar->data_lock)->rlock){+.-...}, at: [] ath10k_peer_create+0x122/0x1ae [ath10k_core] #3: (rcu_read_lock){......}, at: [] rcu_read_lock+0x0/0x66 [mac80211] stack backtrace: CPU: 3 PID: 2878 Comm: wpa_supplicant Tainted: G W O 4.4.6+ #21 Hardware name: To be filled by O.E.M. To be filled by O.E.M./ChiefRiver, BIOS 4.6.5 06/07/2013 0000000000000000 ffff8801fcadf8f0 ffffffff8137086d ffffffff82681720 ffffffff82681720 ffff8801fcadf9b0 ffffffff8112e3be ffff8801fcadf920 0000000100000000 ffffffff82681720 ffffffffa0721500 ffff8801fcb8d348 Call Trace: [] dump_stack+0x81/0xb6 [] __lock_acquire+0xc5b/0xde7 [] ? ath10k_wmi_tx_beacons_iter+0x15/0x11a [ath10k_core] [] ? mark_lock+0x24/0x201 [] lock_acquire+0x132/0x1cb [] ? lock_acquire+0x132/0x1cb [] ? ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core] [] ? ath10k_wmi_cmd_send_nowait+0x1ce/0x1ce [ath10k_core] [] _raw_spin_lock_bh+0x31/0x40 [] ? ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core] [] ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core] [] ? ath10k_wmi_cmd_send_nowait+0x1ce/0x1ce [ath10k_core] [] __iterate_interfaces+0x9d/0x13d [mac80211] [] ieee80211_iterate_active_interfaces_atomic+0x32/0x3e [mac80211] [] ? ath10k_wmi_cmd_send_nowait+0x1ce/0x1ce [ath10k_core] [] ath10k_wmi_tx_beacons_nowait.isra.13+0x14/0x16 [ath10k_core] [] ath10k_wmi_cmd_send+0x71/0x242 [ath10k_core] [] ath10k_wmi_peer_delete+0x3f/0x42 [ath10k_core] [] ath10k_peer_create+0x15e/0x1ae [ath10k_core] [] ath10k_add_interface+0x70d/0xbda [ath10k_core] [] drv_add_interface+0x123/0x1a5 [mac80211] [] ieee80211_do_open+0x351/0x667 [mac80211] [] ieee80211_open+0x49/0x4c [mac80211] [] __dev_open+0x88/0xde [] __dev_change_flags+0xa4/0x13a [] dev_change_flags+0x1f/0x54 [] devinet_ioctl+0x2b9/0x5c9 [] ? copy_to_user+0x32/0x38 [] inet_ioctl+0x81/0x9d [] ? inet_ioctl+0x81/0x9d [] sock_do_ioctl+0x20/0x3d [] sock_ioctl+0x222/0x22e [] do_vfs_ioctl+0x453/0x4d7 [] ? __sys_recvmsg+0x4c/0x5b [] ? __fget_light+0x48/0x6c [] SyS_ioctl+0x52/0x74 [] entry_SYSCALL_64_fastpath+0x16/0x7a Signed-off-by: Ben Greear Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 6dd1d26b357f..4040f9413e86 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -679,10 +679,10 @@ static int ath10k_peer_create(struct ath10k *ar, peer = ath10k_peer_find(ar, vdev_id, addr); if (!peer) { + spin_unlock_bh(&ar->data_lock); ath10k_warn(ar, "failed to find peer %pM on vdev %i after creation\n", addr, vdev_id); ath10k_wmi_peer_delete(ar, vdev_id, addr); - spin_unlock_bh(&ar->data_lock); return -ENOENT; } From b10c22e5f9902a329450c2027e9291b71e9f1602 Mon Sep 17 00:00:00 2001 From: Herve Jourdain Date: Wed, 1 Jun 2016 02:24:46 +0800 Subject: [PATCH 0246/1050] drm/vc4: Fix ioctl permissions for render nodes. Contrary to other flags to DRM_IOCTL_DEF_DRV(), which restrict usage, the flag for render node is an enabler (the IOCTL can't be used from render node if it's not present). So DRM_RENDER_ALLOW needs to be added to all the flags that were previously 0. Signed-off-by: Herve Jourdain Reviewed-by: Eric Anholt Fixes: 0cd3e2747662 ("drm/vc4: Add missing render node support") --- drivers/gpu/drm/vc4/vc4_drv.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index ef7de8eb2b63..250ed7e3754c 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -66,12 +66,12 @@ static const struct file_operations vc4_drm_fops = { }; static const struct drm_ioctl_desc vc4_drm_ioctls[] = { - DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), }; From 56d1fe0979dc9b73c1c12ee07722ac380d42a0c4 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 18 May 2016 14:02:46 +0200 Subject: [PATCH 0247/1050] drm/vc4: Make pageflip completion handling more robust. Protect both the setup of the pageflip event and the latching of the new requested displaylist head pointer by the event lock, so we can't get into a situation where vc4_atomic_flush latches the new display list via HVS_WRITE, then immediately gets preempted before queueing the pageflip event, then the page-flip completes in hw and the vc4_crtc_handle_page_flip() runs and no-ops due to lack of a pending pageflip event, then vc4_atomic_flush continues and only then queues the pageflip event - after the page flip handling already no-oped. This would cause flip completion handling only at the next vblank - one frame too late. In vc4_crtc_handle_page_flip() check the actual DL head pointer in SCALER_DISPLACTX against the requested pointer for page flip to make sure that the flip actually really completed in the current vblank and doesn't get deferred to the next one because the DL head pointer was written a bit too late into SCALER_DISPLISTX, after start of vblank, and missed the boat. This avoids handling a pageflip completion too early - one frame too early. According to Eric, DL head pointer updates which were written into the HVS DISPLISTX reg get committed to hardware at the last pixel of active scanout. Our vblank interrupt handler, as triggered by PV_INT_VFP_START irq, gets to run earliest at the first pixel of HBLANK at the end of the last scanline of active scanout, ie. vblank irq handling runs at least 1 pixel duration after a potential pageflip completion happened in hardware. This ordering of events in the hardware, together with the lock protection and SCALER_DISPLACTX sampling of this patch, guarantees that pageflip completion handling only runs at exactly the vblank irq of actual pageflip completion in all cases. Background info from Eric about the relative timing of HVS, PV's and trigger points for interrupts, DL updates: https://lists.freedesktop.org/archives/dri-devel/2016-May/107510.html Tested on RPi 2B with hardware timing measurement equipment and shown to no longer complete flips too early or too late. Signed-off-by: Mario Kleiner Reviewed-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 28 ++++++++++++++++++---------- drivers/gpu/drm/vc4/vc4_regs.h | 4 ++++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index e9befb6d10fd..0f18b76c7906 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -456,14 +456,6 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), - vc4_state->mm.start); - - if (debug_dump_regs) { - DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); - vc4_hvs_dump_state(dev); - } - if (crtc->state->event) { unsigned long flags; @@ -473,8 +465,20 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, spin_lock_irqsave(&dev->event_lock, flags); vc4_crtc->event = crtc->state->event; - spin_unlock_irqrestore(&dev->event_lock, flags); crtc->state->event = NULL; + + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); + + spin_unlock_irqrestore(&dev->event_lock, flags); + } else { + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); + } + + if (debug_dump_regs) { + DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); + vc4_hvs_dump_state(dev); } } @@ -500,10 +504,14 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc) { struct drm_crtc *crtc = &vc4_crtc->base; struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); + u32 chan = vc4_crtc->channel; unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); - if (vc4_crtc->event) { + if (vc4_crtc->event && + (vc4_state->mm.start == HVS_READ(SCALER_DISPLACTX(chan)))) { drm_crtc_send_vblank_event(crtc, vc4_crtc->event); vc4_crtc->event = NULL; drm_crtc_vblank_put(crtc); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 6163b95c5411..f99eece4cc97 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -341,6 +341,10 @@ #define SCALER_DISPLACT0 0x00000030 #define SCALER_DISPLACT1 0x00000034 #define SCALER_DISPLACT2 0x00000038 +#define SCALER_DISPLACTX(x) (SCALER_DISPLACT0 + \ + (x) * (SCALER_DISPLACT1 - \ + SCALER_DISPLACT0)) + #define SCALER_DISPCTRL0 0x00000040 # define SCALER_DISPCTRLX_ENABLE BIT(31) # define SCALER_DISPCTRLX_RESET BIT(30) From a8953c52b95167b5d21a66f0859751570271d834 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 3 Jun 2016 14:37:40 +1000 Subject: [PATCH 0248/1050] drm/nouveau/disp/sor/gf119: both links use the same training register It appears that, for whatever reason, both link A and B use the same register to control the training pattern. It's a little odd, as the GPUs before this (Tesla/Fermi1) have per-link registers, as do newer GPUs (Maxwell). Fixes the third DP output on NVS 510 (GK107). Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c index b4b41b135643..5111560b4809 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c @@ -40,8 +40,7 @@ static int gf119_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern) { struct nvkm_device *device = outp->base.disp->engine.subdev.device; - const u32 loff = gf119_sor_loff(outp); - nvkm_mask(device, 0x61c110 + loff, 0x0f0f0f0f, 0x01010101 * pattern); + nvkm_mask(device, 0x61c110, 0x0f0f0f0f, 0x01010101 * pattern); return 0; } From 4691409b3e2250ed66aa8dcefa23fe765daf7add Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 3 Jun 2016 15:05:52 +1000 Subject: [PATCH 0249/1050] drm/nouveau/disp/sor/gm107: training pattern registers are like gm200 Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- .../gpu/drm/nouveau/nvkm/engine/disp/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/engine/disp/gm107.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/outpdp.h | 9 +++- .../drm/nouveau/nvkm/engine/disp/sorgf119.c | 2 +- .../drm/nouveau/nvkm/engine/disp/sorgm107.c | 53 +++++++++++++++++++ .../drm/nouveau/nvkm/engine/disp/sorgm200.c | 15 +----- 6 files changed, 64 insertions(+), 18 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index a74c5dd27dc0..e2a64ed14b22 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -18,6 +18,7 @@ nvkm-y += nvkm/engine/disp/piornv50.o nvkm-y += nvkm/engine/disp/sornv50.o nvkm-y += nvkm/engine/disp/sorg94.o nvkm-y += nvkm/engine/disp/sorgf119.o +nvkm-y += nvkm/engine/disp/sorgm107.o nvkm-y += nvkm/engine/disp/sorgm200.o nvkm-y += nvkm/engine/disp/dport.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c index b6944142d616..f4b9cf8574be 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c @@ -36,7 +36,7 @@ gm107_disp = { .outp.internal.crt = nv50_dac_output_new, .outp.internal.tmds = nv50_sor_output_new, .outp.internal.lvds = nv50_sor_output_new, - .outp.internal.dp = gf119_sor_dp_new, + .outp.internal.dp = gm107_sor_dp_new, .dac.nr = 3, .dac.power = nv50_dac_power, .dac.sense = nv50_dac_sense, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h index e9067ba4e179..4e983f6d7032 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h @@ -62,7 +62,12 @@ int g94_sor_dp_lnk_pwr(struct nvkm_output_dp *, int); int gf119_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *, struct nvkm_output **); int gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *, int, int, bool); +int gf119_sor_dp_drv_ctl(struct nvkm_output_dp *, int, int, int, int); -int gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *, - struct nvkm_output **); +int gm107_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *, + struct nvkm_output **); +int gm107_sor_dp_pattern(struct nvkm_output_dp *, int); + +int gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *, + struct nvkm_output **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c index 5111560b4809..22706c0a54b5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c @@ -63,7 +63,7 @@ gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef) return 0; } -static int +int gf119_sor_dp_drv_ctl(struct nvkm_output_dp *outp, int ln, int vs, int pe, int pc) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c new file mode 100644 index 000000000000..37790b2617c5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c @@ -0,0 +1,53 @@ +/* + * Copyright 2016 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "nv50.h" +#include "outpdp.h" + +int +gm107_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern) +{ + struct nvkm_device *device = outp->base.disp->engine.subdev.device; + const u32 soff = outp->base.or * 0x800; + const u32 data = 0x01010101 * pattern; + if (outp->base.info.sorconf.link & 1) + nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data); + else + nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data); + return 0; +} + +static const struct nvkm_output_dp_func +gm107_sor_dp_func = { + .pattern = gm107_sor_dp_pattern, + .lnk_pwr = g94_sor_dp_lnk_pwr, + .lnk_ctl = gf119_sor_dp_lnk_ctl, + .drv_ctl = gf119_sor_dp_drv_ctl, +}; + +int +gm107_sor_dp_new(struct nvkm_disp *disp, int index, + struct dcb_output *dcbE, struct nvkm_output **poutp) +{ + return nvkm_output_dp_new_(&gm107_sor_dp_func, disp, index, dcbE, poutp); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c index 2cfbef9c344f..c44fa7ea672a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c @@ -56,19 +56,6 @@ gm200_sor_dp_lane_map(struct nvkm_device *device, u8 lane) return lane * 0x08; } -static int -gm200_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern) -{ - struct nvkm_device *device = outp->base.disp->engine.subdev.device; - const u32 soff = gm200_sor_soff(outp); - const u32 data = 0x01010101 * pattern; - if (outp->base.info.sorconf.link & 1) - nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data); - else - nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data); - return 0; -} - static int gm200_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr) { @@ -129,7 +116,7 @@ gm200_sor_dp_drv_ctl(struct nvkm_output_dp *outp, static const struct nvkm_output_dp_func gm200_sor_dp_func = { - .pattern = gm200_sor_dp_pattern, + .pattern = gm107_sor_dp_pattern, .lnk_pwr = gm200_sor_dp_lnk_pwr, .lnk_ctl = gf119_sor_dp_lnk_ctl, .drv_ctl = gm200_sor_dp_drv_ctl, From fc100a7f89da85da8edd9c2e6f6e8b2490d74ae1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 3 Jun 2016 19:19:20 +0200 Subject: [PATCH 0250/1050] soreuseport: Fix reuseport_bpf testcase on 32bit architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following compiler warnings when compiling the reuseport_bpf testcase on a 32 bit platform: reuseport_bpf.c: In function ‘attach_ebpf’: reuseport_bpf.c:114:15: warning: cast from pointer to integer of ifferent size [-Wpointer-to-int-cast] Signed-off-by: Helge Deller Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_bpf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 96ba386b1b7b..4a8217448f20 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -111,9 +111,9 @@ static void attach_ebpf(int fd, uint16_t mod) memset(&attr, 0, sizeof(attr)); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insn_cnt = ARRAY_SIZE(prog); - attr.insns = (uint64_t)prog; - attr.license = (uint64_t)bpf_license; - attr.log_buf = (uint64_t)bpf_log_buf; + attr.insns = (unsigned long) &prog; + attr.license = (unsigned long) &bpf_license; + attr.log_buf = (unsigned long) &bpf_log_buf; attr.log_size = sizeof(bpf_log_buf); attr.log_level = 1; attr.kern_version = 0; @@ -351,8 +351,8 @@ static void test_filter_no_reuseport(const struct test_params p) memset(&eprog, 0, sizeof(eprog)); eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; eprog.insn_cnt = ARRAY_SIZE(ecode); - eprog.insns = (uint64_t)ecode; - eprog.license = (uint64_t)bpf_license; + eprog.insns = (unsigned long) &ecode; + eprog.license = (unsigned long) &bpf_license; eprog.kern_version = 0; memset(&cprog, 0, sizeof(cprog)); From 1957598840f47d42bb0b7f8a871717a780708686 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 3 Jun 2016 23:49:17 +0200 Subject: [PATCH 0251/1050] soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF Commit 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF") missed to add the compat case for the SO_ATTACH_REUSEPORT_CBPF option. Signed-off-by: Helge Deller Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/compat.c b/net/compat.c index 5cfd26a0006f..1373947efb50 100644 --- a/net/compat.c +++ b/net/compat.c @@ -354,7 +354,8 @@ static int do_set_sock_timeout(struct socket *sock, int level, static int compat_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - if (optname == SO_ATTACH_FILTER) + if (optname == SO_ATTACH_FILTER || + optname == SO_ATTACH_REUSEPORT_CBPF) return do_set_attach_filter(sock, level, optname, optval, optlen); if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) From 943f44d94aa26bfdcaafc40d3701e24eeb58edce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Mar 2016 08:33:16 -0700 Subject: [PATCH 0252/1050] IB/cm: Fix a recently introduced locking bug ib_cm_notify() can be called from interrupt context. Hence do not reenable interrupts unconditionally in cm_establish(). This patch avoids that lockdep reports the following warning: WARNING: CPU: 0 PID: 23317 at kernel/locking/lockdep.c:2624 trace _hardirqs_on_caller+0x112/0x1b0 DEBUG_LOCKS_WARN_ON(current->hardirq_context) Call Trace: [] dump_stack+0x67/0x92 [] __warn+0xc1/0xe0 [] warn_slowpath_fmt+0x4a/0x50 [] trace_hardirqs_on_caller+0x112/0x1b0 [] trace_hardirqs_on+0xd/0x10 [] _raw_spin_unlock_irq+0x27/0x40 [] ib_cm_notify+0x25c/0x290 [ib_cm] [] srpt_qp_event+0xa1/0xf0 [ib_srpt] [] mlx4_ib_qp_event+0x67/0xd0 [mlx4_ib] [] mlx4_qp_event+0x5a/0xc0 [mlx4_core] [] mlx4_eq_int+0x3d8/0xcf0 [mlx4_core] [] mlx4_msi_x_interrupt+0xc/0x20 [mlx4_core] [] handle_irq_event_percpu+0x64/0x100 [] handle_irq_event+0x34/0x60 [] handle_edge_irq+0x6a/0x150 [] handle_irq+0x15/0x20 [] do_IRQ+0x5c/0x110 [] common_interrupt+0x89/0x89 [] blk_run_queue_async+0x37/0x40 [] rq_completed+0x43/0x70 [dm_mod] [] dm_softirq_done+0x176/0x280 [dm_mod] [] blk_done_softirq+0x52/0x90 [] __do_softirq+0x10f/0x230 [] irq_exit+0xa8/0xb0 [] smp_trace_call_function_single_interrupt+0x2e/0x30 [] smp_call_function_single_interrupt+0x9/0x10 [] call_function_single_interrupt+0x89/0x90 Fixes: commit be4b499323bf (IB/cm: Do not queue work to a device that's going away) Signed-off-by: Bart Van Assche Cc: Erez Shitrit Cc: Sean Hefty Cc: Nikolay Borisov Cc: stable # v4.2+ Acked-by: Erez Shitrit Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1d92e091e22e..c99525512b34 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3452,14 +3452,14 @@ static int cm_establish(struct ib_cm_id *cm_id) work->cm_event.event = IB_CM_USER_ESTABLISHED; /* Check if the device started its remove_one */ - spin_lock_irq(&cm.lock); + spin_lock_irqsave(&cm.lock, flags); if (!cm_dev->going_down) { queue_delayed_work(cm.wq, &work->work, 0); } else { kfree(work); ret = -ENODEV; } - spin_unlock_irq(&cm.lock); + spin_unlock_irqrestore(&cm.lock, flags); out: return ret; From a8b7da58ec81e74a3a6982e2dba24f899b56c915 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 28 May 2016 08:01:20 +0300 Subject: [PATCH 0253/1050] IB/hfi1: fix some indenting That extra tabs are misleading. Signed-off-by: Dan Carpenter Reviewed-by: Bart Van Assche Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 5cc492e5776d..0d28a5a40fae 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1337,7 +1337,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) dma_free_coherent(&dd->pcidev->dev, sizeof(u64), (void *)dd->rcvhdrtail_dummy_kvaddr, dd->rcvhdrtail_dummy_physaddr); - dd->rcvhdrtail_dummy_kvaddr = NULL; + dd->rcvhdrtail_dummy_kvaddr = NULL; } for (ctxt = 0; tmp && ctxt < dd->num_rcv_contexts; ctxt++) { From f242d93ae92032f78840471e5c2bfc2d04ae324c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 31 May 2016 10:54:36 +0300 Subject: [PATCH 0254/1050] IB/hfi1: Avoid large frame size warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_FRAME_WARN is set to 1024 bytes, which is useful to find stack consumers, we get a warning in hfi1 driver. drivers/infiniband/hw/hfi1/affinity.c: In function ‘hfi1_get_proc_affinity’: drivers/infiniband/hw/hfi1/affinity.c:415:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=] This change removes unneeded buf[1024] declaration and usage. Fixes: f48ad614c100 ("IB/hfi1: Move driver out of staging") Signed-off-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 31 ++++++++++++--------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 6e7050ab9e16..14d7eeb09be6 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -300,16 +300,15 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) const struct cpumask *node_mask, *proc_mask = tsk_cpus_allowed(current); struct cpu_mask_set *set = &dd->affinity->proc; - char buf[1024]; /* * check whether process/context affinity has already * been set */ if (cpumask_weight(proc_mask) == 1) { - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask)); - hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s", - current->pid, current->comm, buf); + hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", + current->pid, current->comm, + cpumask_pr_args(proc_mask)); /* * Mark the pre-set CPU as used. This is atomic so we don't * need the lock @@ -318,9 +317,9 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_set_cpu(cpu, &set->used); goto done; } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask)); - hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s", - current->pid, current->comm, buf); + hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", + current->pid, current->comm, + cpumask_pr_args(proc_mask)); goto done; } @@ -356,8 +355,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ? &dd->affinity->rcv_intr.mask : &dd->affinity->rcv_intr.used)); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs)); - hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf); + hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl", + cpumask_pr_args(intrs)); /* * If we don't have a NUMA node requested, preference is towards @@ -366,18 +365,16 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) if (node == -1) node = dd->node; node_mask = cpumask_of_node(node); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask)); - hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf); + hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node, + cpumask_pr_args(node_mask)); /* diff will hold all unused cpus */ cpumask_andnot(diff, &set->mask, &set->used); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff)); - hfi1_cdbg(PROC, "unused CPUs (all) %s", buf); + hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff)); /* get cpumask of available CPUs on preferred NUMA */ cpumask_and(mask, diff, node_mask); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask)); - hfi1_cdbg(PROC, "available cpus on NUMA %s", buf); + hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask)); /* * At first, we don't want to place processes on the same @@ -395,8 +392,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_andnot(diff, &set->mask, &set->used); cpumask_andnot(mask, diff, node_mask); } - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask)); - hfi1_cdbg(PROC, "possible CPUs for process %s", buf); + hfi1_cdbg(PROC, "possible CPUs for process %*pbl", + cpumask_pr_args(mask)); cpu = cpumask_first(mask); if (cpu >= nr_cpu_ids) /* empty */ From da1f857be62ca0472024da37eab068b3b8ce0a15 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 May 2016 19:05:56 +0300 Subject: [PATCH 0255/1050] IB/core: fix an error code in ib_core_init() We should return the error code if ib_add_ibnl_clients() fails. The current code returns success. Fixes: 735c631ae99d ('IB/core: Register SA ibnl client during ib_core initialization') Signed-off-by: Dan Carpenter Reviewed-by: Mark Bloch Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5516fb070344..8b8a8d9cf134 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1024,7 +1024,8 @@ static int __init ib_core_init(void) goto err_mad; } - if (ib_add_ibnl_clients()) { + ret = ib_add_ibnl_clients(); + if (ret) { pr_warn("Couldn't register ibnl clients\n"); goto err_sa; } From 0147ebcf8927f09e1923114092f6b14c1de75a95 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Jun 2016 19:06:36 +0100 Subject: [PATCH 0256/1050] IB/core: fix null pointer deref and mem leak in error handling The current error handling in setup_hw_stats has a couple of issues. It is possible to generate a null pointer deference on the kfree of hsag->attrs[i] because two of the early error exit paths jump to the kfree when hsags NULL and not allocated. Fix this by moving the kfree on stats and jumping to that, avoiding the hsag freeing. Secondly, there is a memory leak of stats if the hsag allocation fails; instead of returning, jump to the kfree on stats. Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 5e573bb18660..ed04a7bd4481 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -899,14 +899,14 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, return; if (!stats->names || stats->num_counters <= 0) - goto err; + goto err_free_stats; hsag = kzalloc(sizeof(*hsag) + // 1 extra for the lifespan config entry sizeof(void *) * (stats->num_counters + 1), GFP_KERNEL); if (!hsag) - return; + goto err_free_stats; ret = device->get_hw_stats(device, stats, port_num, stats->num_counters); @@ -946,10 +946,11 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, return; err: - kfree(stats); for (; i >= 0; i--) kfree(hsag->attrs[i]); kfree(hsag); +err_free_stats: + kfree(stats); return; } From ce67fef68de552b14ae417511a323013b478f78e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 2 Jun 2016 11:45:01 +0200 Subject: [PATCH 0257/1050] IB/usnic: Remove unused DMA attributes The DMA attributes are set but never used. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_uiom.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 7209fbc03ccb..a0b6ebee4d8a 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -112,10 +111,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int i; int flags; dma_addr_t pa; - DEFINE_DMA_ATTRS(attrs); - - if (dmasync) - dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); if (!can_do_mlock()) return -EPERM; From ca920f5b67f1b798a1f86c675ea441b295bf8464 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 07:58:32 -0700 Subject: [PATCH 0258/1050] IB/mlx4: Fix device managed flow steering support test Perform the test for device managed flow steering support even if memory windows are not supported. I noticed this because smatch reported inconsistent indentation for the device managed flow steering support test. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Cc: Yishai Hadas Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b01ef6eee6e8..0eb09e104542 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -505,9 +505,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; else props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; - if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) - props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; } + if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) + props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; From 249f06561fc333581e48e6d388a56e3d100d23b6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 11:39:35 -0700 Subject: [PATCH 0259/1050] IB/srp: Always initialize use_fast_reg and use_fmr Avoid that mapping fails due to use_fast_reg != 0 or use_fmr != 0 if both member variables should be zero (if never_register == 1 or if neither FMR nor FR is supported). Remove an initialization that became superfluous due to changing a kmalloc() into a kzalloc() call. Fixes: 509c5f33f4f6 ("IB/srp: Prevent mapping failures") Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Laurence Oberman Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 646de170ec12..bc24b8d32ce6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3526,7 +3526,7 @@ static void srp_add_one(struct ib_device *device) int mr_page_shift, p; u64 max_pages_per_mr; - srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL); + srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); if (!srp_dev) return; @@ -3586,8 +3586,6 @@ static void srp_add_one(struct ib_device *device) IB_ACCESS_REMOTE_WRITE); if (IS_ERR(srp_dev->global_mr)) goto err_pd; - } else { - srp_dev->global_mr = NULL; } for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { From 9edba790fc52322051fd7b6589421021f0726483 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 11:40:24 -0700 Subject: [PATCH 0260/1050] IB/srp: Fix srp_map_sg_dma() Because patch "IB/srp: Move common code into the caller" was applied partially srp_map_sg_dma() doesn't work properly. Fix this by applying the remainder of that patch. See also http://thread.gmane.org/gmane.linux.drivers.rdma/35803/focus=35811. Fixes: 3849e44d1c4b ("IB/srp: Move common code into the caller") Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Laurence Oberman Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index bc24b8d32ce6..3322ed750172 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1457,7 +1457,6 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, { unsigned int sg_offset = 0; - state->desc = req->indirect_desc; state->fr.next = req->fr_list; state->fr.end = req->fr_list + ch->target->mr_per_cmd; state->sg = scat; @@ -1489,7 +1488,6 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, struct scatterlist *sg; int i; - state->desc = req->indirect_desc; for_each_sg(scat, sg, count, i) { srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), ib_sg_dma_len(dev->dev, sg), @@ -1655,6 +1653,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, target->indirect_size, DMA_TO_DEVICE); memset(&state, 0, sizeof(state)); + state.desc = req->indirect_desc; if (dev->use_fast_reg) ret = srp_map_sg_fr(&state, ch, req, scat, count); else if (dev->use_fmr) From 0270be78da8d27cc5588d2472694aa7ad2c680b3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:08:00 -0700 Subject: [PATCH 0261/1050] RDMA/core: Fix indentation Make indentation consistent. Detected by smatch. Signed-off-by: Bart Van Assche Cc: Tatyana Nikolova Cc: Steve Wise Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 43e3fa27102b..1c41b95cefec 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -506,7 +506,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, if (!nlmsg_request) { pr_info("%s: Could not find a matching request (seq = %u)\n", __func__, msg_seq); - return -EINVAL; + return -EINVAL; } pm_msg = nlmsg_request->req_buffer; local_sockaddr = (struct sockaddr_storage *) From 2190d10de58101448b0ac360facc0d5166e3d30d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:08:44 -0700 Subject: [PATCH 0262/1050] IB/mad: Fix indentation Make indentation consistent. Detected by smatch. Signed-off-by: Bart Van Assche Cc: Hal Rosenstock Cc: Ira Weiny Reviewed-By: Ira Weiny Reviewed-by: Hal Rosenstock Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 82fb511112da..2d49228f28b2 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1638,9 +1638,9 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) /* Now, check to see if there are any methods still in use */ if (!check_method_table(method)) { /* If not, release management method table */ - kfree(method); - class->method_table[mgmt_class] = NULL; - /* Any management classes left ? */ + kfree(method); + class->method_table[mgmt_class] = NULL; + /* Any management classes left ? */ if (!check_class_table(class)) { /* If not, release management class table */ kfree(class); From c0a67f6ba356521a8266694ffffa4998264d0cb0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:09:16 -0700 Subject: [PATCH 0263/1050] IB/rdmavt: Annotate rvt_reset_qp() This patch avoids that sparse reports the following warning: rdmavt/qp.c:507:17: warning: context imbalance in 'rvt_reset_qp' - unexpected unlock Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 5fa4d4d81ee0..7de5134bec85 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -502,6 +502,12 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) */ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) + __releases(&qp->s_lock) + __releases(&qp->s_hlock) + __releases(&qp->r_lock) + __acquires(&qp->r_lock) + __acquires(&qp->s_hlock) + __acquires(&qp->s_lock) { if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; From 48a0cc139fb89590fd66eea11b626b9b9f6b8e9d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:09:56 -0700 Subject: [PATCH 0264/1050] IB/hfi1: Fix indentation Make the indentation of the source code consistent. Detected by smatch. Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3b876da745a1..81619fbb5842 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7832,8 +7832,8 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) * save first 2 flits in the packet that caused * the error */ - dd->err_info_rcvport.packet_flit1 = hdr0; - dd->err_info_rcvport.packet_flit2 = hdr1; + dd->err_info_rcvport.packet_flit1 = hdr0; + dd->err_info_rcvport.packet_flit2 = hdr1; } switch (info) { case 1: @@ -11906,7 +11906,7 @@ static void update_synth_timer(unsigned long opaque) hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit); } -mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); + mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); } #define C_MAX_NAME 13 /* 12 chars + one for /0 */ From d55215c50e4eaa4b906a42ef45884d8fcbadc777 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:10:37 -0700 Subject: [PATCH 0265/1050] IB/hfi1: Use bit 0 instead of bit 1 The first argument of test_bit() and clear_bit() is a bit number and not a bitmask. Hence change that first argument from (1 << 0) into 0. This patch avoids that smatch reports the following warnings: user_sdma.c:1059: sdma_cache_evict() warn: test_bit() takes a bit number user_sdma.c:1590: sdma_rb_remove() warn: test_bit() takes a bit number Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 29f4795f866c..2eca5b7394a2 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -183,7 +183,7 @@ struct user_sdma_iovec { struct sdma_mmu_node *node; }; -#define SDMA_CACHE_NODE_EVICT BIT(0) +#define SDMA_CACHE_NODE_EVICT 0 struct sdma_mmu_node { struct mmu_rb_node rb; From 55c40648d31d0d97608f266955b8afae74e2b686 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:11:16 -0700 Subject: [PATCH 0266/1050] IB/hfi1: Suppress sparse warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid that sparse reports the following warnings for the hfi1 driver: trace.c:217:13: warning: no previous prototype for ‘print_u64_array’ [-Wmissing-prototypes] user_sdma.c:1361:17: warning: dubious: !x & y Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace.c | 13 ------------- drivers/infiniband/hw/hfi1/user_sdma.c | 4 ++-- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 79b2952c0dfb..4cfb13771897 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -214,19 +214,6 @@ const char *print_u32_array( return ret; } -const char *print_u64_array( - struct trace_seq *p, - u64 *arr, int len) -{ - int i; - const char *ret = trace_seq_buffer_ptr(p); - - for (i = 0; i < len; i++) - trace_seq_printf(p, "%s0x%016llx", i == 0 ? "" : " ", arr[i]); - trace_seq_putc(p, 0); - return ret; -} - __hfi1_trace_fn(PKT); __hfi1_trace_fn(PROC); __hfi1_trace_fn(SDMA); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 2eca5b7394a2..47ffd273ecbd 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1355,11 +1355,11 @@ static int set_txreq_header(struct user_sdma_request *req, */ SDMA_DBG(req, "TID offset %ubytes %uunits om%u", req->tidoffset, req->tidoffset / req->omfactor, - !!(req->omfactor - KDETH_OM_SMALL)); + req->omfactor != KDETH_OM_SMALL); KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, req->tidoffset / req->omfactor); KDETH_SET(hdr->kdeth.ver_tid_offset, OM, - !!(req->omfactor - KDETH_OM_SMALL)); + req->omfactor != KDETH_OM_SMALL); } done: trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt, From dcf15cbded656a12335bc4151f3f75f10080a375 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Fri, 3 Jun 2016 10:26:12 +0800 Subject: [PATCH 0267/1050] ACPI / EC: Fix a boot EC regresion by restoring boot EC support for the DSDT EC According to the Windows probing result, during the table loading, the EC device described in the ECDT should be used. And the ECDT EC is also effective during the period the namespace objects are initialized (we can see a separate process executing _STA/_INI on Windows before executing other device specific control methods, for example, EC._REG). During the device enumration, the EC device described in the DSDT should be used. But there are differences between Linux and Windows around the device probing order. Thus in Linux, we should enable the DSDT EC as early as possible before enumerating devices in order not to trigger issues related to the device enumeration order differences. This patch thus converts acpi_boot_ec_enable() into acpi_ec_dsdt_probe() to fix the gap. This also fixes a user reported regression triggered after we switched the "table loading"/"ECDT support" to be ACPI spec 2.0 compliant. Fixes: 59f0aa9480cf (ACPI 2.0 / ECDT: Remove early namespace reference from EC) Link: https://bugzilla.kernel.org/show_bug.cgi?id=119261 Reported-and-tested-by: Gabriele Mazzotta Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 2 +- drivers/acpi/ec.c | 29 ++++++++++++++++++++++------- drivers/acpi/internal.h | 2 +- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 31e8da648fff..262ca31b86d9 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1051,7 +1051,7 @@ static int __init acpi_bus_init(void) * Maybe EC region is required at bus_scan/acpi_get_devices. So it * is necessary to enable it as early as possible. */ - acpi_boot_ec_enable(); + acpi_ec_dsdt_probe(); printk(KERN_INFO PREFIX "Interpreter enabled\n"); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 0e70181f150c..73c76d646064 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1446,10 +1446,30 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context) return AE_OK; } -int __init acpi_boot_ec_enable(void) +static const struct acpi_device_id ec_device_ids[] = { + {"PNP0C09", 0}, + {"", 0}, +}; + +int __init acpi_ec_dsdt_probe(void) { - if (!boot_ec) + acpi_status status; + + if (boot_ec) return 0; + + /* + * Finding EC from DSDT if there is no ECDT EC available. When this + * function is invoked, ACPI tables have been fully loaded, we can + * walk namespace now. + */ + boot_ec = make_acpi_ec(); + if (!boot_ec) + return -ENOMEM; + status = acpi_get_devices(ec_device_ids[0].id, + ec_parse_device, boot_ec, NULL); + if (ACPI_FAILURE(status) || !boot_ec->handle) + return -ENODEV; if (!ec_install_handlers(boot_ec)) { first_ec = boot_ec; return 0; @@ -1457,11 +1477,6 @@ int __init acpi_boot_ec_enable(void) return -EFAULT; } -static const struct acpi_device_id ec_device_ids[] = { - {"PNP0C09", 0}, - {"", 0}, -}; - #if 0 /* * Some EC firmware variations refuses to respond QR_EC when SCI_EVT is not diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 7c188472d9c2..b4733b56fed1 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -180,7 +180,7 @@ typedef int (*acpi_ec_query_func) (void *data); int acpi_ec_init(void); int acpi_ec_ecdt_probe(void); -int acpi_boot_ec_enable(void); +int acpi_ec_dsdt_probe(void); void acpi_ec_block_transactions(void); void acpi_ec_unblock_transactions(void); void acpi_ec_unblock_transactions_early(void); From a27758ffaf96f89002129eedb2cc172d254099f8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 3 Jun 2016 15:05:57 -0700 Subject: [PATCH 0268/1050] net_sched: keep backlog updated with qlen For gso_skb we only update qlen, backlog should be updated too. Note, it is correct to just update these stats at one layer, because the gso_skb is cached there. Reported-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 ++++- net/sched/sch_generic.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a1fd76c22a59..6803af17dfcf 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -691,9 +691,11 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ if (!sch->gso_skb) { sch->gso_skb = sch->dequeue(sch); - if (sch->gso_skb) + if (sch->gso_skb) { /* it's still part of the queue */ + qdisc_qstats_backlog_inc(sch, sch->gso_skb); sch->q.qlen++; + } } return sch->gso_skb; @@ -706,6 +708,7 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) if (skb) { sch->gso_skb = NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { skb = sch->dequeue(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 269dd71b3828..f9e0e9c03d0a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -49,6 +49,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { q->gso_skb = skb; q->qstats.requeues++; + qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; /* it's still part of the queue */ __netif_schedule(q); @@ -92,6 +93,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; + qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; } else skb = NULL; From 97c1df3e54e811aed484a036a798b4b25d002ecf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Jun 2016 15:36:07 -0500 Subject: [PATCH 0269/1050] mnt: If fs_fully_visible fails call put_filesystem. Add this trivial missing error handling. Cc: stable@vger.kernel.org Fixes: 1b852bceb0d1 ("mnt: Refactor the logic for mounting sysfs and proc in a user namespace") Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 4fb1691b4355..9d45c8a3414f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2409,8 +2409,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type, &mnt_flags)) + if (!fs_fully_visible(type, &mnt_flags)) { + put_filesystem(type); return -EPERM; + } } } From d71ed6c930ac7d8f88f3cef6624a7e826392d61f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 27 May 2016 14:50:05 -0500 Subject: [PATCH 0270/1050] mnt: fs_fully_visible test the proper mount for MNT_LOCKED MNT_LOCKED implies on a child mount implies the child is locked to the parent. So while looping through the children the children should be tested (not their parent). Typically an unshare of a mount namespace locks all mounts together making both the parent and the slave as locked but there are a few corner cases where other things work. Cc: stable@vger.kernel.org Fixes: ceeb0e5d39fc ("vfs: Ignore unlocked mounts in fs_fully_visible") Reported-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 9d45c8a3414f..a7ec92c051f5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3273,7 +3273,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; /* Only worry about locked mounts */ - if (!(mnt_flags & MNT_LOCKED)) + if (!(child->mnt.mnt_flags & MNT_LOCKED)) continue; /* Is the directory permanetly empty? */ if (!is_empty_dir_inode(inode)) From 5156463588c3999b630d9ffc6061a54962f3c2d9 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Wed, 1 Jun 2016 12:43:32 +0200 Subject: [PATCH 0271/1050] dmaengine: mv_xor: Fix incorrect offset in dma_map_page() Upon booting, I occasionally spotted some BUGs triggered by the internal DMA test routine executed upon driver probing. This was detected by SLUB_DEBUG ("Freechain corrupt" or "Redzone overwritten"). Tracking this down located a problem in passing 0 as offset in dma_map_page(). As kmalloc, especially when used with SLUB_DEBUG, may return a non page aligned address. This patch fixes this issue by passing the correct offset in dma_map_page(). Tested on a custom Armada XP board. Signed-off-by: Stefan Roese Cc: Thomas Petazzoni Cc: Gregory CLEMENT Cc: Marcin Wojtas Signed-off-by: Vinod Koul --- drivers/dma/mv_xor.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 25d1dadcddd1..d0446a75990a 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -703,8 +703,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) goto free_resources; } - src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), 0, - PAGE_SIZE, DMA_TO_DEVICE); + src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), + (size_t)src & ~PAGE_MASK, PAGE_SIZE, + DMA_TO_DEVICE); unmap->addr[0] = src_dma; ret = dma_mapping_error(dma_chan->device->dev, src_dma); @@ -714,8 +715,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) } unmap->to_cnt = 1; - dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), 0, - PAGE_SIZE, DMA_FROM_DEVICE); + dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), + (size_t)dest & ~PAGE_MASK, PAGE_SIZE, + DMA_FROM_DEVICE); unmap->addr[1] = dest_dma; ret = dma_mapping_error(dma_chan->device->dev, dest_dma); From 2969c03763b40e946b46aee57ef083527711c69f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 6 Jun 2016 16:24:43 -0400 Subject: [PATCH 0272/1050] ARM: dts: exynos: Fix port nodes names for Exynos5250 Snow board Commit 5c9cbade0629 ("ARM: dts: exynos: Fix DTC unit name warnings in Exynos5250") fixed all the DTC warnings about mismatchs between unit names and reg properties in Exynos5250 boards DTS. But unfortunately it also added a regression on the Exynos5250 Snow Chromebook when changing the port node names since the OF graph logic expects the port nodes to be always named 'port'. The Documentation/devicetree/bindings/graph.txt binding document says that when there is more than one port, '#address-cells', '#size-cells' and 'reg' properties should be used to number the port nodes. Fixes: 5c9cbade0629 ("ARM: dts: exynos: Fix DTC unit name warnings in Exynos5250") Reported-by: Marc Zyngier Signed-off-by: Javier Martinez Canillas Tested-by: Marc Zyngier Signed-off-by: Krzysztof Kozlowski --- arch/arm/boot/dts/exynos5250-snow-common.dtsi | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/exynos5250-snow-common.dtsi b/arch/arm/boot/dts/exynos5250-snow-common.dtsi index ddfe1f558c10..fa14f77df563 100644 --- a/arch/arm/boot/dts/exynos5250-snow-common.dtsi +++ b/arch/arm/boot/dts/exynos5250-snow-common.dtsi @@ -242,7 +242,7 @@ hpd-gpios = <&gpx0 7 GPIO_ACTIVE_HIGH>; ports { - port0 { + port { dp_out: endpoint { remote-endpoint = <&bridge_in>; }; @@ -485,13 +485,20 @@ edid-emulation = <5>; ports { - port0 { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + bridge_out: endpoint { remote-endpoint = <&panel_in>; }; }; - port1 { + port@1 { + reg = <1>; + bridge_in: endpoint { remote-endpoint = <&dp_out>; }; From a7d7865fecd83adb98b20eca5eddef7efc94831d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 6 Jun 2016 16:24:44 -0400 Subject: [PATCH 0273/1050] ARM: dts: exynos: Fix port nodes names for Exynos5420 Peach Pit board Commit bea7eef6949c ("ARM: dts: exynos: Fix DTC unit name warnings in Peach Pit") fixed the DTC warnings about mismatches between unit names and reg properties in the Exynos5420 Peach Pit DTS. But unfortunately it also added a regression on the Peach Pit when changing the port node names since the OF graph logic expects the port nodes to be always named 'port'. The Documentation/devicetree/bindings/graph.txt binding document says that when there is more than one port, '#address-cells', '#size-cells' and 'reg' properties should be used to number the port nodes. Fixes: bea7eef6949c ("ARM: dts: exynos: Fix DTC unit name warnings in Peach Pit") Reported-by: Marc Zyngier Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski --- arch/arm/boot/dts/exynos5420-peach-pit.dts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index f9d2e4f1a0e0..1de972d46a87 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -163,7 +163,7 @@ hpd-gpios = <&gpx2 6 GPIO_ACTIVE_HIGH>; ports { - port0 { + port { dp_out: endpoint { remote-endpoint = <&bridge_in>; }; @@ -631,13 +631,20 @@ use-external-pwm; ports { - port0 { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + bridge_out: endpoint { remote-endpoint = <&panel_in>; }; }; - port1 { + port@1 { + reg = <1>; + bridge_in: endpoint { remote-endpoint = <&dp_out>; }; From 8d0a0710ea0d22881fdb40eb79d346a98cc64ae6 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 2 Jun 2016 17:59:54 +0300 Subject: [PATCH 0274/1050] ath10k: fix crash related to printing features This looks like a regression from commit c4cdf753ed42 ("ath10k: move fw_features to struct ath10k_fw_file"), we were printing the features from a wrong struct. Fixes: c4cdf753ed42 ("ath10k: move fw_features to struct ath10k_fw_file") Signed-off-by: Ben Greear [kvalo@qca.qualcomm.com: improve commit log] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 49af62428c88..a92a0ba829f5 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1083,7 +1083,7 @@ int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name, } ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "features", "", - ar->running_fw->fw_file.fw_features, + fw_file->fw_features, sizeof(fw_file->fw_features)); break; case ATH10K_FW_IE_FW_IMAGE: From dab38e43b298501a4e8807b56117c029e2e98383 Mon Sep 17 00:00:00 2001 From: Torsten Hilbrich Date: Tue, 7 Jun 2016 13:14:21 +0200 Subject: [PATCH 0275/1050] ALSA: hda/realtek: Add T560 docking unit fixup Tested with Lenovo Ultradock. Fixes the non-working headphone jack on the docking unit. Signed-off-by: Torsten Hilbrich Tested-by: Torsten Hilbrich Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 49d581aed7f9..0fe18ede3e85 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5634,6 +5634,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), From 41aaa99fab6ceaa4b533c2b6ad4913987ddb3ddc Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 6 Jun 2016 19:52:55 -0400 Subject: [PATCH 0276/1050] IB/core: Fix array length allocation The new sysfs hw_counters code had an off by one in its array allocation length. Fix that and the comment along with it. Reported-by: Mark Bloch Fixes: b40f4757daa1 (IB/core: Make device counter infrastructure dynamic) Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index ed04a7bd4481..2bc43444841b 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -901,9 +901,12 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, if (!stats->names || stats->num_counters <= 0) goto err_free_stats; + /* + * Two extra attribue elements here, one for the lifespan entry and + * one to NULL terminate the list for the sysfs core code + */ hsag = kzalloc(sizeof(*hsag) + - // 1 extra for the lifespan config entry - sizeof(void *) * (stats->num_counters + 1), + sizeof(void *) * (stats->num_counters + 2), GFP_KERNEL); if (!hsag) goto err_free_stats; From 495fbae6e2c115099921ba33b1e1bea1190b5280 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Tue, 7 Jun 2016 07:43:46 -0400 Subject: [PATCH 0277/1050] IB/core: fix error unwind in sysfs hw counters code Between the initial and final versions of the function setup_hw_stats, the order of variable initialization was changed. However, the unwind flow on error did not properly keep up with the flow changes. Make the unwind flow match a proper unwind of the allocation flow, then remove no longer needed variable initializations. Fixes: b40f4757daa1 (IB/core: Make device counter infrastructure dynamic) Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 2bc43444841b..35d0d47e6f8c 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -889,9 +889,9 @@ static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num) static void setup_hw_stats(struct ib_device *device, struct ib_port *port, u8 port_num) { - struct attribute_group *hsag = NULL; + struct attribute_group *hsag; struct rdma_hw_stats *stats; - int i = 0, ret; + int i, ret; stats = device->alloc_hw_stats(device, port_num); @@ -914,7 +914,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, ret = device->get_hw_stats(device, stats, port_num, stats->num_counters); if (ret != stats->num_counters) - goto err; + goto err_free_hsag; stats->timestamp = jiffies; @@ -951,6 +951,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, err: for (; i >= 0; i--) kfree(hsag->attrs[i]); +err_free_hsag: kfree(hsag); err_free_stats: kfree(stats); From d7012467a95b767b4d3beb2e027aa24a83f12f0f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sat, 4 Jun 2016 15:15:18 +0300 Subject: [PATCH 0278/1050] IB/core: Fix query port failure in RoCE Currently ib_query_port always attempts to to read the subnet prefix by calling ib_query_gid(). For RoCE/iWARP there is no subnet manager and no subnet prefix. Fix this by querying GID[0] only for IB networks. Fixes: fad61ad4e755 ('IB/core: Add subnet prefix to port info') Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 8b8a8d9cf134..5c155fa91eec 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -661,6 +661,9 @@ int ib_query_port(struct ib_device *device, if (err || port_attr->subnet_prefix) return err; + if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) + return 0; + err = ib_query_gid(device, port_num, 0, &gid, NULL); if (err) return err; From 198b12f77084244d310888dd5d643083cb5c2aa1 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sat, 4 Jun 2016 15:15:20 +0300 Subject: [PATCH 0279/1050] IB/IPoIB: Fix race between ipoib_remove_one to sysfs functions In ipoib_remove_one the driver holds the rtnl_lock and tries to do some operation like dev_change_flags or unregister_netdev, while sysfs callback like ipoib_vlan_delete holds sysfs mutex and tries to hold the rtnl_lock via rtnl_trylock() and restart_syscall() if the lock is not free, meanwhile ipoib_remove_one tries to get the sysfs lock in order to free its sysfs directory, and we will get a->b, b->a deadlock. Trace like the following: schedule+0x37/0x80 schedule_preempt_disabled+0xe/0x10 __mutex_lock_slowpath+0xb5/0x120 mutex_lock+0x23/0x40 rtnl_lock+0x15/0x20 netdev_run_todo+0x17c/0x320 rtnl_unlock+0xe/0x10 ipoib_vlan_delete+0x11b/0x1b0 [ib_ipoib] delete_child+0x54/0x80 [ib_ipoib] dev_attr_store+0x18/0x30 sysfs_kf_write+0x37/0x40 mutex_lock+0x16/0x40 SyS_write+0x55/0xc0 entry_SYSCALL_64_fastpath+0x16/0x75 And schedule+0x37/0x80 __kernfs_remove+0x1a8/0x260 ? wake_atomic_t_function+0x60/0x60 kernfs_remove+0x25/0x40 sysfs_remove_dir+0x50/0x80 kobject_del+0x18/0x50 device_del+0x19f/0x260 netdev_unregister_kobject+0x6a/0x80 rollback_registered_many+0x1fd/0x340 rollback_registered+0x3c/0x70 unregister_netdevice_queue+0x55/0xc0 unregister_netdev+0x20/0x30 ipoib_remove_one+0x114/0x1b0 [ib_ipoib] ib_unregister_client+0x4a/0x170 [ib_core] ? find_module_all+0x71/0xa0 ipoib_cleanup_module+0x10/0x94 [ib_ipoib] SyS_delete_module+0x1b5/0x210 entry_SYSCALL_64_fastpath+0x16/0x75 The fix is by checking the flag IPOIB_FLAG_INTF_ON_DESTROY in order to get out from the sysfs function. Fixes: 862096a8bbf8 ("IB/ipoib: Add more rtnl_link_ops callbacks") Fixes: 9baa0b036410 ("IB/ipoib: Add rtnl_link_ops support") Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 +++ drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 6 ++++++ 4 files changed, 14 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bab7db6fa9ab..4f7d9b48df64 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -94,6 +94,7 @@ enum { IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_FLAG_DEV_ADDR_SET = 13, IPOIB_FLAG_DEV_ADDR_CTRL = 14, + IPOIB_FLAG_GOING_DOWN = 15, IPOIB_MAX_BACKOFF_SECONDS = 16, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index b2f42835d76d..951d9abcca8b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1486,6 +1486,10 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, { struct net_device *dev = to_net_dev(d); int ret; + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags)) + return -EPERM; if (!rtnl_trylock()) return restart_syscall(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2d7c16346648..0aa52c2f9438 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2141,6 +2141,9 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) ib_unregister_event_handler(&priv->event_handler); flush_workqueue(ipoib_workqueue); + /* mark interface in the middle of destruction */ + set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags); + rtnl_lock(); dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); rtnl_unlock(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 64a35595eab8..a2f9f29c6ab5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -131,6 +131,9 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ppriv = netdev_priv(pdev); + if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) + return -EPERM; + snprintf(intf_name, sizeof intf_name, "%s.%04x", ppriv->dev->name, pkey); priv = ipoib_intf_alloc(intf_name); @@ -183,6 +186,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) ppriv = netdev_priv(pdev); + if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) + return -EPERM; + if (!rtnl_trylock()) return restart_syscall(); From 8e787646fbce895c20c4433973e90af90e1c6a28 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sat, 4 Jun 2016 15:15:21 +0300 Subject: [PATCH 0280/1050] IB/core: Fix removal of default GID cache entry When deleting a default GID from the cache, its gid_type field is set to 0. This could set the gid_type to RoCE v1 for a RoCE v2 default GID, essentially making it inaccessible to future modifications, since it is no longer found by find_gid(). This fix preserves the gid_type value for default gids during cache operations. Fixes: b39ffa1df505 ('IB/core: Add gid_type to gid attribute') Signed-off-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index c2e257d97eff..040966775f40 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -178,6 +178,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port, { int ret = 0; struct net_device *old_net_dev; + enum ib_gid_type old_gid_type; /* in rdma_cap_roce_gid_table, this funciton should be protected by a * sleep-able lock. @@ -199,6 +200,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port, } old_net_dev = table->data_vec[ix].attr.ndev; + old_gid_type = table->data_vec[ix].attr.gid_type; if (old_net_dev && old_net_dev != attr->ndev) dev_put(old_net_dev); /* if modify_gid failed, just delete the old gid */ @@ -207,10 +209,14 @@ static int write_gid(struct ib_device *ib_dev, u8 port, attr = &zattr; table->data_vec[ix].context = NULL; } - if (default_gid) - table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; + memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid)); memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr)); + if (default_gid) { + table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; + if (action == GID_TABLE_WRITE_ACTION_DEL) + table->data_vec[ix].attr.gid_type = old_gid_type; + } if (table->data_vec[ix].attr.ndev && table->data_vec[ix].attr.ndev != old_net_dev) dev_hold(table->data_vec[ix].attr.ndev); From 9b29953bf8ca23944c5e00dcc15ad7bd9fecdd4e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sat, 4 Jun 2016 15:15:22 +0300 Subject: [PATCH 0281/1050] IB/IPoIB: Disable bottom half when dealing with device address Align locking usage when touching device address with rest of the kernel. Lock the bottom half when doing so using netif_addr_lock_bh. This also solves the following case as reported by lockdep: CPU0 CPU1 ---- ---- lock(_xmit_INFINIBAND); local_irq_disable(); lock(&(&mc->mca_lock)->rlock); lock(_xmit_INFINIBAND); lock(&(&mc->mca_lock)->rlock); *** DEADLOCK *** Fixes: 492a7e67ff83 ("IB/IPoIB: Allow setting the device address") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 ++++---- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 ++++---- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 45c40a17d6a6..dc6d241b9406 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1015,7 +1015,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) return false; - netif_addr_lock(priv->dev); + netif_addr_lock_bh(priv->dev); /* The subnet prefix may have changed, update it now so we won't have * to do it later @@ -1026,12 +1026,12 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) search_gid.global.interface_id = priv->local_gid.global.interface_id; - netif_addr_unlock(priv->dev); + netif_addr_unlock_bh(priv->dev); err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB, priv->dev, &port, &index); - netif_addr_lock(priv->dev); + netif_addr_lock_bh(priv->dev); if (search_gid.global.interface_id != priv->local_gid.global.interface_id) @@ -1092,7 +1092,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) } out: - netif_addr_unlock(priv->dev); + netif_addr_unlock_bh(priv->dev); return ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0aa52c2f9438..248da5015093 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1851,7 +1851,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) struct ipoib_dev_priv *child_priv; struct net_device *netdev = priv->dev; - netif_addr_lock(netdev); + netif_addr_lock_bh(netdev); memcpy(&priv->local_gid.global.interface_id, &gid->global.interface_id, @@ -1859,7 +1859,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid)); clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - netif_addr_unlock(netdev); + netif_addr_unlock_bh(netdev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { down_read(&priv->vlan_rwsem); @@ -1875,7 +1875,7 @@ static int ipoib_check_lladdr(struct net_device *dev, union ib_gid *gid = (union ib_gid *)(ss->__data + 4); int ret = 0; - netif_addr_lock(dev); + netif_addr_lock_bh(dev); /* Make sure the QPN, reserved and subnet prefix match the current * lladdr, it also makes sure the lladdr is unicast. @@ -1885,7 +1885,7 @@ static int ipoib_check_lladdr(struct net_device *dev, gid->global.interface_id == 0) ret = -EINVAL; - netif_addr_unlock(dev); + netif_addr_unlock_bh(dev); return ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 82fbc9442608..d3394b6add24 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -582,13 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } priv->local_lid = port_attr.lid; - netif_addr_lock(dev); + netif_addr_lock_bh(dev); if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { - netif_addr_unlock(dev); + netif_addr_unlock_bh(dev); return; } - netif_addr_unlock(dev); + netif_addr_unlock_bh(dev); spin_lock_irq(&priv->lock); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) From 8aec013afe6d9665eb478396026ebd4384dbe934 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sat, 4 Jun 2016 15:15:24 +0300 Subject: [PATCH 0282/1050] IB/core: Initialize sysfs attributes before sysfs create group For dynamically allocated sysfs attributes there is a need to call sysfs_attr_init in order to comply with lockdep, not calling it will result in error complaining key is not in .data section. Fixes: b40f4757daa1 ("IB/core: Make device counter infrastructure dynamic") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 35d0d47e6f8c..a5793c8f1590 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -925,10 +925,13 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]); if (!hsag->attrs[i]) goto err; + sysfs_attr_init(hsag->attrs[i]); } /* treat an error here as non-fatal */ hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num); + if (hsag->attrs[i]) + sysfs_attr_init(hsag->attrs[i]); if (port) { struct kobject *kobj = &port->kobj; From 47355b3cd7d3c9c5226bff7c449b9d269fb17fa6 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 6 Jun 2016 19:34:39 +0300 Subject: [PATCH 0283/1050] IB/core: Fix bit curruption in ib_device_cap_flags structure ib_device_cap_flags 64-bit expansion caused caps overlapping and made consumers read wrong device capabilities. For example IB_DEVICE_SG_GAPS_REG was falsely read by the iser driver causing it to use a non-existing capability. This happened because signed int becomes sign extended when converted it to u64. Fix this by casting IB_DEVICE_ON_DEMAND_PAGING enumeration to ULL. Fixes: f5aa9159a418 ('IB/core: Add arbitrary sg_list support') Reported-by: Robert LeBlanc Cc: Stable #[v4.6+] Acked-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Matan Barak Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 432bed510369..c97357b6c276 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -217,7 +217,7 @@ enum ib_device_cap_flags { IB_DEVICE_CROSS_CHANNEL = (1 << 27), IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), - IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), + IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33), IB_DEVICE_RAW_SCATTER_FCS = ((u64)1 << 34), From c7e162a417488f3c79eb09f3c4f1d36f1e042463 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 6 Jun 2016 19:34:40 +0300 Subject: [PATCH 0284/1050] IB/core: Make all casts in ib_device_cap_flags enum consistent Replace the few u64 casts with ULL to match the rest of the casts. Signed-off-by: Max Gurtovoy Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c97357b6c276..7e440d41487a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -219,8 +219,8 @@ enum ib_device_cap_flags { IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), - IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33), - IB_DEVICE_RAW_SCATTER_FCS = ((u64)1 << 34), + IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), + IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), }; enum ib_signature_prot_cap { From da6d6ba3c6f085abf82723612efd746a97f8e414 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sat, 4 Jun 2016 15:15:28 +0300 Subject: [PATCH 0285/1050] IB/mlx5: Set flow steering capability bit Flow steering is supported by mlx5 device when the following features are supported by firmware: 1. NIC RX flow table. 2. Device has enough flow steering levels. 3. Atomic modification of flow table entry. 4. Flow tables chaining. To check if flow steering is supported it's enough to check if the driver opened the mlx5 bypass namespace. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c72797cd9e4f..83047ef1394c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -524,6 +524,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_ETH(dev->mdev, scatter_fcs)) props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; + if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) + props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; From 2788cf3bd90af3791c3195c52391bcf34fa67b40 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:29 +0300 Subject: [PATCH 0286/1050] IB/mlx5: Return PORT_ERR in Active to Initializing tranisition FW port-change events are fired on Active <-> non Active port state transitions only. When the port state changes from Active to Initializing (Active -> Down -> Initializing), a single event is fired. The HCA transitions from Down to Initializing unless prevented from doing so, hence the driver should also propagate events when the port state is Initializing to consumers so they'll be aware that the port is no longer Active and act accordingly. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB...') Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 83047ef1394c..94a8f914b237 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1869,14 +1869,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, break; case MLX5_DEV_EVENT_PORT_DOWN: + case MLX5_DEV_EVENT_PORT_INITIALIZED: ibev.event = IB_EVENT_PORT_ERR; port = (u8)param; break; - case MLX5_DEV_EVENT_PORT_INITIALIZED: - /* not used by ULPs */ - return; - case MLX5_DEV_EVENT_LID_CHANGE: ibev.event = IB_EVENT_LID_CHANGE; port = (u8)param; From c0fcebf55289c48148992eee002a7caf853a5358 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sat, 4 Jun 2016 15:15:30 +0300 Subject: [PATCH 0287/1050] IB/mlx5: Fix FW version diaplay in sysfs Add a 4-digit padding to show FW version in proper format. Fixes: 9603b61de1eee ('mlx5: Move pci device handling from...') Signed-off-by: Eran Ben Elisha Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94a8f914b237..8845f4b9621b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1801,7 +1801,7 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev), + return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } From bc5c6eed0510f19b033ce7a7d3976e695e96785b Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:31 +0300 Subject: [PATCH 0288/1050] IB/mlx5: Limit query HCA clock When PAGE_SIZE is larger than 4K, the user shouldn't be able to query the HCA core clock. This counter is within 4KB boundary and the user-space shall not read information that's after this boundary. Fixes: b368d7cb8ceb7 ('IB/mlx5: Add hca_core_clock_offset to...') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8845f4b9621b..05036dbb9804 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -991,7 +991,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (field_avail(typeof(resp), cqe_version, udata->outlen)) resp.response_length += sizeof(resp.cqe_version); - if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { + /* + * We don't want to expose information from the PCI bar that is located + * after 4096 bytes, so if the arch only supports larger pages, let's + * pretend we don't support reading the HCA's core clock. This is also + * forced by mmap function. + */ + if (PAGE_SIZE <= 4096 && + field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; resp.hca_core_clock_offset = From 0540d8148d419bf769e5aa99c77027febd8922f0 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:32 +0300 Subject: [PATCH 0289/1050] IB/mlx5: Fix returned values of query QP Some variables were not initialized properly: max_recv_wr, max_recv_sge, max_send_wr, qp_context and max_inline_data. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB...') Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 504117657d41..43c1441b6fb8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -235,6 +235,8 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, qp->rq.max_gs = 0; qp->rq.wqe_cnt = 0; qp->rq.wqe_shift = 0; + cap->max_recv_wr = 0; + cap->max_recv_sge = 0; } else { if (ucmd) { qp->rq.wqe_cnt = ucmd->rq_wqe_count; @@ -4079,17 +4081,19 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->cap.max_recv_sge = qp->rq.max_gs; if (!ibqp->uobject) { - qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_wr = qp->sq.max_post; qp_attr->cap.max_send_sge = qp->sq.max_gs; + qp_init_attr->qp_context = ibqp->qp_context; } else { qp_attr->cap.max_send_wr = 0; qp_attr->cap.max_send_sge = 0; } - /* We don't support inline sends for kernel QPs (yet), and we - * don't know what userspace's value should be. - */ - qp_attr->cap.max_inline_data = 0; + qp_init_attr->qp_type = ibqp->qp_type; + qp_init_attr->recv_cq = ibqp->recv_cq; + qp_init_attr->send_cq = ibqp->send_cq; + qp_init_attr->srq = ibqp->srq; + qp_attr->cap.max_inline_data = qp->max_inline_data; qp_init_attr->cap = qp_attr->cap; From 2cc6ad5f2130733e561f97dba7a2052f8ea024aa Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:33 +0300 Subject: [PATCH 0290/1050] IB/mlx5: Check BlueFlame HCA support BlueFlame support is reported only for PFs when the HCA capability is on. Fixes: 938fe83c8dcbb ('net/mlx5_core: New device capabilities...') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 05036dbb9804..b48ad85315dc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -918,7 +918,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); + if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) + resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp.cache_line_size = L1_CACHE_BYTES; resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); From 9ea578528656e191c1097798a771ff08bab6f323 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:34 +0300 Subject: [PATCH 0291/1050] IB/mlx5: Fix entries checks in mlx5_ib_create_cq Number of entries shouldn't be greater than the device's max capability. This should be checked before rounding the entries number to power of two. Fixes: 51ee86a4af639 ('IB/mlx5: Fix check of number of entries...') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index dabcc65bd65e..3984c68caae1 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -822,7 +822,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int eqn; int err; - if (entries < 0) + if (entries < 0 || + (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) return ERR_PTR(-EINVAL); if (check_cq_create_flags(attr->flags)) From 3c4c37746c919c983e439ac6a7328cd2d48c10ed Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:35 +0300 Subject: [PATCH 0292/1050] IB/mlx5: Fix entries check in mlx5_ib_resize_cq Verify that number of entries is less than device capability. Add an appropriate warning message for error flow. Fixes: bde51583f49b ('IB/mlx5: Add support for resize CQ') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 3984c68caae1..9c0e67bd2ba7 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1169,11 +1169,16 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -ENOSYS; } - if (entries < 1) + if (entries < 1 || + entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) { + mlx5_ib_warn(dev, "wrong entries number %d, max %d\n", + entries, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)); return -EINVAL; + } entries = roundup_pow_of_two(entries + 1); - if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) return -EINVAL; if (entries == ibcq->cqe + 1) From d3ae2bdeba9bad8cb95301451aeaf03ce31e82f0 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:36 +0300 Subject: [PATCH 0293/1050] IB/mlx5: Fix pkey_index length in the QP path record Pkey index fields in the QP context path record are extended to 16 bits, as required by IB spec (version 1.3). This change affects all QP commands which include path records. To enable this change, moved the free adaptive routing flag bit (free_ar) to the most significant byte of the QP path record. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB ...') Signed-off-by: Noa Osherovich Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 15 ++++++++------- include/linux/mlx5/qp.h | 5 ++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 43c1441b6fb8..6b90bfdea830 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1859,7 +1859,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int err; if (attr_mask & IB_QP_PKEY_INDEX) - path->pkey_index = attr->pkey_index; + path->pkey_index = cpu_to_be16(attr->pkey_index); if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= @@ -1879,9 +1879,9 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, ah->grh.sgid_index); path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4; } else { - path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; - path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : - 0; + path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; + path->fl_free_ar |= + (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0; path->rlid = cpu_to_be16(ah->dlid); path->grh_mlid = ah->src_path_bits & 0x7f; if (ah->ah_flags & IB_AH_GRH) @@ -2266,7 +2266,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num); if (attr_mask & IB_QP_PKEY_INDEX) - context->pri_path.pkey_index = attr->pkey_index; + context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index); /* todo implement counter_index functionality */ @@ -4015,11 +4015,12 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); - qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f; + qp_attr->alt_pkey_index = + be16_to_cpu(context->alt_path.pkey_index); qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } - qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f; + qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index); qp_attr->port_num = context->pri_path.port; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 64221027bf1f..e4e29882fdfd 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -460,10 +460,9 @@ struct mlx5_core_qp { }; struct mlx5_qp_path { - u8 fl; + u8 fl_free_ar; u8 rsvd3; - u8 free_ar; - u8 pkey_index; + __be16 pkey_index; u8 rsvd0; u8 grh_mlid; __be16 rlid; From f879ee8d900fc78b5bc5d840edd9ecb57d02ab7e Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Sat, 4 Jun 2016 15:15:37 +0300 Subject: [PATCH 0294/1050] IB/mlx5: Fix alternate path code Userspace flag IBV_QP_ALT_PATH is supposed to set the alternate path including fields alt_pkey_index and alt_timeout. Added IB_QP_PKEY_INDEX and IB_QP_TIMEOUT to the attribute mask when calling mlx5_set_path for the alternate path to force setting the alt_pkey_index and alt_timeout values. Fixes: bf24481a3a7c4 ('IB/mlx5: Consider alternate path in pkey ...') Signed-off-by: Achiad Shochat Signed-off-by: Noa Osherovich Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6b90bfdea830..ce434228a5ea 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1853,13 +1853,15 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, - u32 path_flags, const struct ib_qp_attr *attr) + u32 path_flags, const struct ib_qp_attr *attr, + bool alt) { enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port); int err; if (attr_mask & IB_QP_PKEY_INDEX) - path->pkey_index = cpu_to_be16(attr->pkey_index); + path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index : + attr->pkey_index); if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= @@ -1905,7 +1907,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, path->port = port; if (attr_mask & IB_QP_TIMEOUT) - path->ackto_lt = attr->timeout << 3; + path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3; if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) return modify_raw_packet_eth_prio(dev->mdev, @@ -2279,7 +2281,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_AV) { err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port, - attr_mask, 0, attr); + attr_mask, 0, attr, false); if (err) goto out; } @@ -2290,7 +2292,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_ALT_PATH) { err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, &context->alt_path, - attr->alt_port_num, attr_mask, 0, attr); + attr->alt_port_num, + attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT, + 0, attr, true); if (err) goto out; } From 61c78eea9516a921799c17b4c20558e2aa780fd3 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sat, 4 Jun 2016 15:15:19 +0300 Subject: [PATCH 0295/1050] IB/IPoIB: Don't update neigh validity for unresolved entries ipoib_neigh_get unconditionally updates the "alive" variable member on any packet send. This prevents the neighbor garbage collection from cleaning out a dead neighbor entry if we are still queueing packets for it. If the queue for this neighbor is full, then don't update the alive timestamp. That way the neighbor can time out even if packets are still being queued as long as none of them are being sent. Fixes: b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path") Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 248da5015093..5f58c41ef787 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1206,7 +1206,9 @@ struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) neigh = NULL; goto out_unlock; } - neigh->alive = jiffies; + + if (likely(skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)) + neigh->alive = jiffies; goto out_unlock; } } From 05bd92dddc595d74ea645e793c1f3bd4b1fc251a Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Tue, 7 Jun 2016 11:32:13 -0500 Subject: [PATCH 0296/1050] block: missing bio_put following submit_bio_wait submit_bio_wait() gives the caller an opportunity to examine struct bio and so expects the caller to issue the put_bio() This fixes a memory leak reported by a few people in 4.7-rc2 kmemleak report after 9082e87bfbf8 ("block: remove struct bio_batch") Signed-off-by: Shaun Tancheff Tested-by: Catalin Marinas Tested-by: Larry Finger@lwfinger.net Tested-by: David Drysdale Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-lib.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 23d7f301a196..9e29dc351695 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -113,6 +113,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, ret = submit_bio_wait(type, bio); if (ret == -EOPNOTSUPP) ret = 0; + bio_put(bio); } blk_finish_plug(&plug); @@ -165,8 +166,10 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, } } - if (bio) + if (bio) { ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio); + bio_put(bio); + } return ret != -EOPNOTSUPP ? ret : 0; } EXPORT_SYMBOL(blkdev_issue_write_same); @@ -206,8 +209,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, } } - if (bio) - return submit_bio_wait(WRITE, bio); + if (bio) { + ret = submit_bio_wait(WRITE, bio); + bio_put(bio); + return ret; + } return 0; } From b36fad65d61fffe4b662d4bfb1ed673c455a36a2 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Tue, 7 Jun 2016 11:57:02 +0200 Subject: [PATCH 0297/1050] kbuild: Initialize exported variables The NOSTDINC_FLAGS variable is exported, so it needs to be cleared to avoid duplicating its content when running make from within make (e.g. in the packaging targets). This became an issue after commit 9c8fa9bc08f6 ("kbuild: fix if_change and friends to consider argument order"), which no longer ignores the duplicate options. As Paulo Zanoni points out, the LDFLAGS_vmlinux variable has the same problem. Reported-by: "Zanoni, Paulo R" Fixes: 9c8fa9bc08f6 ("kbuild: fix if_change and friends to consider argument order") Signed-off-by: Michal Marek --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 0f70de63cfdb..af0c463e908f 100644 --- a/Makefile +++ b/Makefile @@ -363,11 +363,13 @@ CHECK = sparse CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) +NOSTDINC_FLAGS = CFLAGS_MODULE = AFLAGS_MODULE = LDFLAGS_MODULE = CFLAGS_KERNEL = AFLAGS_KERNEL = +LDFLAGS_vmlinux = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized CFLAGS_KCOV = -fsanitize-coverage=trace-pc From 5b6c1b4d46b0dae4edea636a776d09f2064f4cd7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 4 Jun 2016 20:50:59 +0200 Subject: [PATCH 0298/1050] bpf, trace: use READ_ONCE for retrieving file ptr In bpf_perf_event_read() and bpf_perf_event_output(), we must use READ_ONCE() for fetching the struct file pointer, which could get updated concurrently, so we must prevent the compiler from potential refetching. We already do this with tail calls for fetching the related bpf_prog, but not so on stored perf events. Semantics for both are the same with regards to updates. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 780bcbe1d4de..720b7bb01d43 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -198,7 +198,7 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) if (unlikely(index >= array->map.max_entries)) return -E2BIG; - file = (struct file *)array->ptrs[index]; + file = READ_ONCE(array->ptrs[index]); if (unlikely(!file)) return -ENOENT; @@ -247,7 +247,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) if (unlikely(index >= array->map.max_entries)) return -E2BIG; - file = (struct file *)array->ptrs[index]; + file = READ_ONCE(array->ptrs[index]); if (unlikely(!file)) return -ENOENT; From 80e509db54c81247b32fcb75bb1730fc789b893d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Jun 2016 12:55:13 -0700 Subject: [PATCH 0299/1050] fq_codel: fix NET_XMIT_CN behavior My prior attempt to fix the backlogs of parents failed. If we return NET_XMIT_CN, our parents wont increase their backlog, so our qdisc_tree_reduce_backlog() should take this into account. v2: Florian Westphal pointed out that we could drop the packet, so we need to save qdisc_pkt_len(skb) in a temp variable before calling fq_codel_drop() Fixes: 9d18562a2278 ("fq_codel: add batch ability to fq_codel_drop()") Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Reported-by: Stas Nichiporovich Signed-off-by: Eric Dumazet Cc: WANG Cong Cc: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 6883a8971562..fff7867f4a4f 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -199,6 +199,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) unsigned int idx, prev_backlog, prev_qlen; struct fq_codel_flow *flow; int uninitialized_var(ret); + unsigned int pkt_len; bool memory_limited; idx = fq_codel_classify(skb, sch, &ret); @@ -230,6 +231,8 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) prev_backlog = sch->qstats.backlog; prev_qlen = sch->q.qlen; + /* save this packet length as it might be dropped by fq_codel_drop() */ + pkt_len = qdisc_pkt_len(skb); /* fq_codel_drop() is quite expensive, as it performs a linear search * in q->backlogs[] to find a fat flow. * So instead of dropping a single packet, drop half of its backlog @@ -237,14 +240,23 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ ret = fq_codel_drop(sch, q->drop_batch_size); - q->drop_overlimit += prev_qlen - sch->q.qlen; + prev_qlen -= sch->q.qlen; + prev_backlog -= sch->qstats.backlog; + q->drop_overlimit += prev_qlen; if (memory_limited) - q->drop_overmemory += prev_qlen - sch->q.qlen; - /* As we dropped packet(s), better let upper stack know this */ - qdisc_tree_reduce_backlog(sch, prev_qlen - sch->q.qlen, - prev_backlog - sch->qstats.backlog); + q->drop_overmemory += prev_qlen; - return ret == idx ? NET_XMIT_CN : NET_XMIT_SUCCESS; + /* As we dropped packet(s), better let upper stack know this. + * If we dropped a packet for this flow, return NET_XMIT_CN, + * but in this case, our parents wont increase their backlogs. + */ + if (ret == idx) { + qdisc_tree_reduce_backlog(sch, prev_qlen - 1, + prev_backlog - pkt_len); + return NET_XMIT_CN; + } + qdisc_tree_reduce_backlog(sch, prev_qlen, prev_backlog); + return NET_XMIT_SUCCESS; } /* This is the specific function called from codel_dequeue() From 335b48d980f631fbc5b233cbb3625ac0c86d67cb Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 13:59:58 -0700 Subject: [PATCH 0300/1050] RDS: TCP: Add/use rds_tcp_reset_callbacks to reset tcp socket safely When rds_tcp_accept_one() has to replace the existing tcp socket with a newer tcp socket (duelling-syn resolution), it must lock_sock() to suppress the rds_tcp_data_recv() path while callbacks are being changed. Also, existing RDS datagram reassembly state must be reset, so that the next datagram on the new socket does not have corrupted state. Similarly when resetting the newly accepted socket, appropriate locks and synchronization is needed. This commit ensures correct synchronization by invoking kernel_sock_shutdown to reset a newly accepted sock, and by taking appropriate lock_sock()s (for old and new sockets) when resetting existing callbacks. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp.c | 65 ++++++++++++++++++++++++++++++++++++++++++-- net/rds/tcp.h | 1 + net/rds/tcp_listen.c | 13 +++------ 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 86187dad1440..8faa0b1ae39d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -126,9 +126,68 @@ void rds_tcp_restore_callbacks(struct socket *sock, } /* - * This is the only path that sets tc->t_sock. Send and receive trust that - * it is set. The RDS_CONN_UP bit protects those paths from being - * called while it isn't set. + * rds_tcp_reset_callbacks() switches the to the new sock and + * returns the existing tc->t_sock. + * + * The only functions that set tc->t_sock are rds_tcp_set_callbacks + * and rds_tcp_reset_callbacks. Send and receive trust that + * it is set. The absence of RDS_CONN_UP bit protects those paths + * from being called while it isn't set. + */ +void rds_tcp_reset_callbacks(struct socket *sock, + struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *osock = tc->t_sock; + + if (!osock) + goto newsock; + + /* Need to resolve a duelling SYN between peers. + * We have an outstanding SYN to this peer, which may + * potentially have transitioned to the RDS_CONN_UP state, + * so we must quiesce any send threads before resetting + * c_transport_data. We quiesce these threads by setting + * cp_state to something other than RDS_CONN_UP, and then + * waiting for any existing threads in rds_send_xmit to + * complete release_in_xmit(). (Subsequent threads entering + * rds_send_xmit() will bail on !rds_conn_up(). + */ + lock_sock(osock->sk); + /* reset receive side state for rds_tcp_data_recv() for osock */ + if (tc->t_tinc) { + rds_inc_put(&tc->t_tinc->ti_inc); + tc->t_tinc = NULL; + } + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + tc->t_sock = NULL; + + write_lock_bh(&osock->sk->sk_callback_lock); + + osock->sk->sk_user_data = NULL; + osock->sk->sk_data_ready = tc->t_orig_data_ready; + osock->sk->sk_write_space = tc->t_orig_write_space; + osock->sk->sk_state_change = tc->t_orig_state_change; + write_unlock_bh(&osock->sk->sk_callback_lock); + release_sock(osock->sk); + sock_release(osock); +newsock: + lock_sock(sock->sk); + write_lock_bh(&sock->sk->sk_callback_lock); + tc->t_sock = sock; + sock->sk->sk_user_data = conn; + sock->sk->sk_data_ready = rds_tcp_data_ready; + sock->sk->sk_write_space = rds_tcp_write_space; + sock->sk->sk_state_change = rds_tcp_state_change; + + write_unlock_bh(&sock->sk->sk_callback_lock); + release_sock(sock->sk); +} + +/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments + * above rds_tcp_reset_callbacks for notes about synchronization + * with data path */ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) { diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 41c228300525..ec0602b0dc24 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -50,6 +50,7 @@ struct rds_tcp_statistics { void rds_tcp_tune(struct socket *sock); void rds_tcp_nonagle(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 4bf4befe5066..d9fe53675d95 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -78,7 +78,6 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp = NULL; int conn_state; - struct sock *nsk; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -139,23 +138,19 @@ int rds_tcp_accept_one(struct socket *sock) atomic_set(&conn->c_state, RDS_CONN_CONNECTING); wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); - rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); + rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; } + } else { + rds_tcp_set_callbacks(new_sock, conn); } - rds_tcp_set_callbacks(new_sock, conn); rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; goto out; rst_nsk: /* reset the newly returned accept sock and bail */ - nsk = new_sock->sk; - rds_tcp_stats_inc(s_tcp_listen_closed_stale); - nsk->sk_user_data = NULL; - nsk->sk_prot->disconnect(nsk, 0); - tcp_done(nsk); - new_sock = NULL; + kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: if (rs_tcp) From 0b6f760cff04a7cdfafc3ec6915e91fed0533d8d Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 13:59:59 -0700 Subject: [PATCH 0301/1050] RDS: TCP: Retransmit half-sent datagrams when switching sockets in rds_tcp_reset_callbacks When we switch a connection's sockets in rds_tcp_rest_callbacks, any partially sent datagram must be retransmitted on the new socket so that the receiver can correctly reassmble the RDS datagram. Use rds_send_reset() which is designed for this purpose. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/send.c | 1 + net/rds/tcp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/rds/send.c b/net/rds/send.c index c9cdb358ea88..b1962f8e30f7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -99,6 +99,7 @@ void rds_send_reset(struct rds_connection *conn) list_splice_init(&conn->c_retrans, &conn->c_send_queue); spin_unlock_irqrestore(&conn->c_lock, flags); } +EXPORT_SYMBOL_GPL(rds_send_reset); static int acquire_in_xmit(struct rds_connection *conn) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 8faa0b1ae39d..7ab1b41ffc88 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -173,6 +173,7 @@ void rds_tcp_reset_callbacks(struct socket *sock, release_sock(osock->sk); sock_release(osock); newsock: + rds_send_reset(conn); lock_sock(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); tc->t_sock = sock; From 9c79440e2c5e2518879f1599270f64c3ddda3baf Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 14:00:00 -0700 Subject: [PATCH 0302/1050] RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one() The send path needs to be quiesced before resetting callbacks from rds_tcp_accept_one(), and commit eb192840266f ("RDS:TCP: Synchronize rds_tcp_accept_one with rds_send_xmit when resetting t_sock") achieves this using the c_state and RDS_IN_XMIT bit following the pattern used by rds_conn_shutdown(). However this leaves the possibility of a race window as shown in the sequence below take t_conn_lock in rds_tcp_conn_connect send outgoing syn to peer drop t_conn_lock in rds_tcp_conn_connect incoming from peer triggers rds_tcp_accept_one, conn is marked CONNECTING wait for RDS_IN_XMIT to quiesce any rds_send_xmit threads call rds_tcp_reset_callbacks [.. race-window where incoming syn-ack can cause the conn to be marked UP from rds_tcp_state_change ..] lock_sock called from rds_tcp_reset_callbacks, and we set t_sock to null As soon as the conn is marked UP in the race-window above, rds_send_xmit() threads will proceed to rds_tcp_xmit and may encounter a null-pointer deref on the t_sock. Given that rds_tcp_state_change() is invoked in softirq context, whereas rds_tcp_reset_callbacks() is in workq context, and testing for RDS_IN_XMIT after lock_sock could result in a deadlock with tcp_sendmsg, this commit fixes the race by using a new c_state, RDS_TCP_RESETTING, which will prevent a transition to RDS_CONN_UP from rds_tcp_state_change(). Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/rds.h | 2 ++ net/rds/tcp.c | 14 +++++++++++++- net/rds/tcp_connect.c | 2 +- net/rds/tcp_listen.c | 7 +++---- net/rds/threads.c | 10 ++++++++-- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/net/rds/rds.h b/net/rds/rds.h index 80256b08eac0..387df5f32e49 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -74,6 +74,7 @@ enum { RDS_CONN_CONNECTING, RDS_CONN_DISCONNECTING, RDS_CONN_UP, + RDS_CONN_RESETTING, RDS_CONN_ERROR, }; @@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *); void rds_shutdown_worker(struct work_struct *); void rds_send_worker(struct work_struct *); void rds_recv_worker(struct work_struct *); +void rds_connect_path_complete(struct rds_connection *conn, int curr); void rds_connect_complete(struct rds_connection *conn); /* transport.c */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 7ab1b41ffc88..74ee126a6fe6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -148,11 +148,23 @@ void rds_tcp_reset_callbacks(struct socket *sock, * potentially have transitioned to the RDS_CONN_UP state, * so we must quiesce any send threads before resetting * c_transport_data. We quiesce these threads by setting - * cp_state to something other than RDS_CONN_UP, and then + * c_state to something other than RDS_CONN_UP, and then * waiting for any existing threads in rds_send_xmit to * complete release_in_xmit(). (Subsequent threads entering * rds_send_xmit() will bail on !rds_conn_up(). + * + * However an incoming syn-ack at this point would end up + * marking the conn as RDS_CONN_UP, and would again permit + * rds_send_xmi() threads through, so ideally we would + * synchronize on RDS_CONN_UP after lock_sock(), but cannot + * do that: waiting on !RDS_IN_XMIT after lock_sock() may + * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT + * would not get set. As a result, we set c_state to + * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change + * cannot mark rds_conn_path_up() in the window before lock_sock() */ + atomic_set(&conn->c_state, RDS_CONN_RESETTING); + wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ if (tc->t_tinc) { diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index fb82e0a0bf89..fba13d0305fb 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk) case TCP_SYN_RECV: break; case TCP_ESTABLISHED: - rds_connect_complete(conn); + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); break; case TCP_CLOSE_WAIT: case TCP_CLOSE: diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index d9fe53675d95..686b1d03a558 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -135,16 +135,15 @@ int rds_tcp_accept_one(struct socket *sock) !conn->c_outgoing) { goto rst_nsk; } else { - atomic_set(&conn->c_state, RDS_CONN_CONNECTING); - wait_event(conn->c_waitq, - !test_bit(RDS_IN_XMIT, &conn->c_flags)); rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; + /* rds_connect_path_complete() marks RDS_CONN_UP */ + rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING); } } else { rds_tcp_set_callbacks(new_sock, conn); + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); } - rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; goto out; diff --git a/net/rds/threads.c b/net/rds/threads.c index 454aa6d23327..4a323045719b 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -71,9 +71,9 @@ struct workqueue_struct *rds_wq; EXPORT_SYMBOL_GPL(rds_wq); -void rds_connect_complete(struct rds_connection *conn) +void rds_connect_path_complete(struct rds_connection *conn, int curr) { - if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { + if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) { printk(KERN_WARNING "%s: Cannot transition to state UP, " "current state is %d\n", __func__, @@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn) queue_delayed_work(rds_wq, &conn->c_send_w, 0); queue_delayed_work(rds_wq, &conn->c_recv_w, 0); } +EXPORT_SYMBOL_GPL(rds_connect_path_complete); + +void rds_connect_complete(struct rds_connection *conn) +{ + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); +} EXPORT_SYMBOL_GPL(rds_connect_complete); /* From fa54cc70ed2eddd96773ffdf4d96d23ea7483e56 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sun, 5 Jun 2016 07:17:19 +0800 Subject: [PATCH 0303/1050] rxrpc: fix ptr_ret.cocci warnings net/rxrpc/rxkad.c:1165:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci CC: David Howells Signed-off-by: Fengguang Wu Signed-off-by: David S. Miller --- net/rxrpc/rxkad.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 6b726a046a7d..bab56ed649ba 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1162,9 +1162,7 @@ static int rxkad_init(void) /* pin the cipher we need so that the crypto layer doesn't invoke * keventd to go get it */ rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(rxkad_ci)) - return PTR_ERR(rxkad_ci); - return 0; + return PTR_ERR_OR_ZERO(rxkad_ci); } /* From 9c77679cadb118c0aa99e6f88533d91765a131ba Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 5 Apr 2016 17:01:33 -0700 Subject: [PATCH 0304/1050] x86, build: copy ldlinux.c32 to image.iso For newer versions of Syslinux, we need ldlinux.c32 in addition to isolinux.bin to reside on the boot disk, so if the latter is found, copy it, too, to the isoimage tree. Signed-off-by: H. Peter Anvin Cc: Linux Stable Tree --- arch/x86/boot/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 700a9c6e6159..be8e688fa0d4 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -162,6 +162,9 @@ isoimage: $(obj)/bzImage for i in lib lib64 share end ; do \ if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ + if [ -f /usr/$$i/syslinux/ldlinux.c32 ]; then \ + cp /usr/$$i/syslinux/ldlinux.c32 $(obj)/isoimage ; \ + fi ; \ break ; \ fi ; \ if [ $$i = end ] ; then exit 1 ; fi ; \ From b9a8460a08a1e0150073cda3e7a0dd23cb888052 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 6 Jun 2016 02:37:14 -0400 Subject: [PATCH 0305/1050] bnxt_en: Fix tx push race condition. Set the is_push flag in the software BD before the tx data is pushed to the chip. It is possible to get the tx interrupt as soon as the tx data is pushed. The tx handler will not handle the event properly if the is_push flag is not set and it will crash. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 72a2efff8e49..4615ed4311e8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -286,7 +286,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod); txr->tx_prod = prod; + tx_buf->is_push = 1; netdev_tx_sent_queue(txq, skb->len); + wmb(); /* Sync is_push and byte queue before pushing data */ push_len = (length + sizeof(*tx_push) + 7) / 8; if (push_len > 16) { @@ -298,7 +300,6 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) push_len); } - tx_buf->is_push = 1; goto tx_done; } From 5a9f6b238e59bc05afb4cdeaf3672990bf2a5309 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 6 Jun 2016 02:37:15 -0400 Subject: [PATCH 0306/1050] bnxt_en: Enable and disable RX CTAG and RX STAG VLAN acceleration together. The hardware can only be set to strip or not strip both the VLAN CTAG and STAG. It cannot strip one and not strip the other. Add logic to bnxt_fix_features() to toggle both feature flags when the user is toggling one of them. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4615ed4311e8..ae2b2646604a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5467,6 +5467,20 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev, if (!bnxt_rfs_capable(bp)) features &= ~NETIF_F_NTUPLE; + + /* Both CTAG and STAG VLAN accelaration on the RX side have to be + * turned on or off together. + */ + if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) != + (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) { + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + features &= ~(NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX); + else + features |= NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX; + } + return features; } From 8852ddb4dcdfe6f877a02f79bf2bca9ae63c039a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 6 Jun 2016 02:37:16 -0400 Subject: [PATCH 0307/1050] bnxt_en: Simplify VLAN receive logic. Since both CTAG and STAG rx acceleration must be enabled together, we only need to check one feature flag (NETIF_F_HW_VLAN_CTAG_RX) before calling __vlan_hwaccel_put_tag(). Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 29 +++++++---------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ae2b2646604a..c777cde85ce4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1113,19 +1113,13 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if (tpa_info->hash_type != PKT_HASH_TYPE_NONE) skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type); - if (tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) { - netdev_features_t features = skb->dev->features; + if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) && + (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u16 vlan_proto = tpa_info->metadata >> RX_CMP_FLAGS2_METADATA_TPID_SFT; + u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_VID_MASK; - if (((features & NETIF_F_HW_VLAN_CTAG_RX) && - vlan_proto == ETH_P_8021Q) || - ((features & NETIF_F_HW_VLAN_STAG_RX) && - vlan_proto == ETH_P_8021AD)) { - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), - tpa_info->metadata & - RX_CMP_FLAGS2_METADATA_VID_MASK); - } + __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); } skb_checksum_none_assert(skb); @@ -1278,19 +1272,14 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, skb->protocol = eth_type_trans(skb, dev); - if (rxcmp1->rx_cmp_flags2 & - cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) { - netdev_features_t features = skb->dev->features; + if ((rxcmp1->rx_cmp_flags2 & + cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) && + (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); + u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_VID_MASK; u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; - if (((features & NETIF_F_HW_VLAN_CTAG_RX) && - vlan_proto == ETH_P_8021Q) || - ((features & NETIF_F_HW_VLAN_STAG_RX) && - vlan_proto == ETH_P_8021AD)) - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), - meta_data & - RX_CMP_FLAGS2_METADATA_VID_MASK); + __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); } skb_checksum_none_assert(skb); From 9f647a6de9926f4844feb8c5e21c78d4e61c55ab Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Jun 2016 09:21:44 +0100 Subject: [PATCH 0308/1050] net: fec: fix spelling mistakes and add missing newline trivial fix to spelling mistakes and add missing newline in pr_err messages Signed-off-by: Colin Ian King Acked-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3c0255e98535..fea0f330ddbd 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2416,24 +2416,24 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) return -EOPNOTSUPP; if (ec->rx_max_coalesced_frames > 255) { - pr_err("Rx coalesced frames exceed hardware limiation"); + pr_err("Rx coalesced frames exceed hardware limitation\n"); return -EINVAL; } if (ec->tx_max_coalesced_frames > 255) { - pr_err("Tx coalesced frame exceed hardware limiation"); + pr_err("Tx coalesced frame exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesed usec exceeed hardware limiation"); + pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesed usec exceeed hardware limiation"); + pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } From 7b01b8e847d00cf9cf0c2c3aa8fdfc4126dca024 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Jun 2016 16:08:41 +0100 Subject: [PATCH 0309/1050] gtp: #define _UAPI_LINUX_GTP_H_ and not _UAPI_LINUX_GTP_H__ Fix clang build warning: ./include/uapi/linux/gtp.h:1:9: warning: '_UAPI_LINUX_GTP_H_' is used as a header guard here, followed by #define of a different macro [-Wheader-guard] fix by defining _UAPI_LINUX_GTP_H_ and not _UAPI_LINUX_GTP_H__ Signed-off-by: Colin Ian King Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/uapi/linux/gtp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index ca1054dd8249..72a04a0e8cce 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -1,5 +1,5 @@ #ifndef _UAPI_LINUX_GTP_H_ -#define _UAPI_LINUX_GTP_H__ +#define _UAPI_LINUX_GTP_H_ enum gtp_genl_cmds { GTP_CMD_NEWPDP, From 1a0f7d2984f3864e64a43714b4a0999b5a27cff5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Jun 2016 16:16:47 +0100 Subject: [PATCH 0310/1050] net: cls_u32: fix error code for invalid flags 'err' variable is not set in this test, we would return whatever previous test set 'err' to. Signed-off-by: Jakub Kicinski Acked-by: Sridhar Samudrala Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 079b43b3c5d2..b17e090f2fe1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -863,7 +863,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_U32_FLAGS]) { flags = nla_get_u32(tb[TCA_U32_FLAGS]); if (!tc_flags_valid(flags)) - return err; + return -EINVAL; } n = (struct tc_u_knode *)*arg; From d47a0f387fe907bdb0430a398850c1cb80eb7def Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Jun 2016 16:16:48 +0100 Subject: [PATCH 0311/1050] net: cls_u32: be more strict about skip-sw flag Return an error if user requested skip-sw and the underlaying hardware cannot handle tc offloads (or offloads are disabled). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b17e090f2fe1..fe05449537a3 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -457,20 +457,21 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_to_netdev offload; int err; + if (!tc_should_offload(dev, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; + offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { - offload.cls_u32->command = TC_CLSU32_NEW_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; + offload.cls_u32->command = TC_CLSU32_NEW_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } From aafddbf0cffeb790f919436285328c762279b5d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:12:39 -0700 Subject: [PATCH 0312/1050] fq_codel: return non zero qlen in class dumps We properly scan the flow list to count number of packets, but John passed 0 to gnet_stats_copy_queue() so we report a zero value to user space instead of the result. Fixes: 640158536632 ("net: sched: restrict use of qstats qlen") Signed-off-by: Eric Dumazet Cc: John Fastabend Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index fff7867f4a4f..da250b2e06ae 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -661,7 +661,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, qs.backlog = q->backlogs[idx]; qs.drops = flow->dropped; } - if (gnet_stats_copy_queue(d, NULL, &qs, 0) < 0) + if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; if (idx < q->flows_cnt) return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); From a03e6fe569713fb3ff0714f8fd7c8785c0ca9e22 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 6 Jun 2016 09:54:30 -0700 Subject: [PATCH 0313/1050] act_police: fix a crash during removal The police action is using its own code to initialize tcf hash info, which makes us to forgot to initialize a->hinfo correctly. Fix this by calling the helper function tcf_hash_create() directly. This patch fixed the following crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: [] __lock_acquire+0xd3/0xf91 PGD d3c34067 PUD d3e18067 PMD 0 Oops: 0000 [#1] SMP CPU: 2 PID: 853 Comm: tc Not tainted 4.6.0+ #87 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800d3e28040 ti: ffff8800d3f6c000 task.ti: ffff8800d3f6c000 RIP: 0010:[] [] __lock_acquire+0xd3/0xf91 RSP: 0000:ffff88011b203c80 EFLAGS: 00010002 RAX: 0000000000000046 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000028 RBP: ffff88011b203d40 R08: 0000000000000001 R09: 0000000000000000 R10: ffff88011b203d58 R11: ffff88011b208000 R12: 0000000000000001 R13: ffff8800d3e28040 R14: 0000000000000028 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88011b200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 00000000d4be1000 CR4: 00000000000006e0 Stack: ffff8800d3e289c0 0000000000000046 000000001b203d60 ffffffff00000000 0000000000000000 ffff880000000000 0000000000000000 ffffffff00000000 ffffffff8187142c ffff88011b203ce8 ffff88011b203ce8 ffffffff8101dbfc Call Trace: [] ? __tcf_hash_release+0x77/0xd1 [] ? native_sched_clock+0x1a/0x35 [] ? native_sched_clock+0x1a/0x35 [] ? sched_clock_local+0x11/0x78 [] ? mark_lock+0x24/0x201 [] lock_acquire+0x120/0x1b4 [] ? lock_acquire+0x120/0x1b4 [] ? __tcf_hash_release+0x77/0xd1 [] _raw_spin_lock_bh+0x3c/0x72 [] ? __tcf_hash_release+0x77/0xd1 [] __tcf_hash_release+0x77/0xd1 [] tcf_action_destroy+0x49/0x7c [] tcf_exts_destroy+0x20/0x2d [] u32_destroy_key+0x1b/0x4d [] u32_delete_key_freepf_rcu+0x1b/0x1d [] rcu_process_callbacks+0x610/0x82e [] ? u32_destroy_key+0x4d/0x4d [] __do_softirq+0x191/0x3f4 Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_police.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b884dae692a1..c557789765dc 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -38,7 +38,7 @@ struct tcf_police { bool peak_present; }; #define to_police(pc) \ - container_of(pc, struct tcf_police, common) + container_of(pc->priv, struct tcf_police, common) #define POL_TAB_MASK 15 @@ -119,14 +119,12 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { - unsigned int h; int ret = 0, err; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; struct tc_action_net *tn = net_generic(net, police_net_id); - struct tcf_hashinfo *hinfo = tn->hinfo; int size; if (nla == NULL) @@ -145,7 +143,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, if (parm->index) { if (tcf_hash_search(tn, a, parm->index)) { - police = to_police(a->priv); + police = to_police(a); if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; @@ -156,16 +154,15 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, /* not replacing */ return -EEXIST; } + } else { + ret = tcf_hash_create(tn, parm->index, NULL, a, + sizeof(*police), bind, false); + if (ret) + return ret; + ret = ACT_P_CREATED; } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (police == NULL) - return -ENOMEM; - ret = ACT_P_CREATED; - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - if (bind) - police->tcf_bindcnt = 1; + police = to_police(a); override: if (parm->rate.rate) { err = -ENOMEM; @@ -237,16 +234,8 @@ override: return ret; police->tcfp_t_c = ktime_get_ns(); - police->tcf_index = parm->index ? parm->index : - tcf_hash_new_index(tn); - police->tcf_tm.install = jiffies; - police->tcf_tm.lastuse = jiffies; - h = tcf_hash(police->tcf_index, POL_TAB_MASK); - spin_lock_bh(&hinfo->lock); - hlist_add_head(&police->tcf_head, &hinfo->htab[h]); - spin_unlock_bh(&hinfo->lock); + tcf_hash_insert(tn, a); - a->priv = police; return ret; failure_unlock: @@ -255,7 +244,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - kfree(police); + tcf_hash_cleanup(a, est); return err; } From 92c075dbdeed02bdf293cb0f513bad70aa714b8d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 6 Jun 2016 22:50:39 +0200 Subject: [PATCH 0314/1050] net: sched: fix tc_should_offload for specific clsact classes When offloading classifiers such as u32 or flower to hardware, and the qdisc is clsact (TC_H_CLSACT), then we need to differentiate its classes, since not all of them handle ingress, therefore we must leave those in software path. Add a .tcf_cl_offload() callback, so we can generically handle them, tested on ixgbe. Fixes: 10cbc6843446 ("net/sched: cls_flower: Hardware offloaded filters statistics support") Fixes: 5b33f48842fa ("net/flower: Introduce hardware offload support") Fixes: a1b7c5fd7fe9 ("net: sched: add cls_u32 offload hooks for netdevs") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 10 +++++++--- include/net/sch_generic.h | 1 + net/sched/cls_flower.c | 6 +++--- net/sched/cls_u32.c | 8 ++++---- net/sched/sch_ingress.c | 12 ++++++++++++ 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f7efa88f210..3722dda0199d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -392,16 +392,20 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_should_offload(struct net_device *dev, u32 flags) +static inline bool tc_should_offload(const struct net_device *dev, + const struct tcf_proto *tp, u32 flags) { + const struct Qdisc *sch = tp->q; + const struct Qdisc_class_ops *cops = sch->ops->cl_ops; + if (!(dev->features & NETIF_F_HW_TC)) return false; - if (flags & TCA_CLS_FLAGS_SKIP_HW) return false; - if (!dev->netdev_ops->ndo_setup_tc) return false; + if (cops && cops->tcf_cl_offload) + return cops->tcf_cl_offload(tp->classid); return true; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6803af17dfcf..62d553184e91 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -168,6 +168,7 @@ struct Qdisc_class_ops { /* Filter manipulation */ struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long); + bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 730aacafc22d..b3b7978f4182 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -171,7 +171,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_DESTROY; @@ -194,7 +194,7 @@ static void fl_hw_replace_filter(struct tcf_proto *tp, struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, flags)) + if (!tc_should_offload(dev, tp, flags)) return; offload.command = TC_CLSFLOWER_REPLACE; @@ -216,7 +216,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_STATS; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index fe05449537a3..27b99fd774d7 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -440,7 +440,7 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; offload.cls_u32->knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, @@ -457,7 +457,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_to_netdev offload; int err; - if (!tc_should_offload(dev, flags)) + if (!tc_should_offload(dev, tp, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; offload.type = TC_SETUP_CLSU32; @@ -485,7 +485,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; offload.cls_u32->hnode.divisor = h->divisor; offload.cls_u32->hnode.handle = h->handle; @@ -508,7 +508,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { + if (tc_should_offload(dev, tp, flags)) { offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; offload.cls_u32->knode.handle = n->handle; offload.cls_u32->knode.fshift = n->fshift; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 10adbc617905..8fe6999b642a 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -27,6 +27,11 @@ static unsigned long ingress_get(struct Qdisc *sch, u32 classid) return TC_H_MIN(classid) + 1; } +static bool ingress_cl_offload(u32 classid) +{ + return true; +} + static unsigned long ingress_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -86,6 +91,7 @@ static const struct Qdisc_class_ops ingress_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = ingress_find_tcf, + .tcf_cl_offload = ingress_cl_offload, .bind_tcf = ingress_bind_filter, .unbind_tcf = ingress_put, }; @@ -110,6 +116,11 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid) } } +static bool clsact_cl_offload(u32 classid) +{ + return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS); +} + static unsigned long clsact_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -158,6 +169,7 @@ static const struct Qdisc_class_ops clsact_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = clsact_find_tcf, + .tcf_cl_offload = clsact_cl_offload, .bind_tcf = clsact_bind_filter, .unbind_tcf = ingress_put, }; From ce3cf4ec0305919fc69a972f6c2b2efd35d36abc Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 6 Jun 2016 15:07:18 -0700 Subject: [PATCH 0315/1050] tcp: record TLP and ER timer stats in v6 stats The v6 tcp stats scan do not provide TLP and ER timer information correctly like the v4 version . This patch fixes that. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Fixes: eed530b6c676 ("tcp: early retransmit") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 79e33e02f11a..f36c2d076fce 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1721,7 +1721,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { From 2c2a63e301fd19ccae673e79de59b30a232ff7f9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 8 Jun 2016 10:01:23 +1000 Subject: [PATCH 0316/1050] powerpc/pseries: Fix IBM_ARCH_VEC_NRCORES_OFFSET since POWER8NVL was added The recent commit 7cc851039d64 ("powerpc/pseries: Add POWER8NVL support to ibm,client-architecture-support call") added a new PVR mask & value to the start of the ibm_architecture_vec[] array. However it missed the fact that further down in the array, we hard code the offset of one of the fields, and then at boot use that value to patch the value in the array. This means every update to the array must also update the #define, ugh. This means that on pseries machines we will misreport to firmware the number of cores we support, by a factor of threads_per_core. Fix it for now by updating the #define. Fixes: 7cc851039d64 ("powerpc/pseries: Add POWER8NVL support to ibm,client-architecture-support call") Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ccd2037c797f..6ee4b72cda42 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -719,7 +719,7 @@ unsigned char ibm_architecture_vec[] = { * must match by the macro below. Update the definition if * the structure layout changes. */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 125 +#define IBM_ARCH_VEC_NRCORES_OFFSET 133 W(NR_CPUS), /* number of cores supported */ 0, 0, From b3c0a4dab7e35a9b6d69c0415641d2280fdefb2b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 6 Jun 2016 18:48:38 +0200 Subject: [PATCH 0317/1050] of: fix autoloading due to broken modalias with no 'compatible' Because of an improper dereference, a stray 'C' character was output to the modalias when no 'compatible' was specified. This is the case for some old PowerMac drivers which only set the 'name' property. Fix it to let them match again. Reported-by: Mathieu Malaterre Signed-off-by: Wolfram Sang Tested-by: Mathieu Malaterre Cc: Philipp Zabel Cc: Andreas Schwab Fixes: 6543becf26fff6 ("mod/file2alias: make modalias generation safe for cross compiling") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Michael Ellerman --- scripts/mod/file2alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index a9155077feef..fec75786f75b 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -384,7 +384,7 @@ static void do_of_entry_multi(void *symval, struct module *mod) len = sprintf(alias, "of:N%sT%s", (*name)[0] ? *name : "*", (*type)[0] ? *type : "*"); - if (compatible[0]) + if ((*compatible)[0]) sprintf(&alias[len], "%sC%s", (*type)[0] ? "*" : "", *compatible); From 2c2c1af4497514da1be2b571066859701dd79231 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Jun 2016 17:38:52 -0700 Subject: [PATCH 0318/1050] cpufreq: intel_pstate: Fix code ordering in intel_pstate_set_policy() The limits->max_perf is rounded_up but immediately overwritten by another assignment to limits->max_perf. Move that operation to the correct location. While here also added a pr_debug() call in ->set_policy to aid in debugging. Fixes: 785ee2788141 (cpufreq: intel_pstate: Fix limits->max_perf rounding error) Signed-off-by: Srinivas Pandruvada [ rjw : Subject & changelog ] Cc: 4.4+ # 4.4+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 0d159b513469..724b9056aa6b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1460,6 +1460,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_clear_update_util_hook(policy->cpu); + pr_debug("set_policy cpuinfo.max %u policy->max %u\n", + policy->cpuinfo.max_freq, policy->max); + cpu = all_cpu_data[0]; if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && policy->max < policy->cpuinfo.max_freq && @@ -1495,13 +1498,13 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_sysfs_pct); limits->max_perf_pct = max(limits->min_policy_pct, limits->max_perf_pct); - limits->max_perf = round_up(limits->max_perf, FRAC_BITS); /* Make sure min_perf_pct <= max_perf_pct */ limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); limits->min_perf = div_fp(limits->min_perf_pct, 100); limits->max_perf = div_fp(limits->max_perf_pct, 100); + limits->max_perf = round_up(limits->max_perf, FRAC_BITS); out: intel_pstate_set_update_util_hook(policy->cpu); From 983e600e88835f0321d1a0ea06f52d48b7b5a544 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Jun 2016 17:38:53 -0700 Subject: [PATCH 0319/1050] cpufreq: intel_pstate: Fix ->set_policy() interface for no_turbo When turbo is disabled, the ->set_policy() interface is broken. For example, when turbo is disabled and cpuinfo.max = 2900000 (full max turbo frequency), setting the limits results in frequency less than the requested one: Set 1000000 KHz results in 0700000 KHz Set 1500000 KHz results in 1100000 KHz Set 2000000 KHz results in 1500000 KHz This is because the limits->max_perf fraction is calculated using the max turbo frequency as the reference, but when the max P-State is capped in intel_pstate_get_min_max(), the reference is not the max turbo P-State. This results in reducing max P-State. One option is to always use max turbo as reference for calculating limits. But this will not be correct. By definition the intel_pstate sysfs limits, shows percentage of available performance. So when BIOS has disabled turbo, the available performance is max non turbo. So the max_perf_pct should still show 100%. Signed-off-by: Srinivas Pandruvada [ rjw : Subject & changelog, rewrite in fewer lines of code ] Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 724b9056aa6b..ee367e9b7d2e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1561,8 +1561,11 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; - policy->cpuinfo.max_freq = - cpu->pstate.turbo_pstate * cpu->pstate.scaling; + update_turbo_state(); + policy->cpuinfo.max_freq = limits->turbo_disabled ? + cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; + policy->cpuinfo.max_freq *= cpu->pstate.scaling; + intel_pstate_init_acpi_perf_limits(policy); policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); From 3d56c25e3bb0726a5c5e16fc2d9e38f8ed763085 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Jun 2016 21:26:55 -0400 Subject: [PATCH 0320/1050] fix d_walk()/non-delayed __d_free() race Ascend-to-parent logics in d_walk() depends on all encountered child dentries not getting freed without an RCU delay. Unfortunately, in quite a few cases it is not true, with hard-to-hit oopsable race as the result. Fortunately, the fix is simiple; right now the rule is "if it ever been hashed, freeing must be delayed" and changing it to "if it ever had a parent, freeing must be delayed" closes that hole and covers all cases the old rule used to cover. Moreover, pipes and sockets remain _not_ covered, so we do not introduce RCU delay in the cases which are the reason for having that delay conditional in the first place. Cc: stable@vger.kernel.org # v3.2+ (and watch out for __d_materialise_dentry()) Signed-off-by: Al Viro --- fs/dcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index ad4a542e9bab..817c243c1ff1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1636,7 +1636,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; - + dentry->d_flags |= DCACHE_RCUACCESS; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject @@ -2358,7 +2358,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); hlist_bl_lock(b); - entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } @@ -2843,6 +2842,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { /* splicing a tree */ + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_parent = target->d_parent; target->d_parent = target; list_del_init(&target->d_child); From a01e718f7241c53f564402f7acff373eed5bd166 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Jun 2016 21:53:51 -0400 Subject: [PATCH 0321/1050] fix a regression in atomic_open() open("/foo/no_such_file", O_RDONLY | O_CREAT) on should fail with EACCES when /foo is not writable; failing with ENOENT is obviously wrong. That got broken by a braino introduced when moving the creat_error logics from atomic_open() to lookup_open(). Easy to fix, fortunately. Spotted-by: "Yan, Zheng" Tested-by: "Yan, Zheng" Signed-off-by: Al Viro --- fs/namei.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index d7c0cac56d89..28cb1cd8507c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2995,9 +2995,13 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (*opened & FILE_CREATED) fsnotify_create(dir, dentry); - path->dentry = dentry; - path->mnt = nd->path.mnt; - return 1; + if (unlikely(d_is_negative(dentry))) { + error = -ENOENT; + } else { + path->dentry = dentry; + path->mnt = nd->path.mnt; + return 1; + } } } dput(dentry); From 1607f09c226d1378439c411baaaa020042750338 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 5 Jun 2016 23:14:14 +0200 Subject: [PATCH 0322/1050] coredump: fix dumping through pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The offset in the core file used to be tracked with ->written field of the coredump_params structure. The field was retired in favour of file->f_pos. However, ->f_pos is not maintained for pipes which leads to breakage. Restore explicit tracking of the offset in coredump_params. Introduce ->pos field for this purpose since ->written was already reused. Fixes: a00839395103 ("get rid of coredump_params->written"). Reported-by: Zbigniew Jędrzejewski-Szmek Signed-off-by: Mateusz Guzik Reviewed-by: Omar Sandoval Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/coredump.c | 2 +- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/coredump.c | 4 +++- include/linux/binfmts.h | 1 + 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 84fb984f29c1..85c85eb3e245 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -172,7 +172,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, if (rc < 0) goto out; - skip = roundup(cprm->file->f_pos - total + sz, 4) - cprm->file->f_pos; + skip = roundup(cprm->pos - total + sz, 4) - cprm->pos; if (!dump_skip(cprm, skip)) goto Eio; out: diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e158b22ef32f..a7a28110dc80 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2275,7 +2275,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Align to page */ - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 71ade0e556b7..203589311bf8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1787,7 +1787,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; if (!elf_fdpic_dump_segments(cprm)) diff --git a/fs/coredump.c b/fs/coredump.c index 38a7ab87e10a..281b768000e6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -794,6 +794,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) return 0; file->f_pos = pos; cprm->written += n; + cprm->pos += n; nr -= n; } return 1; @@ -808,6 +809,7 @@ int dump_skip(struct coredump_params *cprm, size_t nr) if (dump_interrupted() || file->f_op->llseek(file, nr, SEEK_CUR) < 0) return 0; + cprm->pos += nr; return 1; } else { while (nr > PAGE_SIZE) { @@ -822,7 +824,7 @@ EXPORT_SYMBOL(dump_skip); int dump_align(struct coredump_params *cprm, int align) { - unsigned mod = cprm->file->f_pos & (align - 1); + unsigned mod = cprm->pos & (align - 1); if (align & (align - 1)) return 0; return mod ? dump_skip(cprm, align - mod) : 1; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 576e4639ca60..314b3caa701c 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -65,6 +65,7 @@ struct coredump_params { unsigned long limit; unsigned long mm_flags; loff_t written; + loff_t pos; }; /* From 7be4881846cfa67f968eaf5b7b50d9623a652afb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 1 Jun 2016 11:36:13 +0200 Subject: [PATCH 0323/1050] hwmon: (fam15h_power) Disable preemption when reading registers We need to read a bunch of registers on each compute unit and possibly on the current CPU too. Disable preemption around it. Otherwise, you get: BUG: using smp_processor_id() in preemptible [00000000] code: systemd-udevd/327 caller is read_registers+0x6a/0x110 [fam15h_power] CPU: 3 PID: 327 Comm: systemd-udevd Not tainted 4.7.0-rc1+ #4 Hardware name: HP HP EliteBook 745 G3/807E, BIOS N73 Ver. 01.08 01/28/2016 ... Suggested-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Rui Huang Cc: Sherry Hurwitz Cc: Guenter Roeck Acked-by: Huang Rui Tested-by: Huang Rui Fixes: fa7943449943 ("hwmon: (fam15h_power) Add compute unit accumulated power") Signed-off-by: Guenter Roeck --- drivers/hwmon/fam15h_power.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index eb97a9241d17..15aa49d082c4 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -172,9 +172,9 @@ static void do_read_registers_on_cu(void *_data) */ static int read_registers(struct fam15h_power_data *data) { - int this_cpu, ret, cpu; int core, this_core; cpumask_var_t mask; + int ret, cpu; ret = zalloc_cpumask_var(&mask, GFP_KERNEL); if (!ret) @@ -183,7 +183,6 @@ static int read_registers(struct fam15h_power_data *data) memset(data->cu_on, 0, sizeof(int) * MAX_CUS); get_online_cpus(); - this_cpu = smp_processor_id(); /* * Choose the first online core of each compute unit, and then @@ -205,12 +204,9 @@ static int read_registers(struct fam15h_power_data *data) cpumask_set_cpu(cpumask_any(topology_sibling_cpumask(cpu)), mask); } - if (cpumask_test_cpu(this_cpu, mask)) - do_read_registers_on_cu(data); + on_each_cpu_mask(mask, do_read_registers_on_cu, data, true); - smp_call_function_many(mask, do_read_registers_on_cu, data, true); put_online_cpus(); - free_cpumask_var(mask); return 0; From 1069ad8f65a474bb30a487a001511b25f3575f5e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 1 Jun 2016 11:43:12 +0200 Subject: [PATCH 0324/1050] hwmon: (ina2xx) Document compatible for INA231 Document the compatible for INA231 sensor. Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/hwmon/ina2xx.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/hwmon/ina2xx.txt b/Documentation/devicetree/bindings/hwmon/ina2xx.txt index 9bcd5e87830d..02af0d94e921 100644 --- a/Documentation/devicetree/bindings/hwmon/ina2xx.txt +++ b/Documentation/devicetree/bindings/hwmon/ina2xx.txt @@ -7,6 +7,7 @@ Required properties: - "ti,ina220" for ina220 - "ti,ina226" for ina226 - "ti,ina230" for ina230 + - "ti,ina231" for ina231 - reg: I2C address Optional properties: From 38bab98a8da4a2ff5c3f55b045b0c8bf6901362f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 5 Jun 2016 09:35:43 +0200 Subject: [PATCH 0325/1050] hwmon: (lm90) use proper type for update_interval The code handles this variable always as unsigned, so adapt the type. Signed-off-by: Wolfram Sang Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index c9ff08dbe10c..e30a5939dc0d 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -375,7 +375,7 @@ struct lm90_data { int kind; u32 flags; - int update_interval; /* in milliseconds */ + unsigned int update_interval; /* in milliseconds */ u8 config_orig; /* Original configuration register value */ u8 convrate_orig; /* Original conversion rate register value */ From 9690c15742688e9cb5ee4aa0b08e458551ceea13 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 2 Jun 2016 15:14:48 +0530 Subject: [PATCH 0326/1050] powerpc/mm/radix: Fix always false comparison against MMU_NO_CONTEXT In some of the radix TLB flush routines, we use a local to store the mm->context.id, AKA the PID. Currently we use an int, but the PID is unsigned long, so large values of PID will be truncated. In particular MMU_NO_CONTEXT is -1, which means all our comparisons against that value can never be true. This means we'll issue TLB flushes when we shouldn't on radix enabled machines. Fix it by using an unsigned long for the local. Discovered by Coverity. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Signed-off-by: Aneesh Kumar K.V Reviewed-by: Balbir Singh [mpe: Write change log] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 0fdaf93a3e09..54efba2fd66e 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -117,7 +117,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, */ void radix__local_flush_tlb_mm(struct mm_struct *mm) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm->context.id; @@ -130,7 +130,7 @@ EXPORT_SYMBOL(radix__local_flush_tlb_mm); void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm ? mm->context.id : 0; @@ -160,7 +160,7 @@ static int mm_is_core_local(struct mm_struct *mm) void radix__flush_tlb_mm(struct mm_struct *mm) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm->context.id; @@ -185,7 +185,7 @@ EXPORT_SYMBOL(radix__flush_tlb_mm); void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm ? mm->context.id : 0; From 3b6d1eb7ea65f4aa64115cf9ba02c190e5c9f6de Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 19 May 2016 13:24:30 +0530 Subject: [PATCH 0327/1050] powerpc/mm/hash: Compute the segment size correctly for ISA 3.0 PowerISA 3.0 encodes the segment size in the second half of hash page table entry. Update hpte_decode() accordingly. Fixes: 50de596de8be ("powerpc/mm/hash: Add support for Power9 Hash") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_native_64.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index d873f6507f72..40e05e7f43de 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -550,7 +550,11 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, } } /* This works for all page sizes, and for 256M and 1T segments */ - *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT; + else + *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; + shift = mmu_psize_defs[size].shift; avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); From c66f59ee5050447b3da92d36f5385a847990a894 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pecio?= Date: Tue, 7 Jun 2016 12:34:45 +0200 Subject: [PATCH 0328/1050] USB: OHCI: Don't mark EDs as ED_OPER if scheduling fails Since ed_schedule begins with marking the ED as "operational", the ED may be left in such state even if scheduling actually fails. This allows future submission attempts to smuggle this ED to the hardware behind the scheduler's back and without linking it to the ohci->eds_in_use list. The former causes bandwidth saturation and data loss on isoc endpoints, the latter crashes the kernel when attempt is made to unlink such ED from this list. Fix ed_schedule to update ED state only on successful return. Signed-off-by: Michal Pecio Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-q.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index d029bbe9eb36..641fed609911 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -183,7 +183,6 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed) { int branch; - ed->state = ED_OPER; ed->ed_prev = NULL; ed->ed_next = NULL; ed->hwNextED = 0; @@ -259,6 +258,8 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed) /* the HC may not see the schedule updates yet, but if it does * then they'll be properly ordered. */ + + ed->state = ED_OPER; return 0; } From dcb21ad4385731b7fc3ef39d255685f2f63c8c5d Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 30 May 2016 19:16:33 +0530 Subject: [PATCH 0329/1050] USB: mos7720: delete parport parport subsystem has introduced parport_del_port() to delete a port when it is going away. Without parport_del_port() the registered port will not be unregistered. To reproduce and verify the error: Command to be used is : ls /sys/bus/parport/devices 1) without the device attached there is no output as there is no registered parport. 2) Attach the device, and the command will show "parport0". 3) Remove the device and the command still shows "parport0". 4) Attach the device again and we get "parport1". With the patch applied: 1) without the device attached there is no output as there is no registered parport. 2) Attach the device, and the command will show "parport0". 3) Remove the device and there is no output as "parport0" is now removed. 4) Attach device again to get "parport0" again. Cc: # 4.2+ Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 2eddbe538cda..5608af4a369d 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -2007,6 +2007,7 @@ static void mos7720_release(struct usb_serial *serial) urblist_entry) usb_unlink_urb(urbtrack->urb); spin_unlock_irqrestore(&mos_parport->listlock, flags); + parport_del_port(mos_parport->pp); kref_put(&mos_parport->ref_count, destroy_mos_parport); } From f8a15a9650694feaa0dabf197b0c94d37cd3fb42 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 26 May 2016 17:23:29 +0200 Subject: [PATCH 0330/1050] usb: host: ehci-tegra: Grab the correct UTMI pads reset There are three EHCI controllers on Tegra SoCs, each with its own reset line. However, the first controller contains a set of UTMI configuration registers that are shared with its siblings. These registers will only be reset as part of the first controller's reset. For proper operation it must be ensured that the UTMI configuration registers are reset before any of the EHCI controllers are enabled, irrespective of the probe order. Commit a47cc24cd1e5 ("USB: EHCI: tegra: Fix probe order issue leading to broken USB") introduced code that ensures the first controller is always reset before setting up any of the controllers, and is never again reset afterwards. This code, however, grabs the wrong reset. Each EHCI controller has two reset controls attached: 1) the USB controller reset and 2) the UTMI pads reset (really the first controller's reset). In order to reset the UTMI pads registers the code must grab the second reset, but instead it grabbing the first. Fixes: a47cc24cd1e5 ("USB: EHCI: tegra: Fix probe order issue leading to broken USB") Acked-by: Jon Hunter Cc: stable@vger.kernel.org Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 4031b372008e..c1c1024a054c 100644 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -89,7 +89,7 @@ static int tegra_reset_usb_controller(struct platform_device *pdev) if (!usb1_reset_attempted) { struct reset_control *usb1_reset; - usb1_reset = of_reset_control_get(phy_np, "usb"); + usb1_reset = of_reset_control_get(phy_np, "utmi-pads"); if (IS_ERR(usb1_reset)) { dev_warn(&pdev->dev, "can't get utmi-pads reset from the PHY\n"); From 7cc9ca5a994c90fa771135e50b7a9cb99a65aa1d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 26 May 2016 17:23:30 +0200 Subject: [PATCH 0331/1050] usb: host: ehci-tegra: Avoid getting the same reset twice Starting with commit 0b52297f2288 ("reset: Add support for shared reset controls") there is a reference count for reset control assertions. The goal is to allow resets to be shared by multiple devices and an assert will take effect only when all instances have asserted the reset. In order to preserve backwards-compatibility, all reset controls become exclusive by default. This is to ensure that reset_control_assert() can immediately assert in hardware. However, this new behaviour triggers the following warning in the EHCI driver for Tegra: [ 3.365019] ------------[ cut here ]------------ [ 3.369639] WARNING: CPU: 0 PID: 1 at drivers/reset/core.c:187 __of_reset_control_get+0x16c/0x23c [ 3.382151] Modules linked in: [ 3.385214] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-next-20160503 #140 [ 3.392769] Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) [ 3.399046] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 3.406787] [] (show_stack) from [] (dump_stack+0x90/0xa4) [ 3.414007] [] (dump_stack) from [] (__warn+0xe8/0x100) [ 3.420964] [] (__warn) from [] (warn_slowpath_null+0x20/0x28) [ 3.428525] [] (warn_slowpath_null) from [] (__of_reset_control_get+0x16c/0x23c) [ 3.437648] [] (__of_reset_control_get) from [] (tegra_ehci_probe+0x394/0x518) [ 3.446600] [] (tegra_ehci_probe) from [] (platform_drv_probe+0x4c/0xb0) [ 3.455029] [] (platform_drv_probe) from [] (driver_probe_device+0x1ec/0x330) [ 3.463892] [] (driver_probe_device) from [] (__driver_attach+0xb8/0xbc) [ 3.472320] [] (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) [ 3.480489] [] (bus_for_each_dev) from [] (bus_add_driver+0x1a0/0x218) [ 3.488743] [] (bus_add_driver) from [] (driver_register+0x78/0xf8) [ 3.496738] [] (driver_register) from [] (do_one_initcall+0x40/0x170) [ 3.504909] [] (do_one_initcall) from [] (kernel_init_freeable+0x158/0x1f8) [ 3.513600] [] (kernel_init_freeable) from [] (kernel_init+0x8/0x114) [ 3.521770] [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) [ 3.529361] ---[ end trace 4bda87dbe4ecef8a ]--- The reason is that Tegra SoCs have three EHCI controllers, each with a separate reset line. However the first controller contains UTMI pads configuration registers that are shared with its siblings and that are reset as part of the first controller's reset. There is special code in the driver to assert and deassert this shared reset at probe time, and it does so irrespective of which controller is probed first to ensure that these shared registers are reset before any of the controllers are initialized. Unfortunately this means that if the first controller gets probed first, it will request its own reset line and will subsequently request the same reset line again (temporarily) to perform the reset. This used to work fine before the above-mentioned commit, but now triggers the new WARN. Work around this by making sure we reuse the controller's reset if the controller happens to be the first controller. Cc: Philipp Zabel Cc: Hans de Goede Reviewed-by: Philipp Zabel Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-tegra.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index c1c1024a054c..9a3d7db5be57 100644 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -81,15 +81,23 @@ static int tegra_reset_usb_controller(struct platform_device *pdev) struct usb_hcd *hcd = platform_get_drvdata(pdev); struct tegra_ehci_hcd *tegra = (struct tegra_ehci_hcd *)hcd_to_ehci(hcd)->priv; + bool has_utmi_pad_registers = false; phy_np = of_parse_phandle(pdev->dev.of_node, "nvidia,phy", 0); if (!phy_np) return -ENOENT; + if (of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers")) + has_utmi_pad_registers = true; + if (!usb1_reset_attempted) { struct reset_control *usb1_reset; - usb1_reset = of_reset_control_get(phy_np, "utmi-pads"); + if (!has_utmi_pad_registers) + usb1_reset = of_reset_control_get(phy_np, "utmi-pads"); + else + usb1_reset = tegra->rst; + if (IS_ERR(usb1_reset)) { dev_warn(&pdev->dev, "can't get utmi-pads reset from the PHY\n"); @@ -99,13 +107,15 @@ static int tegra_reset_usb_controller(struct platform_device *pdev) reset_control_assert(usb1_reset); udelay(1); reset_control_deassert(usb1_reset); + + if (!has_utmi_pad_registers) + reset_control_put(usb1_reset); } - reset_control_put(usb1_reset); usb1_reset_attempted = true; } - if (!of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers")) { + if (!has_utmi_pad_registers) { reset_control_assert(tegra->rst); udelay(1); reset_control_deassert(tegra->rst); From 1700bd9872dcd06a284c38c1ce33bba0f239dbd3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:06:26 +0200 Subject: [PATCH 0332/1050] MAINTAINERS: Add file patterns for usb device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7304d2e37a98..beec5a36f12a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11956,6 +11956,7 @@ L: linux-usb@vger.kernel.org W: http://www.linux-usb.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git S: Supported +F: Documentation/devicetree/bindings/usb/ F: Documentation/usb/ F: drivers/usb/ F: include/linux/usb.h From 815c9d6a3caeb803ae36d09788c8969b85ce7e4c Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Fri, 20 May 2016 16:35:07 -0500 Subject: [PATCH 0333/1050] usb: host: ehci-msm: Conditionally call ehci suspend/resume This patch fixes a suspend/resume issue where the driver is blindly calling ehci_suspend/resume functions when the ehci hasn't been setup. This results in a crash during suspend/resume operations. Signed-off-by: Andy Gross Tested-by: Pramod Gurav Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-msm.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c index d3afc89d00f5..2f8d3af811ce 100644 --- a/drivers/usb/host/ehci-msm.c +++ b/drivers/usb/host/ehci-msm.c @@ -179,22 +179,32 @@ static int ehci_msm_remove(struct platform_device *pdev) static int ehci_msm_pm_suspend(struct device *dev) { struct usb_hcd *hcd = dev_get_drvdata(dev); + struct ehci_hcd *ehci = hcd_to_ehci(hcd); bool do_wakeup = device_may_wakeup(dev); dev_dbg(dev, "ehci-msm PM suspend\n"); - return ehci_suspend(hcd, do_wakeup); + /* Only call ehci_suspend if ehci_setup has been done */ + if (ehci->sbrn) + return ehci_suspend(hcd, do_wakeup); + + return 0; } static int ehci_msm_pm_resume(struct device *dev) { struct usb_hcd *hcd = dev_get_drvdata(dev); + struct ehci_hcd *ehci = hcd_to_ehci(hcd); dev_dbg(dev, "ehci-msm PM resume\n"); - ehci_resume(hcd, false); + + /* Only call ehci_resume if ehci_setup has been done */ + if (ehci->sbrn) + ehci_resume(hcd, false); return 0; } + #else #define ehci_msm_pm_suspend NULL #define ehci_msm_pm_resume NULL From 11c011a5e777c83819078a18672543f04482b3ec Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 19 May 2016 11:12:56 +0100 Subject: [PATCH 0334/1050] usb: echi-hcd: Add ehci_setup check before echi_shutdown This patch protects system from crashing at shutdown in cases where usb host is not added yet from OTG controller driver. As ehci_setup() not done yet, so stop accessing registers or variables initialized as part of ehci_setup(). The use case is simple, for boards like DB410c where the usb host or device functionality is decided based on the micro-usb cable presence. If the board boots up with micro-usb connected, the OTG driver like echi-msm would not add the usb host by default. However a system shutdown would go and access registers and uninitialized variables, resulting in below crash. Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = ffffffc034581000 [00000008] *pgd=0000000000000000, *pud=0000000000000000 CPU: 2 PID: 1957 Comm: reboot Not tainted 4.6.0+ #99 task: ffffffc034bc0000 ti: ffffffc0345cc000 task.ti: ffffffc0345cc000 PC is at ehci_halt+0x54/0x108 LR is at ehci_halt+0x38/0x108 pc : [] lr : [] pstate: a00001c5 sp : ffffffc0345cfc60 x29: ffffffc0345cfc60 x28: ffffffc0345cc000 x27: ffffff8008a4d000 x26: 000000000000008e x25: ffffff8008d86cb0 x24: ffffff800908b040 x23: ffffffc036068870 x22: ffffff8009d0a000 x21: ffffffc03512a410 x20: ffffffc03512a410 x19: ffffffc03512a338 x18: 00000000000065ba x17: ffffff8009b16b80 x16: 0000000000000003 x15: 00000000000065b9 x14: 00000000000065b6 x13: 0000000000000000 x12: 0000000000000000 x11: 000000000000003d x10: ffffffc0345cf9e0 x9 : 0000000000000001 x8 : ffffffc0345cc000 x7 : ffffff8008698360 x6 : 0000000000000000 x5 : 0000000000000080 x4 : 0000000000000001 x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000000000000008 x0 : ffffffc034bc0000 Process reboot (pid: 1957, stack limit = 0xffffffc0345cc020) Stack: (0xffffffc0345cfc60 to 0xffffffc0345d0000) fc60: ffffffc0345cfc90 ffffff8008698448 ffffffc03512a338 ffffffc03512a338 fc80: ffffffc03512a410 ffffff8008a3bbfc ffffffc0345cfcc0 ffffff8008698548 fca0: ffffffc03512a338 ffffffc03512a000 ffffffc03512a410 ffffff8009d0a000 fcc0: ffffffc0345cfcf0 ffffff800865d2bc ffffffc036068828 ffffffc036068810 fce0: ffffffc036003810 ffffff800853f43c ffffffc0345cfd00 ffffff800854338c fd00: ffffffc0345cfd10 ffffff800853f45c ffffffc0345cfd60 ffffff80080e0f48 fd20: 0000000000000000 0000000001234567 ffffff8008f8c000 ffffff8008f8c060 fd40: 0000000000000000 0000000000000015 0000000000000120 ffffff80080e0f30 fd60: ffffffc0345cfd70 ffffff80080e1020 ffffffc0345cfd90 ffffff80080e12fc fd80: 0000000000000000 0000000001234567 0000000000000000 ffffff8008085e70 fda0: 0000000000000000 0000005592905000 ffffffffffffffff 0000007f79daf1cc fdc0: 0000000000000000 0000000000000000 0000007ffcbb1198 000000000000000a fde0: 00000055928d3f58 0000000000000001 ffffffc034900000 00000000fffffffe fe00: ffffffc034900000 0000007f79da902c ffffffc0345cfe40 ffffff800820af38 fe20: 0000000000000000 0000007ffcbb1078 ffffffffffffffff ffffff80081e9b38 fe40: ffffffc0345cfe60 ffffff80081eb410 ffffffc0345cfe60 ffffff80081eb444 fe60: ffffffc0345cfec0 ffffff80081ec4f4 0000000000000000 0000007ffcbb1078 fe80: ffffffffffffffff 0000000000000015 ffffffc0345cfec0 0000007ffcbb1078 fea0: 0000000000000002 000000000000000a ffffffffffffffff 0000000000000000 fec0: 0000000000000000 ffffff8008085e70 fffffffffee1dead 0000000028121969 fee0: 0000000001234567 0000000000000000 ffffffffffffffff 8080800000800000 ff00: 0000800000808080 0000007ffcbb10f0 000000000000008e fefeff54918cb8c7 ff20: 7f7f7f7fffffffff 0101010101010101 0000000000000010 0000000000000000 ff40: 0000000000000000 0000007f79e33588 0000005592905eb8 0000007f79daf1b0 ff60: 0000007ffcbb1340 0000005592906000 0000005592905000 0000005592906000 ff80: 0000005592907000 0000000000000002 0000007ffcbb1d98 0000005592906000 ffa0: 00000055928d2000 0000000000000000 0000000000000000 0000007ffcbb1aa0 ffc0: 00000055928b819c 0000007ffcbb1aa0 0000007f79daf1cc 0000000000000000 ffe0: fffffffffee1dead 000000000000008e 05ef555057155555 d555544d55d775d3 Call trace: Exception stack(0xffffffc0345cfaa0 to 0xffffffc0345cfbc0) Set corner to 6 faa0: ffffffc03512a338 ffffffc03512a410 ffffffc0345cfc60 ffffff800869837c fac0: ffffff8008114210 0000000100000001 ffffff8009ce1b20 ffffff8009ce5f20 fae0: ffffffc0345cfb80 ffffff80081145a8 ffffffc0345cfc10 ffffff800810b924 fb00: ffffffc0345cc000 00000000000001c0 ffffffc03512a410 ffffff8009d0a000 fb20: ffffffc036068870 ffffff800908b040 ffffff8008d86cb0 000000000000008e fb40: ffffffc034bc0000 0000000000000008 0000000000000000 0000000000000000 fb60: 0000000000000001 0000000000000080 0000000000000000 ffffff8008698360 fb80: ffffffc0345cc000 0000000000000001 ffffffc0345cf9e0 000000000000003d fba0: 0000000000000000 0000000000000000 00000000000065b6 00000000000065b9 [] ehci_halt+0x54/0x108 [] ehci_silence_controller+0x18/0xcc [] ehci_shutdown+0x4c/0x64 [] usb_hcd_platform_shutdown+0x1c/0x24 [] platform_drv_shutdown+0x20/0x28 [] device_shutdown+0xf4/0x1b0 [] kernel_restart_prepare+0x34/0x3c [] kernel_restart+0x14/0x74 [] SyS_reboot+0x110/0x21c [] el0_svc_naked+0x24/0x28 Code: 53001c42 350000a2 d5033e9f 91002021 (b9000022) Fixes 4bb3cad7125b ("usb: host: ehci-msm: Register usb shutdown function") Signed-off-by: Srinivas Kandagatla Tested-by: Pramod Gurav Tested-by: Andy Gross Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index ae1b6e69eb96..a962b89b65a6 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -368,6 +368,15 @@ static void ehci_shutdown(struct usb_hcd *hcd) { struct ehci_hcd *ehci = hcd_to_ehci(hcd); + /** + * Protect the system from crashing at system shutdown in cases where + * usb host is not added yet from OTG controller driver. + * As ehci_setup() not done yet, so stop accessing registers or + * variables initialized in ehci_setup() + */ + if (!ehci->sbrn) + return; + spin_lock_irq(&ehci->lock); ehci->shutdown = true; ehci->rh_state = EHCI_RH_STOPPING; From 0b148def403153a4d1565f1640356cb78ce5109f Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 7 Jun 2016 19:14:17 +0900 Subject: [PATCH 0335/1050] bridge: Don't insert unnecessary local fdb entry on changing mac address The missing br_vlan_should_use() test caused creation of an unneeded local fdb entry on changing mac address of a bridge device when there is a vlan which is configured on a bridge port but not on the bridge device. Fixes: 2594e9064a57 ("bridge: vlan: add per-vlan struct and move to rhashtables") Signed-off-by: Toshiaki Makita Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index dcea4f4c62b3..c18080ad4085 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -279,6 +279,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) * change from under us. */ list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; f = __br_fdb_get(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst) fdb_delete_local(br, NULL, f); From 88832a22d6bb50e3b5f9d5ecc6cf26707c35f322 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 7 Jun 2016 19:27:51 +0100 Subject: [PATCH 0336/1050] net-sysfs: fix missing The of_find_net_device_by_node() function is defined in but not included in the .c file that implements it. Fix the following warning by including the header: net/core/net-sysfs.c:1494:19: warning: symbol 'of_find_net_device_by_node' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2b3f76fe65f4..7a0b616557ab 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "net-sysfs.h" From d15d6cf91695674fbabac3b1d2c8a269d9bab5c6 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Thu, 2 Jun 2016 16:00:09 -0400 Subject: [PATCH 0337/1050] gpio: 104-dio-48e: Fix control port offset computation off-by-one error There are only two control ports, each controlling three distinct I/O ports. To compute the control port address offset for a respective I/O port, the I/O port address offset should be divided by 3; dividing by 2 may result in not only the wrong address offset but possibly also an out-of-bounds array memory access for a non-existent third control port. Fixes: 1b06d64f7374 ("gpio: Add GPIO support for the ACCES 104-DIO-48E") Signed-off-by: William Breathitt Gray Signed-off-by: Linus Walleij --- drivers/gpio/gpio-104-dio-48e.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c index 1a647c07be67..fcf776971ca9 100644 --- a/drivers/gpio/gpio-104-dio-48e.c +++ b/drivers/gpio/gpio-104-dio-48e.c @@ -75,7 +75,7 @@ static int dio48e_gpio_direction_input(struct gpio_chip *chip, unsigned offset) { struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip); const unsigned io_port = offset / 8; - const unsigned control_port = io_port / 2; + const unsigned int control_port = io_port / 3; const unsigned control_addr = dio48egpio->base + 3 + control_port*4; unsigned long flags; unsigned control; @@ -115,7 +115,7 @@ static int dio48e_gpio_direction_output(struct gpio_chip *chip, unsigned offset, { struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip); const unsigned io_port = offset / 8; - const unsigned control_port = io_port / 2; + const unsigned int control_port = io_port / 3; const unsigned mask = BIT(offset % 8); const unsigned control_addr = dio48egpio->base + 3 + control_port*4; const unsigned out_port = (io_port > 2) ? io_port + 1 : io_port; From 0f84f29ff30bdb1bca23017b118b4ea3999cac32 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Fri, 3 Jun 2016 14:15:32 +0200 Subject: [PATCH 0338/1050] gpio: zynq: initialize clock even without CONFIG_PM When the PM initialization was moved in the commit referenced below, the code enabling the clock was removed from the probe function. On CONFIG_PM=y kernels, this is not a problem as the pm resume hook enables the clock, but when power management is disabled, all those pm_* functions are noops and the clock is never enabled resulting in a dysfunctional gpio controller. Put the clock initialization back to support CONFIG_PM=n. Cc: stable@vger.kernel.org Signed-off-by: Helmut Grohne Fixes: 3773c195d387 ("gpio: zynq: Do PM initialization earlier to support gpio hogs") Signed-off-by: Linus Walleij --- drivers/gpio/gpio-zynq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index 75c6355b018d..e72794e463aa 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -709,7 +709,13 @@ static int zynq_gpio_probe(struct platform_device *pdev) dev_err(&pdev->dev, "input clock not found.\n"); return PTR_ERR(gpio->clk); } + ret = clk_prepare_enable(gpio->clk); + if (ret) { + dev_err(&pdev->dev, "Unable to enable clock.\n"); + return ret; + } + pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) @@ -747,6 +753,7 @@ err_pm_put: pm_runtime_put(&pdev->dev); err_pm_dis: pm_runtime_disable(&pdev->dev); + clk_disable_unprepare(gpio->clk); return ret; } From 11f33a6d15bfa397867ac0d7f3481b6dd683286f Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Fri, 3 Jun 2016 19:10:01 +0200 Subject: [PATCH 0339/1050] gpiolib: Fix NULL pointer deference Under some circumstances, a gpiochip might be half cleaned from the gpio_device list. This patch makes sure that the chip pointer is still valid, before calling the match function. [ 104.088296] BUG: unable to handle kernel NULL pointer dereference at 0000000000000090 [ 104.089772] IP: [] of_gpiochip_find_and_xlate+0x15/0x80 [ 104.128273] Call Trace: [ 104.129802] [] ? of_parse_own_gpio+0x1f0/0x1f0 [ 104.131353] [] gpiochip_find+0x60/0x90 [ 104.132868] [] of_get_named_gpiod_flags+0x9a/0x120 ... [ 104.141586] [] gpio_led_probe+0x11b/0x360 Cc: stable@vger.kernel.org Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 24f60d28f0c0..a2c31a5a677f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -869,7 +869,7 @@ struct gpio_chip *gpiochip_find(void *data, spin_lock_irqsave(&gpio_lock, flags); list_for_each_entry(gdev, &gpio_devices, list) - if (match(gdev->chip, data)) + if (gdev->chip && match(gdev->chip, data)) break; /* No match? */ From f4833b8cc7edab57d3f3033e549111a546c2e02b Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Fri, 3 Jun 2016 19:10:02 +0200 Subject: [PATCH 0340/1050] gpiolib: Fix unaligned used of reference counters gpiolib relies on the reference counters to clean up the gpio_device structure. Although the number of get/put is properly aligned on gpiolib.c itself, it does not take into consideration how the referece counters are affected by other external functions such as cdev_add and device_add. Because of this, after the last call to put_device, the reference counter has a value of +3, therefore never calling gpiodevice_release. Due to the fact that some of the device has already been cleaned on gpiochip_remove, the library will end up OOPsing the kernel (e.g. a call to of_gpiochip_find_and_xlate). Cc: stable@vger.kernel.org Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index a2c31a5a677f..58d822d7e8da 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -449,7 +449,6 @@ static void gpiodevice_release(struct device *dev) { struct gpio_device *gdev = dev_get_drvdata(dev); - cdev_del(&gdev->chrdev); list_del(&gdev->list); ida_simple_remove(&gpio_ida, gdev->id); kfree(gdev->label); @@ -482,7 +481,6 @@ static int gpiochip_setup_dev(struct gpio_device *gdev) /* From this point, the .release() function cleans up gpio_device */ gdev->dev.release = gpiodevice_release; - get_device(&gdev->dev); pr_debug("%s: registered GPIOs %d to %d on device: %s (%s)\n", __func__, gdev->base, gdev->base + gdev->ngpio - 1, dev_name(&gdev->dev), gdev->chip->label ? : "generic"); @@ -770,6 +768,8 @@ void gpiochip_remove(struct gpio_chip *chip) * be removed, else it will be dangling until the last user is * gone. */ + cdev_del(&gdev->chrdev); + device_del(&gdev->dev); put_device(&gdev->dev); } EXPORT_SYMBOL_GPL(gpiochip_remove); From 7d4defe21c682c934a19fce1ba8b54b7bde61b08 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 8 Jun 2016 10:58:20 +0200 Subject: [PATCH 0341/1050] gpio: include in gpiolib-of When enabling the gpiolib for all archs a build robot came up with this: All errors (new ones prefixed by >>): drivers/gpio/gpiolib-of.c: In function 'of_mm_gpiochip_add_data': >> drivers/gpio/gpiolib-of.c:317:2: error: implicit declaration of function 'iounmap' [-Werror=implicit-function-declaration] iounmap(mm_gc->regs); ^~~~~~~ cc1: some warnings being treated as errors Fix this by including explicitly. Fixes: 296ad4acb8ef ("gpio: remove deps on ARCH_[WANT_OPTIONAL|REQUIRE]_GPIOLIB") Reported-by: kbuild test robot Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index d22dcc38179d..4aabddb38b59 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include From 0106d456c4cb1770253fefc0ab23c9ca760b43f7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Jun 2016 17:55:15 +0100 Subject: [PATCH 0342/1050] arm64: mm: always take dirty state from new pte in ptep_set_access_flags Commit 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM") ensured that pte flags are updated atomically in the face of potential concurrent, hardware-assisted updates. However, Alex reports that: | This patch breaks swapping for me. | In the broken case, you'll see either systemd cpu time spike (because | it's stuck in a page fault loop) or the system hang (because the | application owning the screen is stuck in a page fault loop). It turns out that this is because the 'dirty' argument to ptep_set_access_flags is always 0 for read faults, and so we can't use it to set PTE_RDONLY. The failing sequence is: 1. We put down a PTE_WRITE | PTE_DIRTY | PTE_AF pte 2. Memory pressure -> pte_mkold(pte) -> clear PTE_AF 3. A read faults due to the missing access flag 4. ptep_set_access_flags is called with dirty = 0, due to the read fault 5. pte is then made PTE_WRITE | PTE_DIRTY | PTE_AF | PTE_RDONLY (!) 6. A write faults, but pte_write is true so we get stuck The solution is to check the new page table entry (as would be done by the generic, non-atomic definition of ptep_set_access_flags that just calls set_pte_at) to establish the dirty state. Cc: # 4.3+ Fixes: 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM") Reviewed-by: Catalin Marinas Reported-by: Alexander Graf Tested-by: Alexander Graf Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 5954881a35ac..ba3fc12bd272 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -109,7 +109,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, * PTE_RDONLY is cleared by default in the asm below, so set it in * back if necessary (read-only or clean PTE). */ - if (!pte_write(entry) || !dirty) + if (!pte_write(entry) || !pte_sw_dirty(entry)) pte_val(entry) |= PTE_RDONLY; /* From 7cfe749fad5158247282f2fee30773fd454029ab Mon Sep 17 00:00:00 2001 From: Tony Makkiel Date: Wed, 18 May 2016 17:22:45 +0100 Subject: [PATCH 0343/1050] leds: core: Fix brightness setting upon hardware blinking enabled Commit 76931edd54f8 ("leds: fix brightness changing when software blinking is active") changed the semantics of led_set_brightness() which according to the documentation should disable blinking upon any brightness setting. Moreover it made it different for soft blink case, where it was possible to change blink brightness, and for hardware blink case, where setting any brightness greater than 0 was ignored. While the change itself is against the documentation claims, it was driven also by the fact that timer trigger remained active after turning blinking off. Fixing that would have required major refactoring in the led-core, led-class, and led-triggers because of cyclic dependencies. Finally, it has been decided that allowing for brightness change during blinking is beneficial as it can be accomplished without disturbing blink rhythm. The change in brightness setting semantics will not affect existing LED class drivers that implement blink_set op thanks to the LED_BLINK_SW flag introduced by this patch. The flag state will be from now on checked in led_set_brightness() which will allow to distinguish between software and hardware blink mode. In the latter case the control will be passed directly to the drivers which apply their semantics on brightness set, which is disable the blinking in case of most such drivers. New drivers will apply new semantics and just change the brightness while hardware blinking is on, if possible. The issue was smuggled by subsequent LED core improvements, which modified the code that originally introduced the problem. Fixes: f1e80c07416a ("leds: core: Add two new LED_BLINK_ flags") Signed-off-by: Tony Makkiel Signed-off-by: Jacek Anaszewski --- Documentation/leds/leds-class.txt | 4 ++-- drivers/leds/led-core.c | 9 ++++++--- include/linux/leds.h | 23 ++++++++++++----------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt index d406d98339b2..44f5e6bccd97 100644 --- a/Documentation/leds/leds-class.txt +++ b/Documentation/leds/leds-class.txt @@ -74,8 +74,8 @@ blink_set() function (see ). To set an LED to blinking, however, it is better to use the API function led_blink_set(), as it will check and implement software fallback if necessary. -To turn off blinking again, use the API function led_brightness_set() -as that will not just set the LED brightness but also stop any software +To turn off blinking, use the API function led_brightness_set() +with brightness value LED_OFF, which should stop any software timers that may have been required for blinking. The blink_set() function should choose a user friendly blinking value diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 3495d5d6547f..3bce44893021 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -53,11 +53,12 @@ static void led_timer_function(unsigned long data) if (!led_cdev->blink_delay_on || !led_cdev->blink_delay_off) { led_set_brightness_nosleep(led_cdev, LED_OFF); + led_cdev->flags &= ~LED_BLINK_SW; return; } if (led_cdev->flags & LED_BLINK_ONESHOT_STOP) { - led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP; + led_cdev->flags &= ~(LED_BLINK_ONESHOT_STOP | LED_BLINK_SW); return; } @@ -151,6 +152,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev, return; } + led_cdev->flags |= LED_BLINK_SW; mod_timer(&led_cdev->blink_timer, jiffies + 1); } @@ -219,6 +221,7 @@ void led_stop_software_blink(struct led_classdev *led_cdev) del_timer_sync(&led_cdev->blink_timer); led_cdev->blink_delay_on = 0; led_cdev->blink_delay_off = 0; + led_cdev->flags &= ~LED_BLINK_SW; } EXPORT_SYMBOL_GPL(led_stop_software_blink); @@ -226,10 +229,10 @@ void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness) { /* - * In case blinking is on delay brightness setting + * If software blink is active, delay brightness setting * until the next timer tick. */ - if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) { + if (led_cdev->flags & LED_BLINK_SW) { /* * If we need to disable soft blinking delegate this to the * work queue task to avoid problems in case we are called diff --git a/include/linux/leds.h b/include/linux/leds.h index d2b13066e781..e5e7f2e80a54 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -42,15 +42,16 @@ struct led_classdev { #define LED_UNREGISTERING (1 << 1) /* Upper 16 bits reflect control information */ #define LED_CORE_SUSPENDRESUME (1 << 16) -#define LED_BLINK_ONESHOT (1 << 17) -#define LED_BLINK_ONESHOT_STOP (1 << 18) -#define LED_BLINK_INVERT (1 << 19) -#define LED_BLINK_BRIGHTNESS_CHANGE (1 << 20) -#define LED_BLINK_DISABLE (1 << 21) -#define LED_SYSFS_DISABLE (1 << 22) -#define LED_DEV_CAP_FLASH (1 << 23) -#define LED_HW_PLUGGABLE (1 << 24) -#define LED_PANIC_INDICATOR (1 << 25) +#define LED_BLINK_SW (1 << 17) +#define LED_BLINK_ONESHOT (1 << 18) +#define LED_BLINK_ONESHOT_STOP (1 << 19) +#define LED_BLINK_INVERT (1 << 20) +#define LED_BLINK_BRIGHTNESS_CHANGE (1 << 21) +#define LED_BLINK_DISABLE (1 << 22) +#define LED_SYSFS_DISABLE (1 << 23) +#define LED_DEV_CAP_FLASH (1 << 24) +#define LED_HW_PLUGGABLE (1 << 25) +#define LED_PANIC_INDICATOR (1 << 26) /* Set LED brightness level * Must not sleep. Use brightness_set_blocking for drivers @@ -72,8 +73,8 @@ struct led_classdev { * and if both are zero then a sensible default should be chosen. * The call should adjust the timings in that case and if it can't * match the values specified exactly. - * Deactivate blinking again when the brightness is set to a fixed - * value via the brightness_set() callback. + * Deactivate blinking again when the brightness is set to LED_OFF + * via the brightness_set() callback. */ int (*blink_set)(struct led_classdev *led_cdev, unsigned long *delay_on, From 5ab92a7cb82c66bf30685583a38a18538e3807db Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 8 Jun 2016 10:29:48 +0200 Subject: [PATCH 0344/1050] leds: handle suspend/resume in heartbeat trigger The following phenomena was observed: when suspending the system, sometimes the heartbeat LED was left on, glowing and wasting power while the rest of the system is asleep, also disturbing power dissapation measures on the odd suspend cycle when it's left on. Clearly this is not how we want the heartbeat trigger to work: it should turn off and leave the LED off during system suspend. This removes the heartbeat trigger when preparing suspend and restores it during resume. The trigger code will make sure all LEDs are left in OFF state after removing the trigger, and will re-enable the trigger on all LEDs after resuming. Cc: linux-pm@vger.kernel.org Signed-off-by: Linus Walleij Reviewed-by: Ulf Hansson Signed-off-by: Jacek Anaszewski --- drivers/leds/trigger/ledtrig-heartbeat.c | 31 ++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c index 410c39c62dc7..c9f386213e9e 100644 --- a/drivers/leds/trigger/ledtrig-heartbeat.c +++ b/drivers/leds/trigger/ledtrig-heartbeat.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "../leds.h" static int panic_heartbeats; @@ -154,6 +155,30 @@ static struct led_trigger heartbeat_led_trigger = { .deactivate = heartbeat_trig_deactivate, }; +static int heartbeat_pm_notifier(struct notifier_block *nb, + unsigned long pm_event, void *unused) +{ + int rc; + + switch (pm_event) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + led_trigger_unregister(&heartbeat_led_trigger); + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + rc = led_trigger_register(&heartbeat_led_trigger); + if (rc) + pr_err("could not re-register heartbeat trigger\n"); + break; + default: + break; + } + return NOTIFY_DONE; +} + static int heartbeat_reboot_notifier(struct notifier_block *nb, unsigned long code, void *unused) { @@ -168,6 +193,10 @@ static int heartbeat_panic_notifier(struct notifier_block *nb, return NOTIFY_DONE; } +static struct notifier_block heartbeat_pm_nb = { + .notifier_call = heartbeat_pm_notifier, +}; + static struct notifier_block heartbeat_reboot_nb = { .notifier_call = heartbeat_reboot_notifier, }; @@ -184,12 +213,14 @@ static int __init heartbeat_trig_init(void) atomic_notifier_chain_register(&panic_notifier_list, &heartbeat_panic_nb); register_reboot_notifier(&heartbeat_reboot_nb); + register_pm_notifier(&heartbeat_pm_nb); } return rc; } static void __exit heartbeat_trig_exit(void) { + unregister_pm_notifier(&heartbeat_pm_nb); unregister_reboot_notifier(&heartbeat_reboot_nb); atomic_notifier_chain_unregister(&panic_notifier_list, &heartbeat_panic_nb); From 970442c599b22ccd644ebfe94d1d303bf6f87c05 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:27 -0700 Subject: [PATCH 0345/1050] x86/cpu/intel: Introduce macros for Intel family numbers Problem: We have a boatload of open-coded family-6 model numbers. Half of them have these model numbers in hex and the other half in decimal. This makes grepping for them tons of fun, if you were to try. Solution: Consolidate all the magic numbers. Put all the definitions in one header. The names here are closely derived from the comments describing the models from arch/x86/events/intel/core.c. We could easily make them shorter by doing things like s/SANDYBRIDGE/SNB/, but they seemed fine even with the longer versions to me. Do not take any of these names too literally, like "DESKTOP" or "MOBILE". These are all colloquial names and not precise descriptions of everywhere a given model will show up. Signed-off-by: Dave Hansen Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Darren Hart Cc: Dave Hansen Cc: Denys Vlasenko Cc: Doug Thompson Cc: Eduardo Valentin Cc: H. Peter Anvin Cc: Jacob Pan Cc: Kan Liang Cc: Len Brown Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rajneesh Bhardwaj Cc: Souvik Kumar Chakravarty Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Tony Luck Cc: Ulf Hansson Cc: Viresh Kumar Cc: Vishwanath Somayaji Cc: Zhang Rui Cc: jacob.jun.pan@intel.com Cc: linux-acpi@vger.kernel.org Cc: linux-edac@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Link: http://lkml.kernel.org/r/20160603001927.F2A7D828@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-family.h | 68 +++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 arch/x86/include/asm/intel-family.h diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h new file mode 100644 index 000000000000..6999f7d01a0d --- /dev/null +++ b/arch/x86/include/asm/intel-family.h @@ -0,0 +1,68 @@ +#ifndef _ASM_X86_INTEL_FAMILY_H +#define _ASM_X86_INTEL_FAMILY_H + +/* + * "Big Core" Processors (Branded as Core, Xeon, etc...) + * + * The "_X" parts are generally the EP and EX Xeons, or the + * "Extreme" ones, like Broadwell-E. + * + * Things ending in "2" are usually because we have no better + * name for them. There's no processor called "WESTMERE2". + */ + +#define INTEL_FAM6_CORE_YONAH 0x0E +#define INTEL_FAM6_CORE2_MEROM 0x0F +#define INTEL_FAM6_CORE2_MEROM_L 0x16 +#define INTEL_FAM6_CORE2_PENRYN 0x17 +#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D + +#define INTEL_FAM6_NEHALEM 0x1E +#define INTEL_FAM6_NEHALEM_EP 0x1A +#define INTEL_FAM6_NEHALEM_EX 0x2E +#define INTEL_FAM6_WESTMERE 0x25 +#define INTEL_FAM6_WESTMERE2 0x1F +#define INTEL_FAM6_WESTMERE_EP 0x2C +#define INTEL_FAM6_WESTMERE_EX 0x2F + +#define INTEL_FAM6_SANDYBRIDGE 0x2A +#define INTEL_FAM6_SANDYBRIDGE_X 0x2D +#define INTEL_FAM6_IVYBRIDGE 0x3A +#define INTEL_FAM6_IVYBRIDGE_X 0x3E + +#define INTEL_FAM6_HASWELL_CORE 0x3C +#define INTEL_FAM6_HASWELL_X 0x3F +#define INTEL_FAM6_HASWELL_ULT 0x45 +#define INTEL_FAM6_HASWELL_GT3E 0x46 + +#define INTEL_FAM6_BROADWELL_CORE 0x3D +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 +#define INTEL_FAM6_BROADWELL_GT3E 0x47 +#define INTEL_FAM6_BROADWELL_X 0x4F + +#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E +#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E +#define INTEL_FAM6_SKYLAKE_X 0x55 +#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E +#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E + +/* "Small Core" Processors (Atom) */ + +#define INTEL_FAM6_ATOM_PINEVIEW 0x1C +#define INTEL_FAM6_ATOM_LINCROFT 0x26 +#define INTEL_FAM6_ATOM_PENWELL 0x27 +#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35 +#define INTEL_FAM6_ATOM_CEDARVIEW 0x36 +#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ +#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ +#define INTEL_FAM6_ATOM_MERRIFIELD1 0x4A /* Tangier */ +#define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C +#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ + +/* Xeon Phi */ + +#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ + +#endif /* _ASM_X86_INTEL_FAMILY_H */ From 4dc0dd83603f05dc3ae152af33ecb15104c313f3 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Wed, 8 Jun 2016 09:32:51 +0200 Subject: [PATCH 0346/1050] spi: rockchip: Signal unfinished DMA transfers When using DMA, the transfer_one callback should return 1 because the transfer hasn't finished yet. A previous commit changed the function to return 0 when the DMA channels were correctly prepared. This manifested in Veyron boards with this message: [ 1.983605] cros-ec-spi spi0.0: EC failed to respond in time Fixes: ea9849113343 ("spi: rockchip: check return value of dmaengine_prep_slave_sg") Signed-off-by: Tomeu Vizoso Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-rockchip.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index cd89682065b9..1026e180eed7 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -578,7 +578,7 @@ static int rockchip_spi_transfer_one( struct spi_device *spi, struct spi_transfer *xfer) { - int ret = 1; + int ret = 0; struct rockchip_spi *rs = spi_master_get_devdata(master); WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) && @@ -627,6 +627,8 @@ static int rockchip_spi_transfer_one( spi_enable_chip(rs, 1); ret = rockchip_spi_prepare_dma(rs); } + /* successful DMA prepare means the transfer is in progress */ + ret = ret ? ret : 1; } else { spi_enable_chip(rs, 1); ret = rockchip_spi_pio_transfer(rs); From a8a47540ebe32f1733eebc3e5699af580ceaa3f5 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Jun 2016 11:23:15 +0100 Subject: [PATCH 0347/1050] regulator: qcom_smd: add list_voltage callback This patch adds support to list_voltage callback, so that consumers like mmc core, can get information of supported voltage range. Without this patch there is no way for mmc core to know this voltage range. Signed-off-by: Srinivas Kandagatla Signed-off-by: Mark Brown Cc: stable@vger.kernel.org # v4.6 --- drivers/regulator/qcom_smd-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index 56a17ec5b5ef..b11b627a50db 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -140,6 +140,7 @@ static const struct regulator_ops rpm_smps_ldo_ops = { .enable = rpm_reg_enable, .disable = rpm_reg_disable, .is_enabled = rpm_reg_is_enabled, + .list_voltage = regulator_list_voltage_linear_range, .get_voltage = rpm_reg_get_voltage, .set_voltage = rpm_reg_set_voltage, From d1e44b6b2823f1751ffe7e7589f545f05cfe2095 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 3 Jun 2016 12:23:09 +0100 Subject: [PATCH 0348/1050] regulator: qcom_smd: add regulator ops for pm8941 lnldo After "regulator: qcom_smd: add list_voltage callback" patch adding pm8941 lnldo regulators would bug on list_voltages as it is a fixed regulator without any linear range. This patch fixes that issue by adding dedicated ops for pm8941 lnldo without list_voltages callback. Signed-off-by: Srinivas Kandagatla Signed-off-by: Mark Brown Cc: stable@vger.kernel.org # v4.6 --- drivers/regulator/qcom_smd-regulator.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index b11b627a50db..6c7fe4778793 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -148,6 +148,17 @@ static const struct regulator_ops rpm_smps_ldo_ops = { .set_load = rpm_reg_set_load, }; +static const struct regulator_ops rpm_smps_ldo_ops_fixed = { + .enable = rpm_reg_enable, + .disable = rpm_reg_disable, + .is_enabled = rpm_reg_is_enabled, + + .get_voltage = rpm_reg_get_voltage, + .set_voltage = rpm_reg_set_voltage, + + .set_load = rpm_reg_set_load, +}; + static const struct regulator_ops rpm_switch_ops = { .enable = rpm_reg_enable, .disable = rpm_reg_disable, @@ -248,7 +259,7 @@ static const struct regulator_desc pm8941_nldo = { static const struct regulator_desc pm8941_lnldo = { .fixed_uV = 1740000, .n_voltages = 1, - .ops = &rpm_smps_ldo_ops, + .ops = &rpm_smps_ldo_ops_fixed, }; static const struct regulator_desc pm8941_switch = { From 60a5eaba46919f3a4c63a0c93bb7015ad5e56a82 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 8 Jun 2016 13:45:08 +0200 Subject: [PATCH 0349/1050] gpio: select ANON_INODES The build servers found that gpiolib is using ANON_INODES but has forgotten to select it. Fix this. Reported-by: kbuild test robot Fixes: 521a2ad6f862 ("gpio: add userspace ABI for GPIO line information") Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 48da857f4774..a116609b1914 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -33,6 +33,7 @@ config ARCH_REQUIRE_GPIOLIB menuconfig GPIOLIB bool "GPIO Support" + select ANON_INODES help This enables GPIO support through the generic GPIO library. You only need to enable this, if you also want to enable From 96685a55a82c383cbba7ef1d4a636acf708cf17f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 1 Jun 2016 12:04:28 +0200 Subject: [PATCH 0350/1050] x86/cpu/AMD: Extend X86_FEATURE_TOPOEXT workaround to newer models We need to reenable the topology extensions CPUID leafs on newer models too, if BIOS has disabled them, as we rely on them to get proper compute unit topology. Make the printk a once thing, while at it. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rui Huang Cc: Sherry Hurwitz Cc: Thomas Gleixner Cc: linux-hwmon@vger.kernel.org Link: http://lkml.kernel.org/r/1464775468-23355-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c343a54bed39..f5c69d8974e1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -674,14 +674,14 @@ static void init_amd_bd(struct cpuinfo_x86 *c) u64 value; /* re-enable TopologyExtensions if switched off by BIOS */ - if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && + if ((c->x86_model >= 0x10) && (c->x86_model <= 0x6f) && !cpu_has(c, X86_FEATURE_TOPOEXT)) { if (msr_set_bit(0xc0011005, 54) > 0) { rdmsrl(0xc0011005, value); if (value & BIT_64(54)) { set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); } } } From b66b2a0adf0e48973b582e055758b9907a7eee7c Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 7 Jun 2016 17:22:17 +0100 Subject: [PATCH 0351/1050] gpio: bcm-kona: fix bcm_kona_gpio_reset() warnings The bcm_kona_gpio_reset() calls bcm_kona_gpio_write_lock_regs() with what looks like the wrong parameter. The write_lock_regs function takes a pointer to the registers, not the bcm_kona_gpio structure. Fix the warning, and probably bug by changing the function to pass reg_base instead of kona_gpio, fixing the following warning: drivers/gpio/gpio-bcm-kona.c:550:47: warning: incorrect type in argument 1 (different address spaces) expected void [noderef] *reg_base got struct bcm_kona_gpio *kona_gpio warning: incorrect type in argument 1 (different address spaces) expected void [noderef] *reg_base got struct bcm_kona_gpio *kona_gpio Cc: stable@vger.kernel.org Signed-off-by: Ben Dooks Acked-by: Ray Jui Reviewed-by: Markus Mayer Signed-off-by: Linus Walleij --- drivers/gpio/gpio-bcm-kona.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 9aabc48ff5de..953e4b829e32 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -547,11 +547,11 @@ static void bcm_kona_gpio_reset(struct bcm_kona_gpio *kona_gpio) /* disable interrupts and clear status */ for (i = 0; i < kona_gpio->num_bank; i++) { /* Unlock the entire bank first */ - bcm_kona_gpio_write_lock_regs(kona_gpio, i, UNLOCK_CODE); + bcm_kona_gpio_write_lock_regs(reg_base, i, UNLOCK_CODE); writel(0xffffffff, reg_base + GPIO_INT_MASK(i)); writel(0xffffffff, reg_base + GPIO_INT_STATUS(i)); /* Now re-lock the bank */ - bcm_kona_gpio_write_lock_regs(kona_gpio, i, LOCK_CODE); + bcm_kona_gpio_write_lock_regs(reg_base, i, LOCK_CODE); } } From 2c610022711675ee908b903d242f0b90e1db661f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Jun 2016 10:19:51 +0200 Subject: [PATCH 0352/1050] locking/qspinlock: Fix spin_unlock_wait() some more While this prior commit: 54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()") ... fixes spin_is_locked() and spin_unlock_wait() for the usage in ipc/sem and netfilter, it does not in fact work right for the usage in task_work and futex. So while the 2 locks crossed problem: spin_lock(A) spin_lock(B) if (!spin_is_locked(B)) spin_unlock_wait(A) foo() foo(); ... works with the smp_mb() injected by both spin_is_locked() and spin_unlock_wait(), this is not sufficient for: flag = 1; smp_mb(); spin_lock() spin_unlock_wait() if (!flag) // add to lockless list // iterate lockless list ... because in this scenario, the store from spin_lock() can be delayed past the load of flag, uncrossing the variables and loosing the guarantee. This patch reworks spin_is_locked() and spin_unlock_wait() to work in both cases by exploiting the observation that while the lock byte store can be delayed, the contender must have registered itself visibly in other state contained in the word. It also allows for architectures to override both functions, as PPC and ARM64 have an additional issue for which we currently have no generic solution. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Giovanni Gherdovich Cc: Linus Torvalds Cc: Pan Xinhui Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: stable@vger.kernel.org # v4.2 and later Fixes: 54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()") Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock.h | 53 ++++++++++------------------- kernel/locking/qspinlock.c | 60 +++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 36 deletions(-) diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 6bd05700d8c9..05f05f17a7c2 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -21,38 +21,34 @@ #include +/** + * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock + * @lock : Pointer to queued spinlock structure + * + * There is a very slight possibility of live-lock if the lockers keep coming + * and the waiter is just unfortunate enough to not see any unlock state. + */ +#ifndef queued_spin_unlock_wait +extern void queued_spin_unlock_wait(struct qspinlock *lock); +#endif + /** * queued_spin_is_locked - is the spinlock locked? * @lock: Pointer to queued spinlock structure * Return: 1 if it is locked, 0 otherwise */ +#ifndef queued_spin_is_locked static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { /* - * queued_spin_lock_slowpath() can ACQUIRE the lock before - * issuing the unordered store that sets _Q_LOCKED_VAL. + * See queued_spin_unlock_wait(). * - * See both smp_cond_acquire() sites for more detail. - * - * This however means that in code like: - * - * spin_lock(A) spin_lock(B) - * spin_unlock_wait(B) spin_is_locked(A) - * do_something() do_something() - * - * Both CPUs can end up running do_something() because the store - * setting _Q_LOCKED_VAL will pass through the loads in - * spin_unlock_wait() and/or spin_is_locked(). - * - * Avoid this by issuing a full memory barrier between the spin_lock() - * and the loads in spin_unlock_wait() and spin_is_locked(). - * - * Note that regular mutual exclusion doesn't care about this - * delayed store. + * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL + * isn't immediately observable. */ - smp_mb(); - return atomic_read(&lock->val) & _Q_LOCKED_MASK; + return atomic_read(&lock->val); } +#endif /** * queued_spin_value_unlocked - is the spinlock structure unlocked? @@ -122,21 +118,6 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock) } #endif -/** - * queued_spin_unlock_wait - wait until current lock holder releases the lock - * @lock : Pointer to queued spinlock structure - * - * There is a very slight possibility of live-lock if the lockers keep coming - * and the waiter is just unfortunate enough to not see any unlock state. - */ -static inline void queued_spin_unlock_wait(struct qspinlock *lock) -{ - /* See queued_spin_is_locked() */ - smp_mb(); - while (atomic_read(&lock->val) & _Q_LOCKED_MASK) - cpu_relax(); -} - #ifndef virt_spin_lock static __always_inline bool virt_spin_lock(struct qspinlock *lock) { diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ce2f75e32ae1..5fc8c311b8fe 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -267,6 +267,66 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath #endif +/* + * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before + * issuing an _unordered_ store to set _Q_LOCKED_VAL. + * + * This means that the store can be delayed, but no later than the + * store-release from the unlock. This means that simply observing + * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired. + * + * There are two paths that can issue the unordered store: + * + * (1) clear_pending_set_locked(): *,1,0 -> *,0,1 + * + * (2) set_locked(): t,0,0 -> t,0,1 ; t != 0 + * atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1 + * + * However, in both cases we have other !0 state we've set before to queue + * ourseves: + * + * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our + * load is constrained by that ACQUIRE to not pass before that, and thus must + * observe the store. + * + * For (2) we have a more intersting scenario. We enqueue ourselves using + * xchg_tail(), which ends up being a RELEASE. This in itself is not + * sufficient, however that is followed by an smp_cond_acquire() on the same + * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and + * guarantees we must observe that store. + * + * Therefore both cases have other !0 state that is observable before the + * unordered locked byte store comes through. This means we can use that to + * wait for the lock store, and then wait for an unlock. + */ +#ifndef queued_spin_unlock_wait +void queued_spin_unlock_wait(struct qspinlock *lock) +{ + u32 val; + + for (;;) { + val = atomic_read(&lock->val); + + if (!val) /* not locked, we're done */ + goto done; + + if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */ + break; + + /* not locked, but pending, wait until we observe the lock */ + cpu_relax(); + } + + /* any unlock is good */ + while (atomic_read(&lock->val) & _Q_LOCKED_MASK) + cpu_relax(); + +done: + smp_rmb(); /* CTRL + RMB -> ACQUIRE */ +} +EXPORT_SYMBOL(queued_spin_unlock_wait); +#endif + #endif /* _GEN_PV_LOCK_SLOWPATH */ /** From 62a92c8f553e49270a0ee391b8733da71ab0aebc Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 7 Jun 2016 15:44:15 +0300 Subject: [PATCH 0353/1050] perf/core: Remove a redundant check There is no way to end up in _free_event() with event::pmu being NULL. The latter is initialized in event allocation path and remains set forever. In case of allocation failure, the error path doesn't use _free_event(). Having the check, however, suggests that it is possible to have a event::pmu==NULL situation in _free_event() and confuses the robots. This patch gets rid of the check. Reported-by: Dan Carpenter Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: eranian@google.com Cc: vince@deater.net Link: http://lkml.kernel.org/r/1465303455-26032-1-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 050a290c72c7..87e945d6ebb8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3862,10 +3862,8 @@ static void _free_event(struct perf_event *event) if (event->ctx) put_ctx(event->ctx); - if (event->pmu) { - exclusive_event_destroy(event); - module_put(event->pmu->module); - } + exclusive_event_destroy(event); + module_put(event->pmu->module); call_rcu(&event->rcu_head, free_event_rcu); } From 9c57259117b9c25472a3fa6d5a14d6bb3b647e87 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 3 Jun 2016 17:58:40 -0500 Subject: [PATCH 0354/1050] sched/debug: Fix /proc/sched_debug regression Commit: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default") ... introduced a bug when CONFIG_SCHEDSTATS is enabled and the runtime tunable is disabled (which is the default). The wait-time, sum-exec, and sum-sleep fields are missing from the /proc/sched_debug file in the runnable_tasks section. Fix it with a new schedstat_val() macro which returns the field value when schedstats is enabled and zero otherwise. The macro works with both SCHEDSTATS and !SCHEDSTATS. I put the macro in stats.h since it might end up being useful in other places. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mel Gorman Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default") Link: http://lkml.kernel.org/r/bcda7c2790cf2ccbe586a28c02dd7b6fe7749a2b.1464994423.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 15 ++++----------- kernel/sched/stats.h | 3 +++ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index cf905f655ba1..0368c393a336 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -427,19 +427,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SPLIT_NS(p->se.vruntime), (long long)(p->nvcsw + p->nivcsw), p->prio); -#ifdef CONFIG_SCHEDSTATS - if (schedstat_enabled()) { - SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", - SPLIT_NS(p->se.statistics.wait_sum), - SPLIT_NS(p->se.sum_exec_runtime), - SPLIT_NS(p->se.statistics.sum_sleep_runtime)); - } -#else + SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", - 0LL, 0L, + SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)), SPLIT_NS(p->se.sum_exec_runtime), - 0LL, 0L); -#endif + SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime))); + #ifdef CONFIG_NUMA_BALANCING SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 70b3b6a20fb0..78955cbea31c 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -33,6 +33,8 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) # define schedstat_inc(rq, field) do { if (schedstat_enabled()) { (rq)->field++; } } while (0) # define schedstat_add(rq, field, amt) do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0) # define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0) +# define schedstat_val(rq, field) ((schedstat_enabled()) ? (rq)->field : 0) + #else /* !CONFIG_SCHEDSTATS */ static inline void rq_sched_info_arrive(struct rq *rq, unsigned long long delta) @@ -47,6 +49,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) # define schedstat_inc(rq, field) do { } while (0) # define schedstat_add(rq, field, amt) do { } while (0) # define schedstat_set(var, val) do { } while (0) +# define schedstat_val(rq, field) 0 #endif #ifdef CONFIG_SCHED_INFO From 4698f88c06b893f2acc0b443004a53bf490fde7c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 7 Jun 2016 14:43:16 -0500 Subject: [PATCH 0355/1050] sched/debug: Fix 'schedstats=enable' cmdline option The 'schedstats=enable' option doesn't work, and also produces the following warning during boot: WARNING: CPU: 0 PID: 0 at /home/jpoimboe/git/linux/kernel/jump_label.c:61 static_key_slow_inc+0x8c/0xa0 static_key_slow_inc used before call to jump_label_init Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.7.0-rc1+ #25 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 0000000000000086 3ae3475a4bea95d4 ffffffff81e03da8 ffffffff8143fc83 ffffffff81e03df8 0000000000000000 ffffffff81e03de8 ffffffff810b1ffb 0000003d00000096 ffffffff823514d0 ffff88007ff197c8 0000000000000000 Call Trace: [] dump_stack+0x85/0xc2 [] __warn+0xcb/0xf0 [] warn_slowpath_fmt+0x5f/0x80 [] static_key_slow_inc+0x8c/0xa0 [] static_key_enable+0x16/0x40 [] setup_schedstats+0x29/0x94 [] unknown_bootoption+0x89/0x191 [] parse_args+0x297/0x4b0 [] start_kernel+0x1d8/0x4a9 [] ? set_init_arg+0x55/0x55 [] ? early_idt_handler_array+0x120/0x120 [] x86_64_start_reservations+0x2f/0x31 [] x86_64_start_kernel+0x14a/0x16d The problem is that it tries to update the 'sched_schedstats' static key before jump labels have been initialized. Changing jump_label_init() to be called earlier before parse_early_param() wouldn't fix it: it would still fail trying to poke_text() because mm isn't yet initialized. Instead, just create a temporary '__sched_schedstats' variable which can be copied to the static key later during sched_init() after jump labels have been initialized. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Matt Fleming Cc: Mel Gorman Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default") Link: http://lkml.kernel.org/r/453775fe3433bed65731a583e228ccea806d18cd.1465322027.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f2cae4620c7..385c947482e1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2253,9 +2253,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, #endif #endif -DEFINE_STATIC_KEY_FALSE(sched_schedstats); - #ifdef CONFIG_SCHEDSTATS + +DEFINE_STATIC_KEY_FALSE(sched_schedstats); +static bool __initdata __sched_schedstats = false; + static void set_schedstats(bool enabled) { if (enabled) @@ -2278,11 +2280,16 @@ static int __init setup_schedstats(char *str) if (!str) goto out; + /* + * This code is called before jump labels have been set up, so we can't + * change the static branch directly just yet. Instead set a temporary + * variable so init_schedstats() can do it later. + */ if (!strcmp(str, "enable")) { - set_schedstats(true); + __sched_schedstats = true; ret = 1; } else if (!strcmp(str, "disable")) { - set_schedstats(false); + __sched_schedstats = false; ret = 1; } out: @@ -2293,6 +2300,11 @@ out: } __setup("schedstats=", setup_schedstats); +static void __init init_schedstats(void) +{ + set_schedstats(__sched_schedstats); +} + #ifdef CONFIG_PROC_SYSCTL int sysctl_schedstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -2313,8 +2325,10 @@ int sysctl_schedstats(struct ctl_table *table, int write, set_schedstats(state); return err; } -#endif -#endif +#endif /* CONFIG_PROC_SYSCTL */ +#else /* !CONFIG_SCHEDSTATS */ +static inline void init_schedstats(void) {} +#endif /* CONFIG_SCHEDSTATS */ /* * fork()/clone()-time setup: @@ -7487,6 +7501,8 @@ void __init sched_init(void) #endif init_sched_fair_class(); + init_schedstats(); + scheduler_running = 1; } From 3eefa7e8cc8516d737a6a961914e20501eb2a952 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 2 Jun 2016 21:30:48 -0300 Subject: [PATCH 0356/1050] dt-bindings: Add vendor prefix for TechNexion TechNexion designs and manufactures embedded computing systems: http://www.technexion.com/ Signed-off-by: Fabio Estevam Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 1387cd33064e..2c2500df0dce 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -255,6 +255,7 @@ synology Synology, Inc. SUNW Sun Microsystems, Inc tbs TBS Technologies tcl Toby Churchill Ltd. +technexion TechNexion technologic Technologic Systems thine THine Electronics, Inc. ti Texas Instruments From 5c1d3310d84309330264e2a06e5000eb289a44ad Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 7 Jun 2016 18:44:48 +0100 Subject: [PATCH 0357/1050] drivers: of: Fix of_pci.h header guard The compilation of of_pci.c is governed by CONFIG_OF_PCI, but the corresponding declarations in of_pci.h are inconsistently guarded by CONFIG_OF, with the result that if CONFIG_PCI is disabled for an OF platform, the dangling external declarations are still active and the inline stub definitions not. So far this has managed to go unnoticed since it happens that the only references to these functions are from code which itself depends on CONFIG_PCI or CONFIG_OF_PCI. Fix this with the appropriate config guard so that any new callers outside PCI-specific code don't start unexpectedly breaking under certain configs. Signed-off-by: Robin Murphy Signed-off-by: Rob Herring --- include/linux/of_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index f6e9e85164e8..b969e9443962 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -8,7 +8,7 @@ struct pci_dev; struct of_phandle_args; struct device_node; -#ifdef CONFIG_OF +#ifdef CONFIG_OF_PCI int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq); struct device_node *of_pci_find_child_device(struct device_node *parent, unsigned int devfn); From 0b0d81e3b7334897da9b2e3ffee860c2046f7bc0 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 26 May 2016 13:43:43 -0500 Subject: [PATCH 0358/1050] objtool, drm/vmwgfx: Fix "duplicate frame pointer save" warning objtool reports the following warnings: drivers/gpu/drm/vmwgfx/vmwgfx_msg.o: warning: objtool: vmw_send_msg()+0x107: duplicate frame pointer save drivers/gpu/drm/vmwgfx/vmwgfx_msg.o: warning: objtool: vmw_host_get_guestinfo()+0x252: duplicate frame pointer save To quote Linus: "The reason is that VMW_PORT_HB_OUT() uses a magic instruction sequence (a "rep outsb") to communicate with the hypervisor (it's a virtual GPU driver for vmware), and %rbp is part of the communication. So the inline asm does a save-and-restore of the frame pointer around the instruction sequence. I actually find the objtool warning to be quite reasonable, so it's not exactly a false positive, since in this case it actually does point out that the frame pointer won't be reliable over that instruction sequence. But in this particular case it just ends up being the wrong thing - the code is what it is, and %rbp just can't have the frame information due to annoying magic calling conventions." Silence the warnings by telling objtool to ignore the two functions which use the VMW_PORT_HB_{IN,OUT} macros. Reported-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Acked-by: Linus Torvalds Cc: DRI Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160526184343.fdtjjjg67smmeekt@treble Signed-off-by: Ingo Molnar --- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 6de283c8fa3e..f0374f9b56ca 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "drmP.h" #include "vmwgfx_msg.h" @@ -194,7 +195,7 @@ static int vmw_send_msg(struct rpc_channel *channel, const char *msg) return -EINVAL; } - +STACK_FRAME_NON_STANDARD(vmw_send_msg); /** @@ -304,6 +305,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, return 0; } +STACK_FRAME_NON_STANDARD(vmw_recv_msg); /** From d366a0ff1cf73f93796f2377e7b0361a94c41c35 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 8 Jun 2016 10:32:10 -0400 Subject: [PATCH 0359/1050] nbd: pass the nbd pointer for flags debugfs We were passing in &nbd for the private data in debugfs_create_file() for the flags entry. We expect it to just be nbd, fix this so we get proper output from this debugfs entry. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 31e73a7a40f2..6a48ed41963f 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -941,7 +941,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd) debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize); debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout); debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize); - debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops); + debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); return 0; } From c0530dd3ef1d3997d885e1345e34a284db2b9cfa Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 8 Jun 2016 14:57:28 +0530 Subject: [PATCH 0360/1050] cxgb4: Add device id of T540-BT adapter Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index a2cdfc1261dc..50812a1d67bd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -144,6 +144,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5015), /* T502-bt */ CH_PCI_ID_TABLE_FENTRY(0x5016), /* T580-OCP-SO */ CH_PCI_ID_TABLE_FENTRY(0x5017), /* T520-OCP-SO */ + CH_PCI_ID_TABLE_FENTRY(0x5018), /* T540-BT */ CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */ CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */ CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */ From 077fa7aed17de5022e44bf07dbaf732078b7b5b2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 8 Jun 2016 14:25:22 +0100 Subject: [PATCH 0361/1050] futex: Calculate the futex key based on a tail page for file-based futexes Mike Galbraith reported that the LTP test case futex_wake04 was broken by commit 65d8fc777f6d ("futex: Remove requirement for lock_page() in get_futex_key()"). This test case uses futexes backed by hugetlbfs pages and so there is an associated inode with a futex stored on such pages. The problem is that the key is being calculated based on the head page index of the hugetlbfs page and not the tail page. Prior to the optimisation, the page lock was used to stabilise mappings and pin the inode is file-backed which is overkill. If the page was a compound page, the head page was automatically looked up as part of the page lock operation but the tail page index was used to calculate the futex key. After the optimisation, the compound head is looked up early and the page lock is only relied upon to identify truncated pages, special pages or a shmem page moving to swapcache. The head page is looked up because without the page lock, special care has to be taken to pin the inode correctly. However, the tail page is still required to calculate the futex key so this patch records the tail page. On vanilla 4.6, the output of the test case is; futex_wake04 0 TINFO : Hugepagesize 2097152 futex_wake04 1 TFAIL : futex_wake04.c:126: Bug: wait_thread2 did not wake after 30 secs. With the patch applied futex_wake04 0 TINFO : Hugepagesize 2097152 futex_wake04 1 TPASS : Hi hydra, thread2 awake! Fixes: 65d8fc777f6d "futex: Remove requirement for lock_page() in get_futex_key()" Reported-and-tested-by: Mike Galbraith Signed-off-by: Mel Gorman Acked-by: Peter Zijlstra (Intel) Reviewed-by: Davidlohr Bueso Cc: Sebastian Andrzej Siewior Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160608132522.GM2469@suse.de Signed-off-by: Thomas Gleixner --- kernel/futex.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index ee25f5ba4aca..33664f70e2d2 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -469,7 +469,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; - struct page *page; + struct page *page, *tail; struct address_space *mapping; int err, ro = 0; @@ -530,7 +530,15 @@ again: * considered here and page lock forces unnecessarily serialization * From this point on, mapping will be re-verified if necessary and * page lock will be acquired only if it is unavoidable + * + * Mapping checks require the head page for any compound page so the + * head page and mapping is looked up now. For anonymous pages, it + * does not matter if the page splits in the future as the key is + * based on the address. For filesystem-backed pages, the tail is + * required as the index of the page determines the key. For + * base pages, there is no tail page and tail == page. */ + tail = page; page = compound_head(page); mapping = READ_ONCE(page->mapping); @@ -654,7 +662,7 @@ again: key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->shared.inode = inode; - key->shared.pgoff = basepage_index(page); + key->shared.pgoff = basepage_index(tail); rcu_read_unlock(); } From efd1535270c1deb0487527bf0c3c827301a69c93 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 7 Jun 2016 10:43:15 -0400 Subject: [PATCH 0362/1050] xen-blkfront: don't call talk_to_blkback when already connected to blkback Sometimes blkfront may twice receive blkback_changed() notification (XenbusStateConnected) after migration, which will cause talk_to_blkback() to be called twice too and confuse xen-blkback. The flow is as follow: blkfront blkback blkfront_resume() > talk_to_blkback() > Set blkfront to XenbusStateInitialised front changed() > Connect() > Set blkback to XenbusStateConnected blkback_changed() > Skip talk_to_blkback() because frontstate == XenbusStateInitialised > blkfront_connect() > Set blkfront to XenbusStateConnected ----- And here we get another XenbusStateConnected notification leading to: ----- blkback_changed() > because now frontstate != XenbusStateInitialised talk_to_blkback() is also called again > blkfront state changed from XenbusStateConnected to XenbusStateInitialised (Which is not correct!) front_changed(): > Do nothing because blkback already in XenbusStateConnected Now blkback is in XenbusStateConnected but blkfront is still in XenbusStateInitialised - leading to no disks. Poking of the XenbusStateConnected state is allowed (to deal with block disk change) and has to be dealt with. The most likely cause of this bug are custom udev scripts hooking up the disks and then validating the size. Signed-off-by: Bob Liu Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ca13df854639..6ba8891e8efe 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2485,10 +2485,23 @@ static void blkback_changed(struct xenbus_device *dev, break; case XenbusStateConnected: - if (dev->state != XenbusStateInitialised) { + /* + * talk_to_blkback sets state to XenbusStateInitialised + * and blkfront_connect sets it to XenbusStateConnected + * (if connection went OK). + * + * If the backend (or toolstack) decides to poke at backend + * state (and re-trigger the watch by setting the state repeatedly + * to XenbusStateConnected (4)) we need to deal with this. + * This is allowed as this is used to communicate to the guest + * that the size of disk has changed! + */ + if ((dev->state != XenbusStateInitialised) && + (dev->state != XenbusStateConnected)) { if (talk_to_blkback(dev, info)) break; } + blkfront_connect(info); break; From 2a6f71ad99cabe436e70c3f5fcf58072cb3bc07f Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 31 May 2016 16:59:17 +0800 Subject: [PATCH 0363/1050] xen-blkfront: fix resume issues after a migration After a migrate to another host (which may not have multiqueue support), the number of rings (block hardware queues) may be changed and the ring info structure will also be reallocated. This patch fixes two related bugs: * call blk_mq_update_nr_hw_queues() to make blk-core know the number of hardware queues have been changed. * Don't store rinfo pointer to hctx->driver_data, because rinfo may be reallocated so use hctx->queue_num to get the rinfo structure instead. Signed-off-by: Bob Liu Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 6ba8891e8efe..2e6d1e9c3345 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -874,8 +874,12 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *qd) { unsigned long flags; - struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data; + int qid = hctx->queue_num; + struct blkfront_info *info = hctx->queue->queuedata; + struct blkfront_ring_info *rinfo = NULL; + BUG_ON(info->nr_rings <= qid); + rinfo = &info->rinfo[qid]; blk_mq_start_request(qd->rq); spin_lock_irqsave(&rinfo->ring_lock, flags); if (RING_FULL(&rinfo->ring)) @@ -901,20 +905,9 @@ out_busy: return BLK_MQ_RQ_QUEUE_BUSY; } -static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, - unsigned int index) -{ - struct blkfront_info *info = (struct blkfront_info *)data; - - BUG_ON(info->nr_rings <= index); - hctx->driver_data = &info->rinfo[index]; - return 0; -} - static struct blk_mq_ops blkfront_mq_ops = { .queue_rq = blkif_queue_rq, .map_queue = blk_mq_map_queue, - .init_hctx = blk_mq_init_hctx, }; static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, @@ -950,6 +943,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, return PTR_ERR(rq); } + rq->queuedata = info; queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { @@ -2149,6 +2143,8 @@ static int blkfront_resume(struct xenbus_device *dev) return err; err = talk_to_blkback(dev, info); + if (!err) + blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings); /* * We have to wait for the backend to switch to From a5c5e2da8551eb69e5d5d09d51d526140b5db9fb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Jun 2016 12:59:17 +0200 Subject: [PATCH 0364/1050] l2tp: fix configuration passed to setup_udp_tunnel_sock() Unused fields of udp_cfg must be all zeros. Otherwise setup_udp_tunnel_sock() fills ->gro_receive and ->gro_complete callbacks with garbage, eventually resulting in panic when used by udp_gro_receive(). [ 72.694123] BUG: unable to handle kernel paging request at ffff880033f87d78 [ 72.695518] IP: [] 0xffff880033f87d78 [ 72.696530] PGD 26e2067 PUD 26e3067 PMD 342ed063 PTE 8000000033f87163 [ 72.696530] Oops: 0011 [#1] SMP KASAN [ 72.696530] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6_udp_tunnel udp_tunnel pptp gre pppox ppp_generic slhc crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ansi_cprng aesni_intel evdev aes_x86_64 ablk_helper cryptd lrw gf128mul glue_helper serio_raw acpi_cpufreq button proc\ essor ext4 crc16 jbd2 mbcache virtio_blk virtio_net virtio_pci virtio_ring virtio [ 72.696530] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.7.0-rc1 #1 [ 72.696530] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 72.696530] task: ffff880035b59700 ti: ffff880035b70000 task.ti: ffff880035b70000 [ 72.696530] RIP: 0010:[] [] 0xffff880033f87d78 [ 72.696530] RSP: 0018:ffff880035f87bc0 EFLAGS: 00010246 [ 72.696530] RAX: ffffed000698f996 RBX: ffff88003326b840 RCX: ffffffff814cc823 [ 72.696530] RDX: ffff88003326b840 RSI: ffff880033e48038 RDI: ffff880034c7c780 [ 72.696530] RBP: ffff880035f87c18 R08: 000000000000a506 R09: 0000000000000000 [ 72.696530] R10: ffff880035f87b38 R11: ffff880034b9344d R12: 00000000ebfea715 [ 72.696530] R13: 0000000000000000 R14: ffff880034c7c780 R15: 0000000000000000 [ 72.696530] FS: 0000000000000000(0000) GS:ffff880035f80000(0000) knlGS:0000000000000000 [ 72.696530] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 72.696530] CR2: ffff880033f87d78 CR3: 0000000033c98000 CR4: 00000000000406a0 [ 72.696530] Stack: [ 72.696530] ffffffff814cc834 ffff880034b93468 0000001481416818 ffff88003326b874 [ 72.696530] ffff880034c7ccb0 ffff880033e48038 ffff88003326b840 ffff880034b93462 [ 72.696530] ffff88003326b88a ffff88003326b88c ffff880034b93468 ffff880035f87c70 [ 72.696530] Call Trace: [ 72.696530] [ 72.696530] [] ? udp_gro_receive+0x1c6/0x1f9 [ 72.696530] [] udp4_gro_receive+0x2b5/0x310 [ 72.696530] [] inet_gro_receive+0x4a3/0x4cd [ 72.696530] [] dev_gro_receive+0x584/0x7a3 [ 72.696530] [] ? __lock_is_held+0x29/0x64 [ 72.696530] [] napi_gro_receive+0x124/0x21d [ 72.696530] [] virtnet_receive+0x8df/0x8f6 [virtio_net] [ 72.696530] [] virtnet_poll+0x1d/0x8d [virtio_net] [ 72.696530] [] net_rx_action+0x15b/0x3b9 [ 72.696530] [] __do_softirq+0x216/0x546 [ 72.696530] [] irq_exit+0x49/0xb6 [ 72.696530] [] do_IRQ+0xe2/0xfa [ 72.696530] [] common_interrupt+0x89/0x89 [ 72.696530] [ 72.696530] [] ? trace_hardirqs_on_caller+0x229/0x270 [ 72.696530] [] ? default_idle+0x1c/0x2d [ 72.696530] [] ? default_idle+0x1a/0x2d [ 72.696530] [] arch_cpu_idle+0xa/0xc [ 72.696530] [] default_idle_call+0x1a/0x1c [ 72.696530] [] cpu_startup_entry+0x15b/0x20f [ 72.696530] [] start_secondary+0x12c/0x133 [ 72.696530] Code: ff ff ff ff ff ff ff ff ff ff 7f ff ff ff ff ff ff ff 7f 00 7e f8 33 00 88 ff ff 6d 61 58 81 ff ff ff ff 5e de 0a 81 ff ff ff ff <00> 5c e2 34 00 88 ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 72.696530] RIP [] 0xffff880033f87d78 [ 72.696530] RSP [ 72.696530] CR2: ffff880033f87d78 [ 72.696530] ---[ end trace ad7758b9a1dccf99 ]--- [ 72.696530] Kernel panic - not syncing: Fatal exception in interrupt [ 72.696530] Kernel Offset: disabled [ 72.696530] ---[ end Kernel panic - not syncing: Fatal exception in interrupt v2: use empty initialiser instead of "{ NULL }" to avoid relying on first field's type. Fixes: 38fd2af24fcf ("udp: Add socket based GRO and config") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6edfa9980314..1e40dacaa137 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1581,7 +1581,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { - struct udp_tunnel_sock_cfg udp_cfg; + struct udp_tunnel_sock_cfg udp_cfg = { }; udp_cfg.sk_user_data = tunnel; udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP; From 00bc0ef5880dc7b82f9c320dead4afaad48e47be Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 8 Jun 2016 15:13:34 +0200 Subject: [PATCH 0365/1050] ipv6: Skip XFRM lookup if dst_entry in socket cache is valid At present we perform an xfrm_lookup() for each UDPv6 message we send. The lookup involves querying the flow cache (flow_cache_lookup) and, in case of a cache miss, creating an XFRM bundle. If we miss the flow cache, we can end up creating a new bundle and deriving the path MTU (xfrm_init_pmtu) from on an already transformed dst_entry, which we pass from the socket cache (sk->sk_dst_cache) down to xfrm_lookup(). This can happen only if we're caching the dst_entry in the socket, that is when we're using a connected UDP socket. To put it another way, the path MTU shrinks each time we miss the flow cache, which later on leads to incorrectly fragmented payload. It can be observed with ESPv6 in transport mode: 1) Set up a transformation and lower the MTU to trigger fragmentation # ip xfrm policy add dir out src ::1 dst ::1 \ tmpl src ::1 dst ::1 proto esp spi 1 # ip xfrm state add src ::1 dst ::1 \ proto esp spi 1 enc 'aes' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b # ip link set dev lo mtu 1500 2) Monitor the packet flow and set up an UDP sink # tcpdump -ni lo -ttt & # socat udp6-listen:12345,fork /dev/null & 3) Send a datagram that needs fragmentation with a connected socket # perl -e 'print "@" x 1470 | socat - udp6:[::1]:12345 2016/06/07 18:52:52 socat[724] E read(3, 0x555bb3d5ba00, 8192): Protocol error 00:00:00.000000 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x2), length 1448 00:00:00.000014 IP6 ::1 > ::1: frag (1448|32) 00:00:00.000050 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x3), length 1272 (^ ICMPv6 Parameter Problem) 00:00:00.000022 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x5), length 136 4) Compare it to a non-connected socket # perl -e 'print "@" x 1500' | socat - udp6-sendto:[::1]:12345 00:00:40.535488 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x6), length 1448 00:00:00.000010 IP6 ::1 > ::1: frag (1448|64) What happens in step (3) is: 1) when connecting the socket in __ip6_datagram_connect(), we perform an XFRM lookup, miss the flow cache, create an XFRM bundle, and cache the destination, 2) afterwards, when sending the datagram, we perform an XFRM lookup, again, miss the flow cache (due to mismatch of flowi6_iif and flowi6_oif, which is an issue of its own), and recreate an XFRM bundle based on the cached (and already transformed) destination. To prevent the recreation of an XFRM bundle, avoid an XFRM lookup altogether whenever we already have a destination entry cached in the socket. This prevents the path MTU shrinkage and brings us on par with UDPv4. The fix also benefits connected PINGv6 sockets, another user of ip6_sk_dst_lookup_flow(), who also suffer messages being transformed twice. Joint work with Hannes Frederic Sowa. Reported-by: Jan Tluka Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cbf127ae7c67..635b8d340cdb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1071,17 +1071,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - int err; dst = ip6_sk_dst_check(sk, dst, fl6); + if (!dst) + dst = ip6_dst_lookup_flow(sk, fl6, final_dst); - err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6); - if (err) - return ERR_PTR(err); - if (final_dst) - fl6->daddr = *final_dst; - - return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); From e0d194adfa9f5f473068cc546bee60fb84ab77ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2016 06:19:45 -0700 Subject: [PATCH 0366/1050] net_sched: add missing paddattr description "make htmldocs" complains otherwise: .//net/core/gen_stats.c:65: warning: No description found for parameter 'padattr' .//net/core/gen_stats.c:101: warning: No description found for parameter 'padattr' Fixes: 9854518ea04d ("sched: align nlattr properly when needed") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/gen_stats.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index f96ee8b9478d..be873e4e3125 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -47,6 +47,7 @@ nla_put_failure: * @xstats_type: TLV type for backward compatibility xstats TLV * @lock: statistics lock * @d: dumping handle + * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use a container for all @@ -87,6 +88,7 @@ EXPORT_SYMBOL(gnet_stats_start_copy_compat); * @type: TLV type for top level statistic TLV * @lock: statistics lock * @d: dumping handle + * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use a container for all From 3497ed8c852a5a3d48957ca91baaa443d9bfcd4d Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Mon, 6 Jun 2016 17:29:30 +0100 Subject: [PATCH 0367/1050] sfc: report supported link speeds on SFP connections 7000-series SFC NICs connected with an SFP+ module currently fail to report any supported link speeds. Reported-by: Jarod Wilson Signed-off-by: Bert Kenward Reviewed-by: Jarod Wilson Tested-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi_port.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index 7f295c4d7b80..2a9228a6e4a0 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -189,11 +189,12 @@ static u32 mcdi_to_ethtool_cap(u32 media, u32 cap) case MC_CMD_MEDIA_XFP: case MC_CMD_MEDIA_SFP_PLUS: - result |= SUPPORTED_FIBRE; - break; - case MC_CMD_MEDIA_QSFP_PLUS: result |= SUPPORTED_FIBRE; + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + result |= SUPPORTED_1000baseT_Full; + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + result |= SUPPORTED_10000baseT_Full; if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) result |= SUPPORTED_40000baseCR4_Full; break; From c106c21ce02e366f3dc887bff7c48f3f140c45c9 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 25 May 2016 22:40:42 +0000 Subject: [PATCH 0368/1050] ARM: dts: socfpga: Add missing PHY phandle Add missing PHY phandle into the DT, otherwise the stmmac code won't detect the PHY correctly anymore. Signed-off-by: Marek Vasut Cc: Dinh Nguyen Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts index a3601e4c0a2e..b844473601d2 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts @@ -136,6 +136,7 @@ &gmac1 { status = "okay"; phy-mode = "rgmii"; + phy-handle = <&phy1>; snps,reset-gpio = <&porta 0 GPIO_ACTIVE_LOW>; snps,reset-active-low; From 48f67d62194952617dcade08194abc7f5cb3f50c Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 6 Jun 2016 09:46:11 +0800 Subject: [PATCH 0369/1050] ideapad_laptop: Add an event for mic mute hotkey Newer ideapads support a new mic hotkey implemented via an ACPI interface. This patch converts the mic mute event to a keycode KEY_MICMUTE. Signed-off-by: Alex Hung Acked-by: Ike Panhc Signed-off-by: Darren Hart --- drivers/platform/x86/ideapad-laptop.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 4a23fbc66b71..d1a091b93192 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -567,6 +567,7 @@ static void ideapad_sysfs_exit(struct ideapad_private *priv) static const struct key_entry ideapad_keymap[] = { { KE_KEY, 6, { KEY_SWITCHVIDEOMODE } }, { KE_KEY, 7, { KEY_CAMERA } }, + { KE_KEY, 8, { KEY_MICMUTE } }, { KE_KEY, 11, { KEY_F16 } }, { KE_KEY, 13, { KEY_WLAN } }, { KE_KEY, 16, { KEY_PROG1 } }, @@ -809,6 +810,7 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) break; case 13: case 11: + case 8: case 7: case 6: ideapad_input_report(priv, vpc_bit); From 0118c2d3eac0545d4095877e5a015b5dc763b3c2 Mon Sep 17 00:00:00 2001 From: Dennis Wassenberg Date: Wed, 8 Jun 2016 10:54:25 -0400 Subject: [PATCH 0370/1050] thinkpad_acpi: Add support for HKEY version 0x200 Lenovo Thinkpad devices T460, T460s, T460p, T560, X260 use HKEY version 0x200 without adaptive keyboard. HKEY version 0x200 has method MHKA with one parameter value. Passing parameter value 1 will get hotkey_all_mask (the same like HKEY version 0x100 without parameter). Passing parameter value 2 to MHKA method will retrieve hotkey_all_adaptive_mask. If 0 is returned in that case there is no adaptive keyboard available. Signed-off-by: Dennis Wassenberg Signed-off-by: Lyude Tested-by: Lyude Tested-by: Marco Trevisan Acked-by: Henrique de Moraes Holschuh [dvhart: Keep MHKA error string on one line in new and existing pr_err calls] Signed-off-by: Darren Hart --- drivers/platform/x86/thinkpad_acpi.c | 87 ++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 24 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c3bfa1fe95bf..b65ce7519411 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2043,6 +2043,7 @@ static int hotkey_autosleep_ack; static u32 hotkey_orig_mask; /* events the BIOS had enabled */ static u32 hotkey_all_mask; /* all events supported in fw */ +static u32 hotkey_adaptive_all_mask; /* all adaptive events supported in fw */ static u32 hotkey_reserved_mask; /* events better left disabled */ static u32 hotkey_driver_mask; /* events needed by the driver */ static u32 hotkey_user_mask; /* events visible to userspace */ @@ -2742,6 +2743,17 @@ static ssize_t hotkey_all_mask_show(struct device *dev, static DEVICE_ATTR_RO(hotkey_all_mask); +/* sysfs hotkey all_mask ----------------------------------------------- */ +static ssize_t hotkey_adaptive_all_mask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%08x\n", + hotkey_adaptive_all_mask | hotkey_source_mask); +} + +static DEVICE_ATTR_RO(hotkey_adaptive_all_mask); + /* sysfs hotkey recommended_mask --------------------------------------- */ static ssize_t hotkey_recommended_mask_show(struct device *dev, struct device_attribute *attr, @@ -2985,6 +2997,7 @@ static struct attribute *hotkey_attributes[] __initdata = { &dev_attr_wakeup_hotunplug_complete.attr, &dev_attr_hotkey_mask.attr, &dev_attr_hotkey_all_mask.attr, + &dev_attr_hotkey_adaptive_all_mask.attr, &dev_attr_hotkey_recommended_mask.attr, #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL &dev_attr_hotkey_source_mask.attr, @@ -3321,20 +3334,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) if (!tp_features.hotkey) return 1; - /* - * Check if we have an adaptive keyboard, like on the - * Lenovo Carbon X1 2014 (2nd Gen). - */ - if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) { - if ((hkeyv >> 8) == 2) { - tp_features.has_adaptive_kbd = true; - res = sysfs_create_group(&tpacpi_pdev->dev.kobj, - &adaptive_kbd_attr_group); - if (res) - goto err_exit; - } - } - quirks = tpacpi_check_quirks(tpacpi_hotkey_qtable, ARRAY_SIZE(tpacpi_hotkey_qtable)); @@ -3357,30 +3356,70 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) A30, R30, R31, T20-22, X20-21, X22-24. Detected by checking for HKEY interface version 0x100 */ if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) { - if ((hkeyv >> 8) != 1) { - pr_err("unknown version of the HKEY interface: 0x%x\n", - hkeyv); - pr_err("please report this to %s\n", TPACPI_MAIL); - } else { + vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, + "firmware HKEY interface version: 0x%x\n", + hkeyv); + + switch (hkeyv >> 8) { + case 1: /* * MHKV 0x100 in A31, R40, R40e, * T4x, X31, and later */ - vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, - "firmware HKEY interface version: 0x%x\n", - hkeyv); /* Paranoia check AND init hotkey_all_mask */ if (!acpi_evalf(hkey_handle, &hotkey_all_mask, "MHKA", "qd")) { - pr_err("missing MHKA handler, " - "please report this to %s\n", + pr_err("missing MHKA handler, please report this to %s\n", TPACPI_MAIL); /* Fallback: pre-init for FN+F3,F4,F12 */ hotkey_all_mask = 0x080cU; } else { tp_features.hotkey_mask = 1; } + break; + + case 2: + /* + * MHKV 0x200 in X1, T460s, X260, T560, X1 Tablet (2016) + */ + + /* Paranoia check AND init hotkey_all_mask */ + if (!acpi_evalf(hkey_handle, &hotkey_all_mask, + "MHKA", "dd", 1)) { + pr_err("missing MHKA handler, please report this to %s\n", + TPACPI_MAIL); + /* Fallback: pre-init for FN+F3,F4,F12 */ + hotkey_all_mask = 0x080cU; + } else { + tp_features.hotkey_mask = 1; + } + + /* + * Check if we have an adaptive keyboard, like on the + * Lenovo Carbon X1 2014 (2nd Gen). + */ + if (acpi_evalf(hkey_handle, &hotkey_adaptive_all_mask, + "MHKA", "dd", 2)) { + if (hotkey_adaptive_all_mask != 0) { + tp_features.has_adaptive_kbd = true; + res = sysfs_create_group( + &tpacpi_pdev->dev.kobj, + &adaptive_kbd_attr_group); + if (res) + goto err_exit; + } + } else { + tp_features.has_adaptive_kbd = false; + hotkey_adaptive_all_mask = 0x0U; + } + break; + + default: + pr_err("unknown version of the HKEY interface: 0x%x\n", + hkeyv); + pr_err("please report this to %s\n", TPACPI_MAIL); + break; } } From 25789f95a8834d154e5c1f0c9df9a7faedeae98e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 26 May 2016 11:43:23 +0200 Subject: [PATCH 0371/1050] platform/x86: Drop duplicate dependencies on X86 The whole menu depends on X86 so there is no point in repeating this dependency on individual driver entries. Signed-off-by: Jean Delvare Signed-off-by: Darren Hart --- drivers/platform/x86/Kconfig | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index c06bb85c2839..3ec0025d19e7 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -103,7 +103,6 @@ config DELL_SMBIOS config DELL_LAPTOP tristate "Dell Laptop Extras" - depends on X86 depends on DELL_SMBIOS depends on DMI depends on BACKLIGHT_CLASS_DEVICE @@ -505,7 +504,7 @@ config THINKPAD_ACPI_HOTKEY_POLL config SENSORS_HDAPS tristate "Thinkpad Hard Drive Active Protection System (hdaps)" - depends on INPUT && X86 + depends on INPUT select INPUT_POLLDEV default n help @@ -749,7 +748,7 @@ config TOSHIBA_WMI config ACPI_CMPC tristate "CMPC Laptop Extras" - depends on X86 && ACPI + depends on ACPI depends on RFKILL || RFKILL=n select INPUT select BACKLIGHT_CLASS_DEVICE @@ -848,7 +847,7 @@ config INTEL_IMR config INTEL_PMC_CORE bool "Intel PMC Core driver" - depends on X86 && PCI + depends on PCI ---help--- The Intel Platform Controller Hub for Intel Core SoCs provides access to Power Management Controller registers via a PCI interface. This @@ -860,7 +859,7 @@ config INTEL_PMC_CORE config IBM_RTL tristate "Device driver to enable PRTL support" - depends on X86 && PCI + depends on PCI ---help--- Enable support for IBM Premium Real Time Mode (PRTM). This module will allow you the enter and exit PRTM in the BIOS via @@ -894,7 +893,6 @@ config XO15_EBOOK config SAMSUNG_LAPTOP tristate "Samsung Laptop driver" - depends on X86 depends on RFKILL || RFKILL = n depends on ACPI_VIDEO || ACPI_VIDEO = n depends on BACKLIGHT_CLASS_DEVICE From ce492b3b8f99cf9d2f807ec22d8805c996a09503 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 3 Jun 2016 14:21:34 -0700 Subject: [PATCH 0372/1050] drm/fsl-dcu: use flat regmap cache Using flat regmap cache instead of RB-tree to avoid the following lockdep warning on driver load: WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:2755 lockdep_trace_alloc+0x15c/0x160() DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) The RB-tree regmap cache needs to allocate new space on first writes. However, allocations in an atomic context (e.g. when a spinlock is held) are not allowed. The function regmap_write calls map->lock, which acquires a spinlock in the fast_io case. Since the FSL DCU driver uses MMIO, the regmap bus of type regmap_mmio is being used which has fast_io set to true. Use flat regmap cache and specify max register to be large enouth to cover all registers available in LS1021a and Vybrids register space. Signed-off-by: Stefan Agner Cc: Mark Brown Cc: stable@vger.kernel.org --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index 0ec1ad961e0d..dc723f7ead7d 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -42,9 +42,10 @@ static const struct regmap_config fsl_dcu_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_FLAT, .volatile_reg = fsl_dcu_drm_is_volatile_reg, + .max_register = 0x11fc, }; static int fsl_dcu_drm_irq_init(struct drm_device *dev) From 85332739628fe4beafecdb713438c7cb1454c2f5 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 3 Jun 2016 14:27:03 +0300 Subject: [PATCH 0373/1050] drm/omap: fix unused variable warning in dsi & hdmi Signed-off-by: Tomi Valkeinen Signed-off-by: Dave Airlie --- drivers/gpu/drm/omapdrm/dss/dsi.c | 1 - drivers/gpu/drm/omapdrm/dss/hdmi5.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 9ed8272e54ae..56c43f355ce3 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -1167,7 +1167,6 @@ static int dsi_regulator_init(struct platform_device *dsidev) { struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev); struct regulator *vdds_dsi; - int r; if (dsi->vdds_dsi_reg != NULL) return 0; diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c index e129245eb8a9..9255c0e1e4a7 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c @@ -120,7 +120,6 @@ static irqreturn_t hdmi_irq_handler(int irq, void *data) static int hdmi_init_regulator(void) { - int r; struct regulator *reg; if (hdmi.vdda_reg != NULL) From 72d8c36ec364c82bf1bf0c64dfa1041cfaf139f7 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 7 Jun 2016 14:53:56 +0800 Subject: [PATCH 0374/1050] scsi: fix race between simultaneous decrements of ->host_failed sas_ata_strategy_handler() adds the works of the ata error handler to system_unbound_wq. This workqueue asynchronously runs work items, so the ata error handler will be performed concurrently on different CPUs. In this case, ->host_failed will be decreased simultaneously in scsi_eh_finish_cmd() on different CPUs, and become abnormal. It will lead to permanently inequality between ->host_failed and ->host_busy, and scsi error handler thread won't start running. IO errors after that won't be handled. Since all scmds must have been handled in the strategy handler, just remove the decrement in scsi_eh_finish_cmd() and zero ->host_busy after the strategy handler to fix this race. Fixes: 50824d6c5657 ("[SCSI] libsas: async ata-eh") Cc: stable@vger.kernel.org Signed-off-by: Wei Fang Reviewed-by: James Bottomley Signed-off-by: Martin K. Petersen --- Documentation/scsi/scsi_eh.txt | 8 ++++++-- drivers/ata/libata-eh.c | 2 +- drivers/scsi/scsi_error.c | 4 +++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt index 8638f61c8c9d..37eca00796ee 100644 --- a/Documentation/scsi/scsi_eh.txt +++ b/Documentation/scsi/scsi_eh.txt @@ -263,19 +263,23 @@ scmd->allowed. 3. scmd recovered ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd - - shost->host_failed-- - clear scmd->eh_eflags - scsi_setup_cmd_retry() - move from local eh_work_q to local eh_done_q LOCKING: none + CONCURRENCY: at most one thread per separate eh_work_q to + keep queue manipulation lockless 4. EH completes ACTION: scsi_eh_flush_done_q() retries scmds or notifies upper - layer of failure. + layer of failure. May be called concurrently but must have + a no more than one thread per separate eh_work_q to + manipulate the queue locklessly - scmd is removed from eh_done_q and scmd->eh_entry is cleared - if retry is necessary, scmd is requeued using scsi_queue_insert() - otherwise, scsi_finish_command() is invoked for scmd + - zero shost->host_failed LOCKING: queue or finish function performs appropriate locking diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 961acc788f44..91a9e6af2ec4 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -606,7 +606,7 @@ void ata_scsi_error(struct Scsi_Host *host) ata_scsi_port_error_handler(host, ap); /* finish or retry handled scmd's and clean up */ - WARN_ON(host->host_failed || !list_empty(&eh_work_q)); + WARN_ON(!list_empty(&eh_work_q)); DPRINTK("EXIT\n"); } diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 984ddcb4786d..1b9c049bd5c5 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1127,7 +1127,6 @@ static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn) */ void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q) { - scmd->device->host->host_failed--; scmd->eh_eflags = 0; list_move_tail(&scmd->eh_entry, done_q); } @@ -2226,6 +2225,9 @@ int scsi_error_handler(void *data) else scsi_unjam_host(shost); + /* All scmds have been handled */ + shost->host_failed = 0; + /* * Note - if the above fails completely, the action is to take * individual devices offline and flush the queue of any From 6eef3801e719e4ea9c15c01b1d77706f47331166 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Jun 2016 20:11:03 +0100 Subject: [PATCH 0375/1050] net: cls_u32: catch all hardware offload errors Errors reported by u32_replace_hw_hnode() were not propagated. Signed-off-by: Jakub Kicinski Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 27b99fd774d7..54ab32a8ff4c 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -922,11 +922,17 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; + + err = u32_replace_hw_hnode(tp, ht, flags); + if (err) { + kfree(ht); + return err; + } + RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); *arg = (unsigned long)ht; - u32_replace_hw_hnode(tp, ht, flags); return 0; } From 201c44bd8ffa899f07b7b322a73e19baf0ada1e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Jun 2016 20:11:04 +0100 Subject: [PATCH 0376/1050] net: cls_u32: be more strict about skip-sw flag for knodes Return an error if user requested skip-sw and the underlaying hardware cannot handle tc offloads (or offloads are disabled). This patch fixes the knode handling. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 54ab32a8ff4c..ffe593efe930 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -508,27 +508,28 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, tp, flags)) { - offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; - offload.cls_u32->knode.handle = n->handle; - offload.cls_u32->knode.fshift = n->fshift; -#ifdef CONFIG_CLS_U32_MARK - offload.cls_u32->knode.val = n->val; - offload.cls_u32->knode.mask = n->mask; -#else - offload.cls_u32->knode.val = 0; - offload.cls_u32->knode.mask = 0; -#endif - offload.cls_u32->knode.sel = &n->sel; - offload.cls_u32->knode.exts = &n->exts; - if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; + if (!tc_should_offload(dev, tp, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; + offload.cls_u32->knode.handle = n->handle; + offload.cls_u32->knode.fshift = n->fshift; +#ifdef CONFIG_CLS_U32_MARK + offload.cls_u32->knode.val = n->val; + offload.cls_u32->knode.mask = n->mask; +#else + offload.cls_u32->knode.val = 0; + offload.cls_u32->knode.mask = 0; +#endif + offload.cls_u32->knode.sel = &n->sel; + offload.cls_u32->knode.exts = &n->exts; + if (n->ht_down) + offload.cls_u32->knode.link_handle = n->ht_down->handle; + + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } From 0a46baaf634663d28038fc137239b71bf5385e5a Mon Sep 17 00:00:00 2001 From: Shweta Choudaha Date: Wed, 8 Jun 2016 20:15:43 +0100 Subject: [PATCH 0377/1050] ip6gre: Allow live link address change The ip6 GRE tap device should not be forced to down state to change the mac address and should allow live address change for tap device similar to ipv4 gre. Signed-off-by: Shweta Choudaha Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f4ac2842d4d9..fdc9de276ab1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1256,6 +1256,8 @@ static int ip6gre_tap_init(struct net_device *dev) if (ret) return ret; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + tunnel = netdev_priv(dev); ip6gre_tnl_link_config(tunnel, 1); @@ -1289,6 +1291,7 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; } static bool ip6gre_netlink_encap_parms(struct nlattr *data[], From 9b15350f0d5c401c02eca15c4e6ca0603cff1a41 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Jun 2016 23:23:01 +0200 Subject: [PATCH 0378/1050] qfq: don't leak skb if kzalloc fails When we need to create a new aggregate to enqueue the skb we call kzalloc. If that fails we returned ENOBUFS without freeing the skb. Spotted during code review. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8d2d8d953432..f18857febdad 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1235,8 +1235,10 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid); err = qfq_change_agg(sch, cl, cl->agg->class_weight, qdisc_pkt_len(skb)); - if (err) - return err; + if (err) { + cl->qstats.drops++; + return qdisc_drop(skb, sch); + } } err = qdisc_enqueue(skb, cl->qdisc); From 35639a0e98391036a4c7f23253c321d6621a8897 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 9 Jun 2016 11:32:14 +0530 Subject: [PATCH 0379/1050] ALSA: hda - Add PCI ID for Kabylake Kabylake shows up as PCI ID 0xa171. And Kabylake-LP as 0x9d71. Since these are similar to Skylake add these to SKL_PLUS macro Signed-off-by: Vinod Koul Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 9a0d1445ca5c..94089fc71884 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -365,8 +365,11 @@ enum { #define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170) #define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70) +#define IS_KBL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa171) +#define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) -#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) +#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ + IS_KBL(pci) || IS_KBL_LP(pci) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -2181,6 +2184,12 @@ static const struct pci_device_id azx_ids[] = { /* Sunrise Point-LP */ { PCI_DEVICE(0x8086, 0x9d70), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Kabylake */ + { PCI_DEVICE(0x8086, 0xa171), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Kabylake-LP */ + { PCI_DEVICE(0x8086, 0x9d71), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, From 2e4094bdaa3ef295abbebb31b978e3344ee64257 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 19 May 2016 18:20:17 -0500 Subject: [PATCH 0380/1050] ARM: OMAP2: Enable Errata 430973 for OMAP3 Enable Erratum 430973 similar to commit 5c86c5339c56 ("ARM: omap2plus_defconfig: Enable ARM erratum 430973 for omap3") - Since multiple defconfigs can exist from various points of view (multi_v7, omap2plus etc.. it is always better to enable the erratum from the Kconfig selection point of view so that downstream kernels dont have to rediscover this all over again. Reported-by: Grygorii Strashko Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 0517f0c1581a..b9b71328249b 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -17,6 +17,7 @@ config ARCH_OMAP3 select PM_OPP if PM select PM if CPU_IDLE select SOC_HAS_OMAP2_SDRC + select ARM_ERRATA_430973 config ARCH_OMAP4 bool "TI OMAP4" From 4c88c1c72f86dab63d8219c0aa9e9a398f2efaa9 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 20 May 2016 13:13:33 +0300 Subject: [PATCH 0381/1050] ARM: dts: DRA74x: fix DSS PLL2 addresses DSS's 'pll2_clkctrl' and 'pll2' have wrong addresses in the dra74x.dtsi file. Video PLL2 has not been used so wrong addresses went unnoticed. Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra74x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi index 4220eeffc65a..5e06020f450b 100644 --- a/arch/arm/boot/dts/dra74x.dtsi +++ b/arch/arm/boot/dts/dra74x.dtsi @@ -107,8 +107,8 @@ reg = <0x58000000 0x80>, <0x58004054 0x4>, <0x58004300 0x20>, - <0x58005054 0x4>, - <0x58005300 0x20>; + <0x58009054 0x4>, + <0x58009300 0x20>; reg-names = "dss", "pll1_clkctrl", "pll1", "pll2_clkctrl", "pll2"; From 8d29bdba7291f9f939bc17ac088ab650d106d451 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 24 May 2016 11:12:29 -0500 Subject: [PATCH 0382/1050] ARM: OMAP2+: Select OMAP_INTERCONNECT for SOC_AM43XX AM43XX SoCs make use of the omap_l3_noc driver so explicitly select OMAP_INTERCONNECT in the Kconfig for SOC_AM43XX to ensure it always gets enabled for AM43XX only builds. Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index b9b71328249b..e6405c094f37 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -37,6 +37,7 @@ config ARCH_OMAP4 select PM if CPU_IDLE select ARM_ERRATA_754322 select ARM_ERRATA_775420 + select OMAP_INTERCONNECT config SOC_OMAP5 bool "TI OMAP5" From 6cbf6236d54c24b9a29e6892549c25b6902b44ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Jun 2016 09:40:55 +0200 Subject: [PATCH 0383/1050] cfg80211: remove get/set antenna and tx power warnings Since set_tx_power and set_antenna are frequently implemented without the matching get_tx_power/get_antenna, we shouldn't have added warnings for those. Remove them. The remaining ones are correct and need to be implemented symmetrically for correct operation. Cc: stable@vger.kernel.org Fixes: de3bb771f471 ("cfg80211: add more warnings for inconsistent ops") Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index d25c82bc1bbe..ecca3896b9f7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -363,8 +363,6 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, WARN_ON(ops->remain_on_channel && !ops->cancel_remain_on_channel); WARN_ON(ops->tdls_channel_switch && !ops->tdls_cancel_channel_switch); WARN_ON(ops->add_tx_ts && !ops->del_tx_ts); - WARN_ON(ops->set_tx_power && !ops->get_tx_power); - WARN_ON(ops->set_antenna && !ops->get_antenna); alloc_size = sizeof(*rdev) + sizeof_priv; From 3d5fdff46c4b2b9534fa2f9fc78e90a48e0ff724 Mon Sep 17 00:00:00 2001 From: Prasun Maiti Date: Mon, 6 Jun 2016 20:04:19 +0530 Subject: [PATCH 0384/1050] wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel iwpriv app uses iw_point structure to send data to Kernel. The iw_point structure holds a pointer. For compatibility Kernel converts the pointer as required for WEXT IOCTLs (SIOCIWFIRST to SIOCIWLAST). Some drivers may use iw_handler_def.private_args to populate iwpriv commands instead of iw_handler_def.private. For those case, the IOCTLs from SIOCIWFIRSTPRIV to SIOCIWLASTPRIV will follow the path ndo_do_ioctl(). Accordingly when the filled up iw_point structure comes from 32 bit iwpriv to 64 bit Kernel, Kernel will not convert the pointer and sends it to driver. So, the driver may get the invalid data. The pointer conversion for the IOCTLs (SIOCIWFIRSTPRIV to SIOCIWLASTPRIV), which follow the path ndo_do_ioctl(), is mandatory. This patch adds pointer conversion from 32 bit to 64 bit and vice versa, if the ioctl comes from 32 bit iwpriv to 64 bit Kernel. Cc: stable@vger.kernel.org Signed-off-by: Prasun Maiti Signed-off-by: Ujjal Roy Tested-by: Dibyajyoti Ghosh Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6250b1cfcde5..dbb2738e356a 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -958,8 +958,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ - if (dev->netdev_ops->ndo_do_ioctl) - return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_do_ioctl) { +#ifdef CONFIG_COMPAT + if (info->flags & IW_REQUEST_FLAG_COMPAT) { + int ret = 0; + struct iwreq iwr_lcl; + struct compat_iw_point *iwp_compat = (void *) &iwr->u.data; + + memcpy(&iwr_lcl, iwr, sizeof(struct iwreq)); + iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer); + iwr_lcl.u.data.length = iwp_compat->length; + iwr_lcl.u.data.flags = iwp_compat->flags; + + ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd); + + iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer); + iwp_compat->length = iwr_lcl.u.data.length; + iwp_compat->flags = iwr_lcl.u.data.flags; + + return ret; + } else +#endif + return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + } return -EOPNOTSUPP; } From 20c15226d1c73150c4d9107301cac5dda0b7f995 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 11 May 2016 16:39:30 -0300 Subject: [PATCH 0385/1050] ARM: imx6ul: Fix Micrel PHY mask The value used for Micrel PHY mask is not correct. Use the MICREL_PHY_ID_MASK definition instead. Thanks to Jiri Luznicky for proposing the fix at https://community.freescale.com/thread/387739 Cc: Fixes: 709bc0657fe6f9f55 ("ARM: imx6ul: add fec MAC refrence clock and phy fixup init") Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mach-imx6ul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mach-imx6ul.c b/arch/arm/mach-imx/mach-imx6ul.c index a38b16b69923..b56de4b8cdf2 100644 --- a/arch/arm/mach-imx/mach-imx6ul.c +++ b/arch/arm/mach-imx/mach-imx6ul.c @@ -46,7 +46,7 @@ static int ksz8081_phy_fixup(struct phy_device *dev) static void __init imx6ul_enet_phy_init(void) { if (IS_BUILTIN(CONFIG_PHYLIB)) - phy_register_fixup_for_uid(PHY_ID_KSZ8081, 0xffffffff, + phy_register_fixup_for_uid(PHY_ID_KSZ8081, MICREL_PHY_ID_MASK, ksz8081_phy_fixup); } From d1a7f7aadc866eff61c422da0ecbe1f8e383e0f5 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Tue, 24 May 2016 09:26:27 +0800 Subject: [PATCH 0386/1050] drm/amdgpu/iceland: Set SC_PA_RASTER_CONFIG according to different RB enabled fix the raster config setting for different iceland configs. Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index f19bab68fd83..4d747ba331db 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3974,11 +3974,15 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, 0x3a00161a); amdgpu_ring_write(ring, 0x0000002e); break; - case CHIP_TOPAZ: case CHIP_CARRIZO: amdgpu_ring_write(ring, 0x00000002); amdgpu_ring_write(ring, 0x00000000); break; + case CHIP_TOPAZ: + amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? + 0x00000000 : 0x00000002); + amdgpu_ring_write(ring, 0x00000000); + break; case CHIP_STONEY: amdgpu_ring_write(ring, 0x00000000); amdgpu_ring_write(ring, 0x00000000); From 8b4af8a8e3e6ad82e0d32d1665d9a755c05c4c12 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 23 May 2016 18:24:41 +0800 Subject: [PATCH 0387/1050] drm/admgpu/powerplay/polaris: fix powertune table upload Exclude AVFS related fields when update powertune table to hw. The driver shouldn't set them directly. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c index 0b99ab3ba0c5..ae96f14b827c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c @@ -286,7 +286,7 @@ int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr) if (polaris10_copy_bytes_to_smc(hwmgr->smumgr, pm_fuse_table_offset, (uint8_t *)&data->power_tune_table, - sizeof(struct SMU74_Discrete_PmFuses), data->sram_end)) + (sizeof(struct SMU74_Discrete_PmFuses) - 92), data->sram_end)) PP_ASSERT_WITH_CODE(false, "Attempt to download PmFuseTable Failed!", return -EINVAL); From 212cb3b6d79bb9f525da5593133d93b107184b27 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 19 May 2016 14:35:17 +0800 Subject: [PATCH 0388/1050] drm/amdgpu: add late_fini for ip_funcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This give IP modules an optional late cleanup function. This is needed to handle tricky inter-module dependencies during tear down. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/amd_shared.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 6080951d539d..afce1edbe250 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -157,6 +157,7 @@ struct amd_ip_funcs { int (*hw_init)(void *handle); /* tears down the hw state */ int (*hw_fini)(void *handle); + void (*late_fini)(void *handle); /* handles IP specific hw/sw changes for suspend */ int (*suspend)(void *handle); /* handles IP specific hw/sw changes for resume */ From 482587e3145ef4100b52946660ae52b457d09194 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 19 May 2016 14:36:01 +0800 Subject: [PATCH 0389/1050] drm/amdgpu: impl late_fini for amdgpu_pp_ip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements late_init support for powerplay. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 6bd961fb43dc..1cd53c65918b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -223,6 +223,22 @@ static int amdgpu_pp_hw_fini(void *handle) return ret; } +static void amdgpu_pp_late_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +#ifdef CONFIG_DRM_AMD_POWERPLAY + if (adev->pp_enabled) { + amdgpu_pm_sysfs_fini(adev); + amd_powerplay_fini(adev->powerplay.pp_handle); + } + + if (adev->powerplay.ip_funcs->late_fini) + adev->powerplay.ip_funcs->late_fini( + adev->powerplay.pp_handle); +#endif +} + static int amdgpu_pp_suspend(void *handle) { int ret = 0; @@ -311,6 +327,7 @@ const struct amd_ip_funcs amdgpu_pp_ip_funcs = { .sw_fini = amdgpu_pp_sw_fini, .hw_init = amdgpu_pp_hw_init, .hw_fini = amdgpu_pp_hw_fini, + .late_fini = amdgpu_pp_late_fini, .suspend = amdgpu_pp_suspend, .resume = amdgpu_pp_resume, .is_idle = amdgpu_pp_is_idle, From a6dcfd9cc55432e4dcbe058d6ae9f07fb3452992 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 19 May 2016 14:36:34 +0800 Subject: [PATCH 0390/1050] drm/amdgpu: fix pplib finish bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1,should use late_fini to kfree all resource otherwise the released pointer maybe accessed in IRQ ip fini routine. 2,hwmgr should not be kfree by pem_fini which is invoked by hw fini path. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 7 ------- drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c | 3 --- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bb8b149786d7..1996670b6751 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1325,6 +1325,11 @@ static int amdgpu_fini(struct amdgpu_device *adev) adev->ip_block_status[i].valid = false; } + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { + if (adev->ip_blocks[i].funcs->late_fini) + adev->ip_blocks[i].funcs->late_fini((void *)adev); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 1cd53c65918b..10b1be51318b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -183,13 +183,6 @@ static int amdgpu_pp_sw_fini(void *handle) if (ret) return ret; -#ifdef CONFIG_DRM_AMD_POWERPLAY - if (adev->pp_enabled) { - amdgpu_pm_sysfs_fini(adev); - amd_powerplay_fini(adev->powerplay.pp_handle); - } -#endif - return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c index 46410e3c7349..fb88e4e5d625 100644 --- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c +++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c @@ -58,9 +58,6 @@ static void pem_fini(struct pp_eventmgr *eventmgr) pem_unregister_interrupts(eventmgr); pem_handle_event(eventmgr, AMD_PP_EVENT_UNINITIALIZE, &event_data); - - if (eventmgr != NULL) - kfree(eventmgr); } int eventmgr_init(struct pp_instance *handle) From 768c95e70c4bd33b3da32a15dd33486246f4ca79 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 1 Jun 2016 11:09:01 -0400 Subject: [PATCH 0391/1050] drm/amdgpu: fix fw leak in non-powerplay dpm code We need to release the firmware on driver tear down. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 3 +++ drivers/gpu/drm/amd/amdgpu/fiji_dpm.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/iceland_dpm.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/tonga_dpm.c | 5 +++++ 4 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index ea407db1fbcf..5ec1f1e9c983 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -6221,6 +6221,9 @@ static int ci_dpm_sw_fini(void *handle) ci_dpm_fini(adev); mutex_unlock(&adev->pm.mutex); + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c index 245cabf06575..ed03b75175d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c @@ -72,6 +72,11 @@ static int fiji_dpm_sw_init(void *handle) static int fiji_dpm_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c index 460bc8ad37e6..825ccd63f2dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c @@ -72,6 +72,11 @@ static int iceland_dpm_sw_init(void *handle) static int iceland_dpm_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c index b7615cefcac4..f06f6f4dc3a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c @@ -71,6 +71,11 @@ static int tonga_dpm_sw_init(void *handle) static int tonga_dpm_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } From 9a005bef5b5b5ceb78ff1138e6d6baf4bbeb8061 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 May 2016 16:55:07 +0800 Subject: [PATCH 0392/1050] drm/amdgpu: clear SA bo when created MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This help fix reloading driver hang issue of SDMA ring Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 8bf84efafb04..48618ee324eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -115,6 +115,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, return r; } r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr); + memset(sa_manager->cpu_ptr, 0, sa_manager->size); amdgpu_bo_unreserve(sa_manager->bo); return r; } From d72f7c0685870aa7efda0a06f8ca160a94905031 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 May 2016 16:55:50 +0800 Subject: [PATCH 0393/1050] drm/amdgpu: init more register for sdma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This help fix reloading driver hang issue of SDMA ring Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 518dca43b133..76f73ab9294e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -419,6 +419,8 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); /* set the wb address whether it's enabled or not */ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index f4c3130d3fdb..e11a37416e19 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -461,6 +461,8 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); /* set the wb address whether it's enabled or not */ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 31d99b0010f7..585d8fe22bae 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -672,6 +672,8 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); /* set the wb address whether it's enabled or not */ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], From 505dfe76cd3203bb2dcf13d862f46b7f0e95869a Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 May 2016 16:57:14 +0800 Subject: [PATCH 0394/1050] drm/amdgpu: modify sdma start sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit should fist halt engine, and then doing the register programing, and later unhalt engine, and finally run ring_test. this help fix reloading driver hang issue of SDMA ring original sequence is wrong for it programing engine after unhalt, which will lead to fault behavior when doing driver reloading after unloaded. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 15 +++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 76f73ab9294e..0079916e6d93 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -448,7 +448,12 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); ring->ready = true; + } + cik_sdma_enable(adev, true); + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_ring(ring); if (r) { ring->ready = false; @@ -531,8 +536,8 @@ static int cik_sdma_start(struct amdgpu_device *adev) if (r) return r; - /* unhalt the MEs */ - cik_sdma_enable(adev, true); + /* halt the engine before programing */ + cik_sdma_enable(adev, false); /* start the gfx rings and rlc compute queues */ r = cik_sdma_gfx_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index e11a37416e19..f6014b024f2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -491,7 +491,11 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); ring->ready = true; + } + sdma_v2_4_enable(adev, true); + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_ring(ring); if (r) { ring->ready = false; @@ -582,8 +586,8 @@ static int sdma_v2_4_start(struct amdgpu_device *adev) return -EINVAL; } - /* unhalt the MEs */ - sdma_v2_4_enable(adev, true); + /* halt the engine before programing */ + sdma_v2_4_enable(adev, false); /* start the gfx rings and rlc compute queues */ r = sdma_v2_4_gfx_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 585d8fe22bae..33605d4ac2d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -713,7 +713,15 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); ring->ready = true; + } + /* unhalt the MEs */ + sdma_v3_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v3_0_ctx_switch_enable(adev, true); + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_ring(ring); if (r) { ring->ready = false; @@ -806,10 +814,9 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) } } - /* unhalt the MEs */ - sdma_v3_0_enable(adev, true); - /* enable sdma ring preemption */ - sdma_v3_0_ctx_switch_enable(adev, true); + /* disble sdma engine before programing it */ + sdma_v3_0_ctx_switch_enable(adev, false); + sdma_v3_0_enable(adev, false); /* start the gfx rings and rlc compute queues */ r = sdma_v3_0_gfx_resume(adev); From fdff8cfa72b3e42d4d0c70684fa18b1dfee46d97 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 24 May 2016 13:23:46 +0800 Subject: [PATCH 0395/1050] drm/amdgpu: vBIOS post only call when mem_size zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1996670b6751..1727a4d998a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1518,8 +1518,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_atombios_has_gpu_virtualization_table(adev); /* Post card if necessary */ - if (!amdgpu_card_posted(adev) || - adev->virtualization.supports_sr_iov) { + if (!amdgpu_card_posted(adev)) { if (!adev->bios) { dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; From 2ba272d7bde27e1db2cf1c6cee49b01b7ea08989 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 27 Apr 2016 18:07:41 +0800 Subject: [PATCH 0396/1050] drm/amdgpu: add pipeline sync while vmid switch in same ctx Since vmid-mgr supports vmid sharing in one vm, the same ctx could get different vmids for two emits without vm flush, vm_flush could be done in another ring. Signed-off-by: Chunming Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 992f00b65be4..01c36b8d6222 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -799,6 +799,7 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + int vmid; }; /* @@ -936,7 +937,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size); + uint32_t oa_base, uint32_t oa_size, + bool vmid_switch); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 34e35423b78e..7a0b1e50f293 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -122,6 +122,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; + int vmid = 0, old_vmid = ring->vmid; struct fence *hwf; uint64_t ctx; @@ -135,9 +136,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (job) { vm = job->vm; ctx = job->ctx; + vmid = job->vm_id; } else { vm = NULL; ctx = 0; + vmid = 0; } if (!ring->ready) { @@ -163,7 +166,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr, job->gds_base, job->gds_size, job->gws_base, job->gws_size, - job->oa_base, job->oa_size); + job->oa_base, job->oa_size, + (ring->current_ctx == ctx) && (old_vmid != vmid)); if (r) { amdgpu_ring_undo(ring); return r; @@ -180,7 +184,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; - /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) continue; @@ -188,6 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, need_ctx_switch); need_ctx_switch = false; + ring->vmid = vmid; } if (ring->funcs->emit_hdp_invalidate) @@ -198,6 +202,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vm_id) amdgpu_vm_reset_id(adev, job->vm_id); + ring->vmid = old_vmid; amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9f36ed30ba11..62a4c127620f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -298,7 +298,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size) + uint32_t oa_base, uint32_t oa_size, + bool vmid_switch) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; @@ -312,8 +313,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, int r; if (ring->funcs->emit_pipeline_sync && ( - pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || - ring->type == AMDGPU_RING_TYPE_COMPUTE)) + pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch)) amdgpu_ring_emit_pipeline_sync(ring); if (ring->funcs->emit_vm_flush && From 3a3e88804d44e41ef2182d2a6577a6803fdd9ee0 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 27 May 2016 17:52:58 +0800 Subject: [PATCH 0397/1050] drm/amdgpu: fix mem leak in smumgr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index c483baf6b4fb..0728c1e3d97a 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -81,6 +81,7 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle) int smum_fini(struct pp_smumgr *smumgr) { + kfree(smumgr->device); kfree(smumgr); return 0; } From 61da601b95cd5565d047e42e73f984f5bdfbba70 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 27 May 2016 19:09:06 +0800 Subject: [PATCH 0398/1050] drm/amdgpu: fix mem leak in pplib/hwmgr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 1c48917da3cf..910d56dcd7b1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -93,6 +93,13 @@ int hwmgr_fini(struct pp_hwmgr *hwmgr) if (hwmgr == NULL || hwmgr->ps == NULL) return -EINVAL; + /* do hwmgr finish*/ + kfree(hwmgr->backend); + + kfree(hwmgr->start_thermal_controller.function_list); + + kfree(hwmgr->set_temperature_range.function_list); + kfree(hwmgr->ps); kfree(hwmgr); return 0; From 89e0ec9f5e27a8c5b5954290bef703dc6aac44f3 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 27 May 2016 19:34:11 +0800 Subject: [PATCH 0399/1050] drm/amdgpu: fix mem leak in atombios MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1727a4d998a4..964f31404f17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -827,8 +827,10 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) */ static void amdgpu_atombios_fini(struct amdgpu_device *adev) { - if (adev->mode_info.atom_context) + if (adev->mode_info.atom_context) { kfree(adev->mode_info.atom_context->scratch); + kfree(adev->mode_info.atom_context->iio); + } kfree(adev->mode_info.atom_context); adev->mode_info.atom_context = NULL; kfree(adev->mode_info.atom_card_info); From 9d8f086cd05954e03f10db1a9a52a240d086dc8c Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 13:43:45 +0800 Subject: [PATCH 0400/1050] drm/amdgpu: fix memleak in pptable_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 8 ++- .../powerplay/hwmgr/tonga_processpptables.c | 54 +++++++++---------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 8e345bfddb69..e629f8a9fe93 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -73,11 +73,14 @@ static int pp_sw_init(void *handle) ret = hwmgr->hwmgr_func->backend_init(hwmgr); if (ret) - goto err; + goto err1; pr_info("amdgpu: powerplay initialized\n"); return 0; +err1: + if (hwmgr->pptable_func->pptable_fini) + hwmgr->pptable_func->pptable_fini(hwmgr); err: pr_err("amdgpu: powerplay initialization failed\n"); return ret; @@ -100,6 +103,9 @@ static int pp_sw_fini(void *handle) if (hwmgr->hwmgr_func->backend_fini != NULL) ret = hwmgr->hwmgr_func->backend_fini(hwmgr); + if (hwmgr->pptable_func->pptable_fini) + hwmgr->pptable_func->pptable_fini(hwmgr); + return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c index 10e3630ee39d..296ec7ef6d45 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c @@ -1040,48 +1040,44 @@ int tonga_pp_tables_uninitialize(struct pp_hwmgr *hwmgr) struct phm_ppt_v1_information *pp_table_information = (struct phm_ppt_v1_information *)(hwmgr->pptable); - if (NULL != hwmgr->soft_pp_table) { - kfree(hwmgr->soft_pp_table); + if (NULL != hwmgr->soft_pp_table) hwmgr->soft_pp_table = NULL; - } - if (NULL != pp_table_information->vdd_dep_on_sclk) - pp_table_information->vdd_dep_on_sclk = NULL; + kfree(pp_table_information->vdd_dep_on_sclk); + pp_table_information->vdd_dep_on_sclk = NULL; - if (NULL != pp_table_information->vdd_dep_on_mclk) - pp_table_information->vdd_dep_on_mclk = NULL; + kfree(pp_table_information->vdd_dep_on_mclk); + pp_table_information->vdd_dep_on_mclk = NULL; - if (NULL != pp_table_information->valid_mclk_values) - pp_table_information->valid_mclk_values = NULL; + kfree(pp_table_information->valid_mclk_values); + pp_table_information->valid_mclk_values = NULL; - if (NULL != pp_table_information->valid_sclk_values) - pp_table_information->valid_sclk_values = NULL; + kfree(pp_table_information->valid_sclk_values); + pp_table_information->valid_sclk_values = NULL; - if (NULL != pp_table_information->vddc_lookup_table) - pp_table_information->vddc_lookup_table = NULL; + kfree(pp_table_information->vddc_lookup_table); + pp_table_information->vddc_lookup_table = NULL; - if (NULL != pp_table_information->vddgfx_lookup_table) - pp_table_information->vddgfx_lookup_table = NULL; + kfree(pp_table_information->vddgfx_lookup_table); + pp_table_information->vddgfx_lookup_table = NULL; - if (NULL != pp_table_information->mm_dep_table) - pp_table_information->mm_dep_table = NULL; + kfree(pp_table_information->mm_dep_table); + pp_table_information->mm_dep_table = NULL; - if (NULL != pp_table_information->cac_dtp_table) - pp_table_information->cac_dtp_table = NULL; + kfree(pp_table_information->cac_dtp_table); + pp_table_information->cac_dtp_table = NULL; - if (NULL != hwmgr->dyn_state.cac_dtp_table) - hwmgr->dyn_state.cac_dtp_table = NULL; + kfree(hwmgr->dyn_state.cac_dtp_table); + hwmgr->dyn_state.cac_dtp_table = NULL; - if (NULL != pp_table_information->ppm_parameter_table) - pp_table_information->ppm_parameter_table = NULL; + kfree(pp_table_information->ppm_parameter_table); + pp_table_information->ppm_parameter_table = NULL; - if (NULL != pp_table_information->pcie_table) - pp_table_information->pcie_table = NULL; + kfree(pp_table_information->pcie_table); + pp_table_information->pcie_table = NULL; - if (NULL != hwmgr->pptable) { - kfree(hwmgr->pptable); - hwmgr->pptable = NULL; - } + kfree(hwmgr->pptable); + hwmgr->pptable = NULL; return result; } From 67a6a504af90e58c478b2e7fa6c0af8ed64c995b Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 14:17:42 +0800 Subject: [PATCH 0401/1050] drm/amdgpu: fix missing free wb for cond_exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 3b02272db678..870f9494252c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -343,6 +343,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) ring->ring = NULL; ring->ring_obj = NULL; + amdgpu_wb_free(ring->adev, ring->cond_exe_offs); amdgpu_wb_free(ring->adev, ring->fence_offs); amdgpu_wb_free(ring->adev, ring->rptr_offs); amdgpu_wb_free(ring->adev, ring->wptr_offs); From 13331ac384e8211a0bf3158a895ac8b22005a622 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 14:18:57 +0800 Subject: [PATCH 0402/1050] drm/amdgpu: fix gfx8 ucode mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4d747ba331db..9f6f8669edc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -836,6 +836,26 @@ err1: return r; } + +static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + if ((adev->asic_type != CHIP_STONEY) && + (adev->asic_type != CHIP_TOPAZ)) + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + + kfree(adev->gfx.rlc.register_list_format); +} + static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -1983,7 +2003,7 @@ static int gfx_v8_0_sw_fini(void *handle) gfx_v8_0_rlc_fini(adev); - kfree(adev->gfx.rlc.register_list_format); + gfx_v8_0_free_microcode(adev); return 0; } From e517cd77ee76a26f3542d51a1b4f4d7c452f85cf Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 16:01:48 +0800 Subject: [PATCH 0403/1050] drm/amdgpu: fix gfx 7 ucode mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 7f18a53ab53a..8c6ad1e72f02 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -991,6 +991,22 @@ out: return err; } +static void gfx_v7_0_free_microcode(struct amdgpu_device *adev) +{ + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; +} + /** * gfx_v7_0_tiling_mode_table_init - init the hw tiling table * @@ -4489,6 +4505,7 @@ static int gfx_v7_0_sw_fini(void *handle) gfx_v7_0_cp_compute_fini(adev); gfx_v7_0_rlc_fini(adev); gfx_v7_0_mec_fini(adev); + gfx_v7_0_free_microcode(adev); return 0; } From 05f19eb5bd1d46aeeeb86a2a8538f35d7d55eb34 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 15:13:59 +0800 Subject: [PATCH 0404/1050] drm/amdgpu: fix uvd fini mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 01abfc21b4a2..e19520c4b4b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -253,19 +253,20 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int r; - if (adev->uvd.vcpu_bo == NULL) - return 0; + kfree(adev->uvd.saved_bo); amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); - r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); - if (!r) { - amdgpu_bo_kunmap(adev->uvd.vcpu_bo); - amdgpu_bo_unpin(adev->uvd.vcpu_bo); - amdgpu_bo_unreserve(adev->uvd.vcpu_bo); - } + if (adev->uvd.vcpu_bo) { + r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); + if (!r) { + amdgpu_bo_kunmap(adev->uvd.vcpu_bo); + amdgpu_bo_unpin(adev->uvd.vcpu_bo); + amdgpu_bo_unreserve(adev->uvd.vcpu_bo); + } - amdgpu_bo_unref(&adev->uvd.vcpu_bo); + amdgpu_bo_unref(&adev->uvd.vcpu_bo); + } amdgpu_ring_fini(&adev->uvd.ring); From 14d83e78c578a6c45163fb399ee760fe0d314bad Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 15:15:32 +0800 Subject: [PATCH 0405/1050] drm/amdgpu: fix sdma3 ucode mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 33605d4ac2d9..532ea88da66a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -236,6 +236,15 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) } } +static void sdma_v3_0_free_microcode(struct amdgpu_device *adev) +{ + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } +} + /** * sdma_v3_0_init_microcode - load ucode images from disk * @@ -1256,6 +1265,7 @@ static int sdma_v3_0_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + sdma_v3_0_free_microcode(adev); return 0; } From 9c55c5204445689c1d6b7b60e89de7f8fcf8a77f Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 16:05:58 +0800 Subject: [PATCH 0406/1050] drm/amdgpu: fix sdma24 ucode mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index f6014b024f2a..b556bd0a8797 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -105,6 +105,15 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev) } } +static void sdma_v2_4_free_microcode(struct amdgpu_device *adev) +{ + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } +} + /** * sdma_v2_4_init_microcode - load ucode images from disk * @@ -1018,6 +1027,7 @@ static int sdma_v2_4_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + sdma_v2_4_free_microcode(adev); return 0; } From d1ff53b7c2aa6e8c9dbd37ea7d858eeaba1ecb4a Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 16:07:40 +0800 Subject: [PATCH 0407/1050] drm/amdgpu: fix cik sdma ucode memleak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 0079916e6d93..9dc4e24e31e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -66,6 +66,16 @@ MODULE_FIRMWARE("radeon/mullins_sdma1.bin"); u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); + +static void cik_sdma_free_microcode(struct amdgpu_device *adev) +{ + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } +} + /* * sDMA - System DMA * Starting with CIK, the GPU has new asynchronous @@ -1005,6 +1015,7 @@ static int cik_sdma_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + cik_sdma_free_microcode(adev); return 0; } From e6232effab9091472689b1c5604a7e59d320a8e0 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 15:16:04 +0800 Subject: [PATCH 0408/1050] drm/amdgpu: fix fiji smu fini mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index 673a75c74e18..0ac864200283 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -1006,6 +1006,10 @@ static int fiji_smu_init(struct pp_smumgr *smumgr) static int fiji_smu_fini(struct pp_smumgr *smumgr) { + struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend); + + smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle); + if (smumgr->backend) { kfree(smumgr->backend); smumgr->backend = NULL; From 86e4cdd675f489e0b3deaa3d6b75cddadd16e71c Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 15:16:26 +0800 Subject: [PATCH 0409/1050] drm/amdgpu: fix tonga smu_fini mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 32820b680d88..70d3ecf27bde 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -328,6 +328,11 @@ int tonga_write_smc_sram_dword(struct pp_smumgr *smumgr, static int tonga_smu_fini(struct pp_smumgr *smumgr) { + struct tonga_smumgr *priv = (struct tonga_smumgr *)(smumgr->backend); + + smu_free_memory(smumgr->device, (void *)priv->smu_buffer.handle); + smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle); + if (smumgr->backend != NULL) { kfree(smumgr->backend); smumgr->backend = NULL; From a392746a8c38de494a1a2d00c5cfd34a05449e35 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 31 May 2016 13:44:30 +0800 Subject: [PATCH 0410/1050] drm/amdgpu: add release firmware for cgs Powerplay uses cgs to load the firmware so add a function to release it as well to avoid leaking it on driver unload. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 12 ++++++++++++ drivers/gpu/drm/amd/include/cgs_common.h | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 199f76baf22c..8943099eb135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -696,6 +696,17 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) return result; } +static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type) +{ + CGS_FUNC_ADEV; + if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) { + release_firmware(adev->pm.fw); + return 0; + } + /* cannot release other firmware because they are not created by cgs */ + return -EINVAL; +} + static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info) @@ -1125,6 +1136,7 @@ static const struct cgs_ops amdgpu_cgs_ops = { amdgpu_cgs_pm_query_clock_limits, amdgpu_cgs_set_camera_voltages, amdgpu_cgs_get_firmware_info, + amdgpu_cgs_rel_firmware, amdgpu_cgs_set_powergating_state, amdgpu_cgs_set_clockgating_state, amdgpu_cgs_get_active_displays_info, diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h index a461e155a160..7464daf89ca1 100644 --- a/drivers/gpu/drm/amd/include/cgs_common.h +++ b/drivers/gpu/drm/amd/include/cgs_common.h @@ -581,6 +581,9 @@ typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info); +typedef int (*cgs_rel_firmware)(struct cgs_device *cgs_device, + enum cgs_ucode_id type); + typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_powergating_state state); @@ -645,6 +648,7 @@ struct cgs_ops { cgs_set_camera_voltages_t set_camera_voltages; /* Firmware Info */ cgs_get_firmware_info get_firmware_info; + cgs_rel_firmware rel_firmware; /* cg pg interface*/ cgs_set_powergating_state set_powergating_state; cgs_set_clockgating_state set_clockgating_state; @@ -738,6 +742,8 @@ struct cgs_device CGS_CALL(set_camera_voltages,dev,mask,voltages) #define cgs_get_firmware_info(dev, type, info) \ CGS_CALL(get_firmware_info, dev, type, info) +#define cgs_rel_firmware(dev, type) \ + CGS_CALL(rel_firmware, dev, type) #define cgs_set_powergating_state(dev, block_type, state) \ CGS_CALL(set_powergating_state, dev, block_type, state) #define cgs_set_clockgating_state(dev, block_type, state) \ From 5bbc16cc7be89dbe6dd824570456c3340b6d2ef7 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 31 May 2016 13:44:48 +0800 Subject: [PATCH 0411/1050] drm/amdgpu: fix smu ucode memleak (v2) Properly release the smu ucode in powerplay. v2: agd: add polaris as well Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c | 1 + drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index 0ac864200283..8e52a2e82db5 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -1014,6 +1014,8 @@ static int fiji_smu_fini(struct pp_smumgr *smumgr) kfree(smumgr->backend); smumgr->backend = NULL; } + + cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index de618ead9db8..043b6ac09d5f 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -469,6 +469,7 @@ int polaris10_smu_fini(struct pp_smumgr *smumgr) kfree(smumgr->backend); smumgr->backend = NULL; } + cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 70d3ecf27bde..b22722eabafc 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -337,6 +337,8 @@ static int tonga_smu_fini(struct pp_smumgr *smumgr) kfree(smumgr->backend); smumgr->backend = NULL; } + + cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU); return 0; } From d2e312183b62cde0c44af35664f3b104b247dd9c Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 2 Jun 2016 19:11:01 +0800 Subject: [PATCH 0412/1050] drm/amd/powerplay: fix bug visit array out of bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rex Zhu Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c | 2 +- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c index 24a16e49b571..586f73276226 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c @@ -1830,7 +1830,7 @@ static uint16_t fiji_find_closest_vddci(struct pp_hwmgr *hwmgr, uint16_t vddci) PP_ASSERT_WITH_CODE(false, "VDDCI is larger than max VDDCI in VDDCI Voltage Table!", - return vddci_table->entries[i].value); + return vddci_table->entries[i-1].value); } static int fiji_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 910d56dcd7b1..20f20e075588 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -469,7 +469,7 @@ uint16_t phm_find_closest_vddci(struct pp_atomctrl_voltage_table *vddci_table, u PP_ASSERT_WITH_CODE(false, "VDDCI is larger than max VDDCI in VDDCI Voltage Table!", - return vddci_table->entries[i].value); + return vddci_table->entries[i-1].value); } int phm_find_boot_level(void *table, From 5f96ddb4607382528ef2eb23b49ce1856fdb316d Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 3 Jun 2016 19:12:42 +0800 Subject: [PATCH 0413/1050] drm/amd/powerplay: delete useless code as pptable changed in vbios. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vbios table changed so this code is useless now. Signed-off-by: Rex Zhu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c index 16fed487973b..d27e8c40602a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c @@ -2847,27 +2847,6 @@ static int tonga_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) } } - /* Initialize Vddc DPM table based on allow Vddc values. And populate corresponding std values. */ - for (i = 0; i < allowed_vdd_sclk_table->count; i++) { - data->dpm_table.vddc_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddc; - /* tonga_hwmgr->dpm_table.VddcTable.dpm_levels[i].param1 = stdVoltageTable->entries[i].Leakage; */ - /* param1 is for corresponding std voltage */ - data->dpm_table.vddc_table.dpm_levels[i].enabled = 1; - } - data->dpm_table.vddc_table.count = allowed_vdd_sclk_table->count; - - if (NULL != allowed_vdd_mclk_table) { - /* Initialize Vddci DPM table based on allow Mclk values */ - for (i = 0; i < allowed_vdd_mclk_table->count; i++) { - data->dpm_table.vdd_ci_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddci; - data->dpm_table.vdd_ci_table.dpm_levels[i].enabled = 1; - data->dpm_table.mvdd_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].mvdd; - data->dpm_table.mvdd_table.dpm_levels[i].enabled = 1; - } - data->dpm_table.vdd_ci_table.count = allowed_vdd_mclk_table->count; - data->dpm_table.mvdd_table.count = allowed_vdd_mclk_table->count; - } - /* setup PCIE gen speed levels*/ tonga_setup_default_pcie_tables(hwmgr); From 624531886987f0f1b5d01fb598034d039198e090 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Jun 2016 17:57:54 +0100 Subject: [PATCH 0414/1050] ARM: 8578/1: mm: ensure pmd_present only checks the valid bit In a subsequent patch, pmd_mknotpresent will clear the valid bit of the pmd entry, resulting in a not-present entry from the hardware's perspective. Unfortunately, pmd_present simply checks for a non-zero pmd value and will therefore continue to return true even after a pmd_mknotpresent operation. Since pmd_mknotpresent is only used for managing huge entries, this is only an issue for the 3-level case. This patch fixes the 3-level pmd_present implementation to take into account the valid bit. For bisectability, the change is made before the fix to pmd_mknotpresent. [catalin.marinas@arm.com: comment update regarding pmd_mknotpresent patch] Fixes: 8d9625070073 ("ARM: mm: Transparent huge page support for LPAE systems.") Cc: # 3.11+ Cc: Russell King Cc: Steve Capper Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 1 + arch/arm/include/asm/pgtable.h | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index aeddd28b3595..92fd2c8a9af0 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -193,6 +193,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pmd_large(pmd) (pmd_val(pmd) & 2) #define pmd_bad(pmd) (pmd_val(pmd) & 2) +#define pmd_present(pmd) (pmd_val(pmd)) #define copy_pmd(pmdpd,pmdps) \ do { \ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index fa70db7c714b..4dce1580bc15 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -211,6 +211,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) : !!(pmd_val(pmd) & (val))) #define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val))) +#define pmd_present(pmd) (pmd_isset((pmd), L_PMD_SECT_VALID)) #define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF)) #define pte_special(pte) (pte_isset((pte), L_PTE_SPECIAL)) static inline pte_t pte_mkspecial(pte_t pte) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 348caabb7625..d62204060cbe 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -182,7 +182,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) #define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_present(pmd) (pmd_val(pmd)) static inline pte_t *pmd_page_vaddr(pmd_t pmd) { From 56530f5d2ddc9b9fade7ef8db9cb886e9dc689b5 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 7 Jun 2016 17:58:06 +0100 Subject: [PATCH 0415/1050] ARM: 8579/1: mm: Fix definition of pmd_mknotpresent Currently pmd_mknotpresent will use a zero entry to respresent an invalidated pmd. Unfortunately this definition clashes with pmd_none, thus it is possible for a race condition to occur if zap_pmd_range sees pmd_none whilst __split_huge_pmd_locked is running too with pmdp_invalidate just called. This patch fixes the race condition by modifying pmd_mknotpresent to create non-zero faulting entries (as is done in other architectures), removing the ambiguity with pmd_none. [catalin.marinas@arm.com: using L_PMD_SECT_VALID instead of PMD_TYPE_SECT] Fixes: 8d9625070073 ("ARM: mm: Transparent huge page support for LPAE systems.") Cc: # 3.11+ Reported-by: Kirill A. Shutemov Acked-by: Will Deacon Cc: Russell King Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-3level.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 4dce1580bc15..2a029bceaf2f 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -250,10 +250,10 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); #define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) -/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */ +/* represent a notpresent pmd by faulting entry, this is used by pmdp_invalidate */ static inline pmd_t pmd_mknotpresent(pmd_t pmd) { - return __pmd(0); + return __pmd(pmd_val(pmd) & ~L_PMD_SECT_VALID); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) From be94535f95313a013b844b563ef15ddd8fb43da8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 9 Jun 2016 09:51:39 +0200 Subject: [PATCH 0416/1050] mlxsw: spectrum: Make split flow match firmware requirements When a port is created following a split / unsplit we need to map it to the correct module and lane, enable it and then continue to initialize its various parameters such as MTU and VLAN filters. Under certain conditions, such as trying to split ports at the bottom row of the front panel by four, we get firmware errors. After evaluating this with the firmware team it was decided to alter the split / unsplit flow, so that first all the affected ports are mapped, then enabled and finally each is initialized separately. Fix the split / unsplit flow by first mapping and enabling all the affected ports. Newer firmware versions will support both flows. Fixes: 18f1e70c4137 ("mlxsw: spectrum: Introduce port splitting") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 147 +++++++++++------- 1 file changed, 95 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 4a7273771028..cc62d4c64fb5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -247,13 +247,21 @@ static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); } +static int __mlxsw_sp_port_swid_set(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 swid) +{ + char pspa_pl[MLXSW_REG_PSPA_LEN]; + + mlxsw_reg_pspa_pack(pspa_pl, swid, local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); +} + static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char pspa_pl[MLXSW_REG_PSPA_LEN]; - mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sp_port->local_port); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); + return __mlxsw_sp_port_swid_set(mlxsw_sp, mlxsw_sp_port->local_port, + swid); } static int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -1681,8 +1689,8 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width) +static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, + bool split, u8 module, u8 width) { struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; @@ -1839,28 +1847,6 @@ err_port_active_vlans_alloc: return err; } -static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width, u8 lane) -{ - int err; - - err = mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, - lane); - if (err) - return err; - - err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split, module, - width); - if (err) - goto err_port_create; - - return 0; - -err_port_create: - mlxsw_sp_port_module_unmap(mlxsw_sp, local_port); - return err; -} - static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) { struct net_device *dev = mlxsw_sp_port->dev; @@ -1927,7 +1913,7 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) if (!width) continue; mlxsw_sp->port_to_module[i] = module; - err = __mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); + err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); if (err) goto err_port_create; } @@ -1948,12 +1934,85 @@ static u8 mlxsw_sp_cluster_base_port_get(u8 local_port) return local_port - offset; } +static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, + u8 module, unsigned int count) +{ + u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; + int err, i; + + for (i = 0; i < count; i++) { + err = mlxsw_sp_port_module_map(mlxsw_sp, base_port + i, module, + width, i * width); + if (err) + goto err_port_module_map; + } + + for (i = 0; i < count; i++) { + err = __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, 0); + if (err) + goto err_port_swid_set; + } + + for (i = 0; i < count; i++) { + err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, + module, width); + if (err) + goto err_port_create; + } + + return 0; + +err_port_create: + for (i--; i >= 0; i--) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + i = count; +err_port_swid_set: + for (i--; i >= 0; i--) + __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, + MLXSW_PORT_SWID_DISABLED_PORT); + i = count; +err_port_module_map: + for (i--; i >= 0; i--) + mlxsw_sp_port_module_unmap(mlxsw_sp, base_port + i); + return err; +} + +static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, + u8 base_port, unsigned int count) +{ + u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH; + int i; + + /* Split by four means we need to re-create two ports, otherwise + * only one. + */ + count = count / 2; + + for (i = 0; i < count; i++) { + local_port = base_port + i * 2; + module = mlxsw_sp->port_to_module[local_port]; + + mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, + 0); + } + + for (i = 0; i < count; i++) + __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i * 2, 0); + + for (i = 0; i < count; i++) { + local_port = base_port + i * 2; + module = mlxsw_sp->port_to_module[local_port]; + + mlxsw_sp_port_create(mlxsw_sp, local_port, false, module, + width); + } +} + static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, unsigned int count) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); struct mlxsw_sp_port *mlxsw_sp_port; - u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; u8 module, cur_width, base_port; int i; int err; @@ -2001,25 +2060,16 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, for (i = 0; i < count; i++) mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count; i++) { - err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, - module, width, i * width); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to create split port\n"); - goto err_port_create; - } + err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n"); + goto err_port_split_create; } return 0; -err_port_create: - for (i--; i >= 0; i--) - mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count / 2; i++) { - module = mlxsw_sp->port_to_module[base_port + i * 2]; - mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, - module, MLXSW_PORT_MODULE_MAX_WIDTH, 0); - } +err_port_split_create: + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); return err; } @@ -2061,14 +2111,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) for (i = 0; i < count; i++) mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count / 2; i++) { - module = mlxsw_sp->port_to_module[base_port + i * 2]; - err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, - module, MLXSW_PORT_MODULE_MAX_WIDTH, - 0); - if (err) - dev_err(mlxsw_sp->bus_info->dev, "Failed to reinstantiate port\n"); - } + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); return 0; } From d664b41e2adf5851e4d0d39f450b2f3f808b65d6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 9 Jun 2016 09:51:40 +0200 Subject: [PATCH 0417/1050] mlxsw: spectrum: Don't sleep during ndo_get_phys_port_name() When rtnl_fill_ifinfo() is called for a certain netdevice it queries its various parameters such as switch id and physical port name. The function might get called in an atomic context, which means the underlying driver must not sleep during the query operation. Don't query the device and sleep during ndo_get_phys_port_name(), but instead store the needed parameters in port creation time. Fixes: 2bf9a58675c5 ("mlxsw: spectrum: Add support for physical port names") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 64 ++++++------------- .../net/ethernet/mellanox/mlxsw/spectrum.h | 5 ++ 2 files changed, 26 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cc62d4c64fb5..6f9e3ddff4a8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -313,9 +313,9 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl); } -static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 *p_module, - u8 *p_width, u8 *p_lane) +static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 *p_module, + u8 *p_width, u8 *p_lane) { char pmlp_pl[MLXSW_REG_PMLP_LEN]; int err; @@ -330,16 +330,6 @@ static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 *p_module, - u8 *p_width) -{ - u8 lane; - - return __mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, p_module, - p_width, &lane); -} - static int mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 module, u8 width, u8 lane) { @@ -957,17 +947,11 @@ static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name, size_t len) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - u8 module, width, lane; + u8 module = mlxsw_sp_port->mapping.module; + u8 width = mlxsw_sp_port->mapping.width; + u8 lane = mlxsw_sp_port->mapping.lane; int err; - err = __mlxsw_sp_port_module_info_get(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, - &module, &width, &lane); - if (err) { - netdev_err(dev, "Failed to retrieve module information\n"); - return err; - } - if (!mlxsw_sp_port->split) err = snprintf(name, len, "p%d", module + 1); else @@ -1690,7 +1674,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) } static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width) + bool split, u8 module, u8 width, u8 lane) { struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; @@ -1705,6 +1689,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_sp_port->mlxsw_sp = mlxsw_sp; mlxsw_sp_port->local_port = local_port; mlxsw_sp_port->split = split; + mlxsw_sp_port->mapping.module = module; + mlxsw_sp_port->mapping.width = width; + mlxsw_sp_port->mapping.lane = lane; bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); if (!mlxsw_sp_port->active_vlans) { @@ -1895,8 +1882,8 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) { + u8 module, width, lane; size_t alloc_size; - u8 module, width; int i; int err; @@ -1907,13 +1894,14 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module, - &width); + &width, &lane); if (err) goto err_port_module_info_get; if (!width) continue; mlxsw_sp->port_to_module[i] = module; - err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); + err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width, + lane); if (err) goto err_port_create; } @@ -1955,7 +1943,7 @@ static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, for (i = 0; i < count; i++) { err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, - module, width); + module, width, i * width); if (err) goto err_port_create; } @@ -2004,7 +1992,7 @@ static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, module = mlxsw_sp->port_to_module[local_port]; mlxsw_sp_port_create(mlxsw_sp, local_port, false, module, - width); + width, 0); } } @@ -2024,18 +2012,14 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, return -EINVAL; } + module = mlxsw_sp_port->mapping.module; + cur_width = mlxsw_sp_port->mapping.width; + if (count != 2 && count != 4) { netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n"); return -EINVAL; } - err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, - &cur_width); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); - return err; - } - if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) { netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n"); return -EINVAL; @@ -2077,10 +2061,9 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); struct mlxsw_sp_port *mlxsw_sp_port; - u8 module, cur_width, base_port; + u8 cur_width, base_port; unsigned int count; int i; - int err; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { @@ -2094,12 +2077,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) return -EINVAL; } - err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, - &cur_width); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); - return err; - } + cur_width = mlxsw_sp_port->mapping.width; count = cur_width == 1 ? 4 : 2; base_port = mlxsw_sp_cluster_base_port_get(local_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index e2c022d3e2f3..13b30eaa13d4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -229,6 +229,11 @@ struct mlxsw_sp_port { struct ieee_maxrate *maxrate; struct ieee_pfc *pfc; } dcb; + struct { + u8 module; + u8 width; + u8 lane; + } mapping; /* 802.1Q bridge VLANs */ unsigned long *active_vlans; unsigned long *untagged_vlans; From 8c237cd0ccb570d13158758af02e11359a4a5b1c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 9 Jun 2016 15:50:49 +1000 Subject: [PATCH 0418/1050] drivers/of: Fix depth for sub-tree blob in unflatten_dt_nodes() The function is unflattening device sub-tree blob if @dad passed to the function is valid. Currently, this functionality is used by PPC PowerNV PCI hotplug driver only. There are possibly multiple nodes in the first level of depth, fdt_next_node() bails immediately when @depth becomes negative before the second device node can be probed successfully. It leads to the device nodes except the first one won't be unflattened successfully. This fixes the issue by setting the initial depth (@inital_depth) to 1 when this function is called to unflatten device sub-tree blob. No logic changes when this function is used to unflatten non-sub-tree blob. Cc: Rhyland Klein Fixes: 78c44d910 ("drivers/of: Fix depth when unflattening devicetree") Signed-off-by: Gavin Shan Tested-by: Rhyland Klein Tested-by: Andrew Donnellan Signed-off-by: Rob Herring --- drivers/of/fdt.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 14f2f8c7c260..33daffc4392c 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -395,7 +395,7 @@ static int unflatten_dt_nodes(const void *blob, struct device_node **nodepp) { struct device_node *root; - int offset = 0, depth = 0; + int offset = 0, depth = 0, initial_depth = 0; #define FDT_MAX_DEPTH 64 unsigned int fpsizes[FDT_MAX_DEPTH]; struct device_node *nps[FDT_MAX_DEPTH]; @@ -405,11 +405,22 @@ static int unflatten_dt_nodes(const void *blob, if (nodepp) *nodepp = NULL; + /* + * We're unflattening device sub-tree if @dad is valid. There are + * possibly multiple nodes in the first level of depth. We need + * set @depth to 1 to make fdt_next_node() happy as it bails + * immediately when negative @depth is found. Otherwise, the device + * nodes except the first one won't be unflattened successfully. + */ + if (dad) + depth = initial_depth = 1; + root = dad; fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0; nps[depth] = dad; + for (offset = 0; - offset >= 0 && depth >= 0; + offset >= 0 && depth >= initial_depth; offset = fdt_next_node(blob, offset, &depth)) { if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH)) continue; From 06dfeef88573cf032e5c27e37f80ff5237b3318a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 9 Jun 2016 11:38:34 +0100 Subject: [PATCH 0419/1050] drivers: of: add definition of early_init_dt_alloc_reserved_memory_arch The function early_init_dt_alloc_reserved_memory_arch is defined in drivers/of/of_reserved_mem.c but is not declared in any of the header files. Add the declaration of this to avoid the warning: drivers/of/of_reserved_mem.c:31:19: warning: symbol 'early_init_dt_alloc_reserved_memory_arch' was not declared. Should it be static? Signed-off-by: Ben Dooks [robh: drop extern from declaration] Signed-off-by: Rob Herring --- include/linux/of_reserved_mem.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index ad2f67054372..c201060e0c6d 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -31,6 +31,13 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem); int of_reserved_mem_device_init(struct device *dev); void of_reserved_mem_device_release(struct device *dev); +int early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, + phys_addr_t align, + phys_addr_t start, + phys_addr_t end, + bool nomap, + phys_addr_t *res_base); + void fdt_init_reserved_mem(void); void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size); From a7ae81952cdab56a1277bd2f9ed7284c0f575120 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 9 Jun 2016 16:56:28 +0300 Subject: [PATCH 0420/1050] i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many Intel systems the BIOS declares a SystemIO OpRegion below the SMBus PCI device as can be seen in ACPI DSDT table from Lenovo Yoga 900: Device (SBUS) { OperationRegion (SMBI, SystemIO, (SBAR << 0x05), 0x10) Field (SMBI, ByteAcc, NoLock, Preserve) { HSTS, 8, Offset (0x02), HCON, 8, HCOM, 8, TXSA, 8, DAT0, 8, DAT1, 8, HBDR, 8, PECR, 8, RXSA, 8, SDAT, 16 } There are also bunch of AML methods that that the BIOS can use to access these fields. Most of the systems in question AML methods accessing the SMBI OpRegion are never used. Now, because of this SMBI OpRegion many systems fail to load the SMBus driver with an error looking like one below: ACPI Warning: SystemIO range 0x0000000000003040-0x000000000000305F conflicts with OpRegion 0x0000000000003040-0x000000000000304F (\_SB.PCI0.SBUS.SMBI) (20160108/utaddress-255) ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver The reason is that this SMBI OpRegion conflicts with the PCI BAR used by the SMBus driver. It turns out that we can install a custom SystemIO address space handler for the SMBus device to intercept all accesses through that OpRegion. This allows us to share the PCI BAR with the AML code if it for some reason is using it. We do not expect that this OpRegion handler will ever be called but if it is we print a warning and prevent all access from the SMBus driver itself. Link: https://bugzilla.kernel.org/show_bug.cgi?id=110041 Reported-by: Andy Lutomirski Reported-by: Pali Rohár Suggested-by: Rafael J. Wysocki Signed-off-by: Mika Westerberg Acked-by: Rafael J. Wysocki Reviewed-by: Jean Delvare Reviewed-by: Benjamin Tissoires Tested-by: Pali Rohár Tested-by: Jean Delvare Signed-off-by: Wolfram Sang Cc: stable@vger.kernel.org --- drivers/i2c/busses/i2c-i801.c | 99 +++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 64b1208bca5e..4a60ad214747 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -245,6 +245,13 @@ struct i801_priv { struct platform_device *mux_pdev; #endif struct platform_device *tco_pdev; + + /* + * If set to true the host controller registers are reserved for + * ACPI AML use. Protected by acpi_lock. + */ + bool acpi_reserved; + struct mutex acpi_lock; }; #define FEATURE_SMBUS_PEC (1 << 0) @@ -718,6 +725,12 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, int ret = 0, xact = 0; struct i801_priv *priv = i2c_get_adapdata(adap); + mutex_lock(&priv->acpi_lock); + if (priv->acpi_reserved) { + mutex_unlock(&priv->acpi_lock); + return -EBUSY; + } + pm_runtime_get_sync(&priv->pci_dev->dev); hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC) @@ -820,6 +833,7 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, out: pm_runtime_mark_last_busy(&priv->pci_dev->dev); pm_runtime_put_autosuspend(&priv->pci_dev->dev); + mutex_unlock(&priv->acpi_lock); return ret; } @@ -1257,6 +1271,83 @@ static void i801_add_tco(struct i801_priv *priv) priv->tco_pdev = pdev; } +#ifdef CONFIG_ACPI +static acpi_status +i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, + u64 *value, void *handler_context, void *region_context) +{ + struct i801_priv *priv = handler_context; + struct pci_dev *pdev = priv->pci_dev; + acpi_status status; + + /* + * Once BIOS AML code touches the OpRegion we warn and inhibit any + * further access from the driver itself. This device is now owned + * by the system firmware. + */ + mutex_lock(&priv->acpi_lock); + + if (!priv->acpi_reserved) { + priv->acpi_reserved = true; + + dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n"); + dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n"); + + /* + * BIOS is accessing the host controller so prevent it from + * suspending automatically from now on. + */ + pm_runtime_get_sync(&pdev->dev); + } + + if ((function & ACPI_IO_MASK) == ACPI_READ) + status = acpi_os_read_port(address, (u32 *)value, bits); + else + status = acpi_os_write_port(address, (u32)*value, bits); + + mutex_unlock(&priv->acpi_lock); + + return status; +} + +static int i801_acpi_probe(struct i801_priv *priv) +{ + struct acpi_device *adev; + acpi_status status; + + adev = ACPI_COMPANION(&priv->pci_dev->dev); + if (adev) { + status = acpi_install_address_space_handler(adev->handle, + ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler, + NULL, priv); + if (ACPI_SUCCESS(status)) + return 0; + } + + return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]); +} + +static void i801_acpi_remove(struct i801_priv *priv) +{ + struct acpi_device *adev; + + adev = ACPI_COMPANION(&priv->pci_dev->dev); + if (!adev) + return; + + acpi_remove_address_space_handler(adev->handle, + ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler); + + mutex_lock(&priv->acpi_lock); + if (priv->acpi_reserved) + pm_runtime_put(&priv->pci_dev->dev); + mutex_unlock(&priv->acpi_lock); +} +#else +static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; } +static inline void i801_acpi_remove(struct i801_priv *priv) { } +#endif + static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) { unsigned char temp; @@ -1274,6 +1365,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) priv->adapter.dev.parent = &dev->dev; ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev)); priv->adapter.retries = 3; + mutex_init(&priv->acpi_lock); priv->pci_dev = dev; switch (dev->device) { @@ -1336,10 +1428,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) return -ENODEV; } - err = acpi_check_resource_conflict(&dev->resource[SMBBAR]); - if (err) { + if (i801_acpi_probe(priv)) return -ENODEV; - } err = pcim_iomap_regions(dev, 1 << SMBBAR, dev_driver_string(&dev->dev)); @@ -1348,6 +1438,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) "Failed to request SMBus region 0x%lx-0x%Lx\n", priv->smba, (unsigned long long)pci_resource_end(dev, SMBBAR)); + i801_acpi_remove(priv); return err; } @@ -1412,6 +1503,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) err = i2c_add_adapter(&priv->adapter); if (err) { dev_err(&dev->dev, "Failed to add SMBus adapter\n"); + i801_acpi_remove(priv); return err; } @@ -1438,6 +1530,7 @@ static void i801_remove(struct pci_dev *dev) i801_del_mux(priv); i2c_del_adapter(&priv->adapter); + i801_acpi_remove(priv); pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg); platform_device_unregister(priv->tco_pdev); From 908cf12bbca0f18a23085a5a35301509e034f0a9 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Wed, 8 Jun 2016 08:51:17 +0200 Subject: [PATCH 0421/1050] i2c: octeon: Missing AAK flag in case of I2C_M_RECV_LEN During receive the controller requires the AAK flag for all bytes but the final one. This was wrong in case of I2C_M_RECV_LEN, where the decision if the final byte is to be transmitted happened before adding the additional received length byte. Set the AAK flag if additional bytes are to be received. Signed-off-by: Jan Glauber Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-octeon.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c index aa5f01efd826..1922e4a16e92 100644 --- a/drivers/i2c/busses/i2c-octeon.c +++ b/drivers/i2c/busses/i2c-octeon.c @@ -934,8 +934,15 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target, return result; for (i = 0; i < length; i++) { - /* for the last byte TWSI_CTL_AAK must not be set */ - if (i + 1 == length) + /* + * For the last byte to receive TWSI_CTL_AAK must not be set. + * + * A special case is I2C_M_RECV_LEN where we don't know the + * additional length yet. If recv_len is set we assume we're + * not reading the final byte and therefore need to set + * TWSI_CTL_AAK. + */ + if ((i + 1 == length) && !(recv_len && i == 0)) final_read = true; /* clear iflg to allow next event */ From 8913f8d2930368f30998e60851259606eeed2c49 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Wed, 8 Jun 2016 08:51:19 +0200 Subject: [PATCH 0422/1050] i2c: octeon: Avoid printk after too long SMBUS message Remove the warning about a too long SMBUS message because the ipmi_ssif driver triggers this warning too frequently so it spams the message log. Signed-off-by: Jan Glauber Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-octeon.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c index 1922e4a16e92..30ae35146723 100644 --- a/drivers/i2c/busses/i2c-octeon.c +++ b/drivers/i2c/busses/i2c-octeon.c @@ -957,12 +957,8 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target, data[i] = octeon_i2c_data_read(i2c); if (recv_len && i == 0) { - if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) { - dev_err(i2c->dev, - "%s: read len > I2C_SMBUS_BLOCK_MAX %d\n", - __func__, data[i]); + if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) return -EPROTO; - } length += data[i]; } From edb50a5403d2e2d2b2b63a8365c4378c9c300ed6 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 10 May 2016 15:14:28 +0200 Subject: [PATCH 0423/1050] NVMe: Only release requested regions The NVMe driver only requests the PCIe device's memory regions but releases all possible regions (including eventual I/O regions). This leads to a stale warning entry in dmesg about freeing non existent resources. Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 78dca3193ca4..befac5b19490 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1679,9 +1679,14 @@ static int nvme_pci_enable(struct nvme_dev *dev) static void nvme_dev_unmap(struct nvme_dev *dev) { + struct pci_dev *pdev = to_pci_dev(dev->dev); + int bars; + if (dev->bar) iounmap(dev->bar); - pci_release_regions(to_pci_dev(dev->dev)); + + bars = pci_select_bars(pdev, IORESOURCE_MEM); + pci_release_selected_regions(pdev, bars); } static void nvme_pci_disable(struct nvme_dev *dev) @@ -1924,7 +1929,7 @@ static int nvme_dev_map(struct nvme_dev *dev) return 0; release: - pci_release_regions(pdev); + pci_release_selected_regions(pdev, bars); return -ENODEV; } From 2e9328493f89a5a06ea0ecb0b7763d61930a682a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 8 Jun 2016 15:21:39 +0900 Subject: [PATCH 0424/1050] i2c: mux: refer to i2c-mux.txt Correct references to i2c-mux.txt which was previously mux.txt. Also correct the spelling of relevant. Signed-off-by: Simon Horman Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- .../devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt | 4 ++-- Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt | 3 ++- Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt | 6 +++--- Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt | 4 ++-- Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt | 6 +++--- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt b/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt index bfeabb843941..71191ff0e781 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt @@ -44,8 +44,8 @@ Required properties: - our-claim-gpio: The GPIO that we use to claim the bus. - their-claim-gpios: The GPIOs that the other sides use to claim the bus. Note that some implementations may only support a single other master. -- Standard I2C mux properties. See mux.txt in this directory. -- Single I2C child bus node at reg 0. See mux.txt in this directory. +- Standard I2C mux properties. See i2c-mux.txt in this directory. +- Single I2C child bus node at reg 0. See i2c-mux.txt in this directory. Optional properties: - slew-delay-us: microseconds to wait for a GPIO to go high. Default is 10 us. diff --git a/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt index 6078aefe7ed4..7ce23ac61308 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt @@ -27,7 +27,8 @@ Required properties: - i2c-bus-name: The name of this bus. Also needed as pinctrl-name for the I2C parents. -Furthermore, I2C mux properties and child nodes. See mux.txt in this directory. +Furthermore, I2C mux properties and child nodes. See i2c-mux.txt in this +directory. Example: diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt index 66709a825541..21da3ecbb370 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt @@ -22,8 +22,8 @@ Required properties: - i2c-parent: The phandle of the I2C bus that this multiplexer's master-side port is connected to. - mux-gpios: list of gpios used to control the muxer -* Standard I2C mux properties. See mux.txt in this directory. -* I2C child bus nodes. See mux.txt in this directory. +* Standard I2C mux properties. See i2c-mux.txt in this directory. +* I2C child bus nodes. See i2c-mux.txt in this directory. Optional properties: - idle-state: value to set the muxer to when idle. When no value is @@ -33,7 +33,7 @@ For each i2c child node, an I2C child bus will be created. They will be numbered based on their order in the device tree. Whenever an access is made to a device on a child bus, the value set -in the revelant node's reg property will be output using the list of +in the relevant node's reg property will be output using the list of GPIOs, the first in the list holding the least-significant value. If an idle state is defined, using the idle-state (optional) property, diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt index ae8af1694e95..33119a98e144 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt @@ -28,9 +28,9 @@ Also required are: * Standard pinctrl properties that specify the pin mux state for each child bus. See ../pinctrl/pinctrl-bindings.txt. -* Standard I2C mux properties. See mux.txt in this directory. +* Standard I2C mux properties. See i2c-mux.txt in this directory. -* I2C child bus nodes. See mux.txt in this directory. +* I2C child bus nodes. See i2c-mux.txt in this directory. For each named state defined in the pinctrl-names property, an I2C child bus will be created. I2C child bus numbers are assigned based on the index into diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt index 688783fbe696..de00d7fc450b 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt @@ -7,8 +7,8 @@ Required properties: - compatible: i2c-mux-reg - i2c-parent: The phandle of the I2C bus that this multiplexer's master-side port is connected to. -* Standard I2C mux properties. See mux.txt in this directory. -* I2C child bus nodes. See mux.txt in this directory. +* Standard I2C mux properties. See i2c-mux.txt in this directory. +* I2C child bus nodes. See i2c-mux.txt in this directory. Optional properties: - reg: this pair of specifies the register to control the mux. @@ -24,7 +24,7 @@ Optional properties: given, it defaults to the last value used. Whenever an access is made to a device on a child bus, the value set -in the revelant node's reg property will be output to the register. +in the relevant node's reg property will be output to the register. If an idle state is defined, using the idle-state (optional) property, whenever an access is not being made to a device on a child bus, the From 9f05e6219023116b59f6495e8c4d4ba352dd5fea Mon Sep 17 00:00:00 2001 From: Lukasz Gemborowski Date: Mon, 6 Jun 2016 15:51:50 +0200 Subject: [PATCH 0425/1050] i2c: mux: reg: Provide of_match_table of_match_table was not filled which prevents device to be instantiated from device tree node. Signed-off-by: Lukasz Gemborowski Reviewed-by: Alexander Sverdlin Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-reg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index 6773cadf7c9f..26e7c5187a58 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -260,6 +260,7 @@ static struct platform_driver i2c_mux_reg_driver = { .remove = i2c_mux_reg_remove, .driver = { .name = "i2c-mux-reg", + .of_match_table = of_match_ptr(i2c_mux_reg_of_match), }, }; From 67961f9db8c477026ea20ce05761bde6f8bf85b0 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 8 Jun 2016 15:33:42 -0700 Subject: [PATCH 0426/1050] mm/hugetlb: fix huge page reserve accounting for private mappings When creating a private mapping of a hugetlbfs file, it is possible to unmap pages via ftruncate or fallocate hole punch. If subsequent faults repopulate these mappings, the reserve counts will go negative. This is because the code currently assumes all faults to private mappings will consume reserves. The problem can be recreated as follows: - mmap(MAP_PRIVATE) a file in hugetlbfs filesystem - write fault in pages in the mapping - fallocate(FALLOC_FL_PUNCH_HOLE) some pages in the mapping - write fault in pages in the hole This will result in negative huge page reserve counts and negative subpool usage counts for the hugetlbfs. Note that this can also be recreated with ftruncate, but fallocate is more straight forward. This patch modifies the routines vma_needs_reserves and vma_has_reserves to examine the reserve map associated with private mappings similar to that for shared mappings. However, the reserve map semantics for private and shared mappings are very different. This results in subtly different code that is explained in the comments. Link: http://lkml.kernel.org/r/1464720957-15698-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Hillf Danton Cc: Dave Hansen Cc: Kirill Shutemov Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Aneesh Kumar Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d26162e81fea..388c2bb9b55c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -832,8 +832,27 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg) * Only the process that called mmap() has reserves for * private mappings. */ - if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) - return true; + if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { + /* + * Like the shared case above, a hole punch or truncate + * could have been performed on the private mapping. + * Examine the value of chg to determine if reserves + * actually exist or were previously consumed. + * Very Subtle - The value of chg comes from a previous + * call to vma_needs_reserves(). The reserve map for + * private mappings has different (opposite) semantics + * than that of shared mappings. vma_needs_reserves() + * has already taken this difference in semantics into + * account. Therefore, the meaning of chg is the same + * as in the shared case above. Code could easily be + * combined, but keeping it separate draws attention to + * subtle differences. + */ + if (chg) + return false; + else + return true; + } return false; } @@ -1816,6 +1835,25 @@ static long __vma_reservation_common(struct hstate *h, if (vma->vm_flags & VM_MAYSHARE) return ret; + else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) { + /* + * In most cases, reserves always exist for private mappings. + * However, a file associated with mapping could have been + * hole punched or truncated after reserves were consumed. + * As subsequent fault on such a range will not use reserves. + * Subtle - The reserve map for private mappings has the + * opposite meaning than that of shared mappings. If NO + * entry is in the reserve map, it means a reservation exists. + * If an entry exists in the reserve map, it means the + * reservation has already been consumed. As a result, the + * return value of this routine is the opposite of the + * value returned from reserve map manipulation routines above. + */ + if (ret) + return 0; + else + return 1; + } else return ret < 0 ? ret : 0; } From 91a4c272145652d798035c17e1c02c91001d3f51 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 8 Jun 2016 15:33:45 -0700 Subject: [PATCH 0427/1050] kasan: change memory hot-add error messages to info messages Change the following memory hot-add error messages to info messages. There is no need for these to be errors. kasan: WARNING: KASAN doesn't support memory hot-add kasan: Memory hot-add will be disabled Link: http://lkml.kernel.org/r/1464794430-5486-1-git-send-email-shuahkh@osg.samsung.com Signed-off-by: Shuah Khan Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 18b6a2b8d183..28439acda6ec 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -763,8 +763,8 @@ static int kasan_mem_notifier(struct notifier_block *nb, static int __init kasan_memhotplug_init(void) { - pr_err("WARNING: KASAN doesn't support memory hot-add\n"); - pr_err("Memory hot-add will be disabled\n"); + pr_info("WARNING: KASAN doesn't support memory hot-add\n"); + pr_info("Memory hot-add will be disabled\n"); hotplug_memory_notifier(kasan_mem_notifier, 0); From d0db7afa1b767d95e3e14632718da5a9794129bc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 8 Jun 2016 15:33:47 -0700 Subject: [PATCH 0428/1050] revert "mm: memcontrol: fix possible css ref leak on oom" Revert commit 1383399d7be0 ("mm: memcontrol: fix possible css ref leak on oom"). Johannes points out "There is a task_in_memcg_oom() check before calling mem_cgroup_oom()". Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 58c69c94402a..75e74408cc8f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1608,7 +1608,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) { - if (!current->memcg_may_oom || current->memcg_in_oom) + if (!current->memcg_may_oom) return; /* * We are in the middle of the charge context here, so we From 770a5370226cb207461bbad902543381c1fad521 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 8 Jun 2016 15:33:50 -0700 Subject: [PATCH 0429/1050] mm: thp: broken page count after commit aa88b68c3b1d Christian Borntraeger reported a kernel panic after corrupt page counts, and it turned out to be a regression introduced with commit aa88b68c3b1d ("thp: keep huge zero page pinned until tlb flush"), at least on s390. put_huge_zero_page() was moved over from zap_huge_pmd() to release_pages(), and it was replaced by tlb_remove_page(). However, release_pages() might not always be triggered by (the arch-specific) tlb_remove_page(). On s390 we call free_page_and_swap_cache() from tlb_remove_page(), and not tlb_flush_mmu() -> free_pages_and_swap_cache() like the generic version, because we don't use the MMU-gather logic. Although both functions have very similar names, they are doing very unsimilar things, in particular free_page_xxx is just doing a put_page(), while free_pages_xxx calls release_pages(). This of course results in very harmful put_page()s on the huge zero page, on architectures where tlb_remove_page() is implemented in this way. It seems to affect only s390 and sh, but sh doesn't have THP support, so the problem (currently) probably only exists on s390. The following quick hack fixed the issue: Link: http://lkml.kernel.org/r/20160602172141.75c006a9@thinkpad Signed-off-by: Gerald Schaefer Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: "Aneesh Kumar K.V" Cc: Mel Gorman Cc: Hugh Dickins Cc: Johannes Weiner Cc: Dave Hansen Cc: Vlastimil Babka Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: [4.6.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_state.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index 0d457e7db8d6..c99463ac02fb 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -252,7 +252,10 @@ static inline void free_swap_cache(struct page *page) void free_page_and_swap_cache(struct page *page) { free_swap_cache(page); - put_page(page); + if (is_huge_zero_page(page)) + put_huge_zero_page(); + else + put_page(page); } /* From ba62bafe942b159a6109cbec780d36496e06b6c5 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Wed, 8 Jun 2016 15:33:53 -0700 Subject: [PATCH 0430/1050] kernel/relay.c: fix potential memory leak When relay_open_buf() fails in relay_open(), code will goto free_bufs, but chan is nowhere freed. Link: http://lkml.kernel.org/r/1464777927-19675-1-git-send-email-yizhouzhou@ict.ac.cn Signed-off-by: Zhouyi Zhou Cc: Jens Axboe Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/relay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/relay.c b/kernel/relay.c index 074994bcfa9b..04d7cf3ef8cf 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -614,6 +614,7 @@ free_bufs: kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); + kfree(chan); return NULL; } EXPORT_SYMBOL_GPL(relay_open); From f3a932baa7f65072434f1c04c02c8a4d2746fcfc Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Wed, 8 Jun 2016 15:33:56 -0700 Subject: [PATCH 0431/1050] mm: introduce dedicated WQ_MEM_RECLAIM workqueue to do lru_add_drain_all This patch is based on https://patchwork.ozlabs.org/patch/574623/. Tejun submitted commit 23d11a58a9a6 ("workqueue: skip flush dependency checks for legacy workqueues") for the legacy create*_workqueue() interface. But some workq created by alloc_workqueue still reports warning on memory reclaim, e.g nvme_workq with flag WQ_MEM_RECLAIM set: workqueue: WQ_MEM_RECLAIM nvme:nvme_reset_work is flushing !WQ_MEM_RECLAIM events:lru_add_drain_per_cpu ------------[ cut here ]------------ WARNING: CPU: 0 PID: 6 at SoC/linux/kernel/workqueue.c:2448 check_flush_dependency+0xb4/0x10c ... check_flush_dependency+0xb4/0x10c flush_work+0x54/0x140 lru_add_drain_all+0x138/0x188 migrate_prep+0xc/0x18 alloc_contig_range+0xf4/0x350 cma_alloc+0xec/0x1e4 dma_alloc_from_contiguous+0x38/0x40 __dma_alloc+0x74/0x25c nvme_alloc_queue+0xcc/0x36c nvme_reset_work+0x5c4/0xda8 process_one_work+0x128/0x2ec worker_thread+0x58/0x434 kthread+0xd4/0xe8 ret_from_fork+0x10/0x50 That's because lru_add_drain_all() will schedule the drain work on system_wq, whose flag is set to 0, !WQ_MEM_RECLAIM. Introduce a dedicated WQ_MEM_RECLAIM workqueue to do lru_add_drain_all(), aiding in getting memory freed. Link: http://lkml.kernel.org/r/1464917521-9775-1-git-send-email-shhuiw@foxmail.com Signed-off-by: Wang Sheng-Hui Acked-by: Tejun Heo Cc: Keith Busch Cc: Peter Zijlstra Cc: Thierry Reding Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/mm/swap.c b/mm/swap.c index 95916142fc46..59f5fafa6e1f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -667,6 +667,24 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy) static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); +/* + * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM + * workqueue, aiding in getting memory freed. + */ +static struct workqueue_struct *lru_add_drain_wq; + +static int __init lru_init(void) +{ + lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0); + + if (WARN(!lru_add_drain_wq, + "Failed to create workqueue lru_add_drain_wq")) + return -ENOMEM; + + return 0; +} +early_initcall(lru_init); + void lru_add_drain_all(void) { static DEFINE_MUTEX(lock); @@ -686,7 +704,7 @@ void lru_add_drain_all(void) pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || need_activate_page_drain(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); - schedule_work_on(cpu, work); + queue_work_on(cpu, lru_add_drain_wq, work); cpumask_set_cpu(cpu, &has_work); } } From 18aba41cbfbcd138e9f6d8d446427d8b7691c194 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Wed, 8 Jun 2016 15:33:59 -0700 Subject: [PATCH 0432/1050] mm/fadvise.c: do not discard partial pages with POSIX_FADV_DONTNEED I noticed that the logic in the fadvise64_64 syscall is incorrect for partial pages. While first page of the region is correctly skipped if it is partial, the last page of the region is mistakenly discarded. This leads to problems for applications that read data in non-page-aligned chunks discarding already processed data between the reads. A somewhat misguided application that does something like write(XX bytes (non-page-alligned)); drop the data it just wrote; repeat gets a significant penalty in performance as a result. Link: http://lkml.kernel.org/r/1464917140-1506698-1-git-send-email-green@linuxhacker.ru Signed-off-by: Oleg Drokin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/fadvise.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/fadvise.c b/mm/fadvise.c index b8024fa7101d..6c707bfe02fd 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -126,6 +126,17 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) */ start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; end_index = (endbyte >> PAGE_SHIFT); + if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) { + /* First page is tricky as 0 - 1 = -1, but pgoff_t + * is unsigned, so the end_index >= start_index + * check below would be true and we'll discard the whole + * file cache which is not what was asked. + */ + if (end_index == 0) + break; + + end_index--; + } if (end_index >= start_index) { unsigned long count = invalidate_mapping_pages(mapping, From a9cc4006155a68dd0940728f4f222dd035180904 Mon Sep 17 00:00:00 2001 From: Doug Oucharek Date: Thu, 9 Jun 2016 18:45:45 -0400 Subject: [PATCH 0433/1050] staging: lustre: lnet: Don't access NULL NI on failure path In kiblnd_passive_connect(), if we are failing the connection attempt because we cannot find a valid NI (we have a NULL NI), we were coring after the "goto fail" because the failure path was assuming non-NULL NI. This patch ensures we don't dereference a NULL NI on that failure path. Signed-off-by: Doug Oucharek Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8022 Reviewed-on: http://review.whamcloud.com/19614 Reviewed-by: Dmitry Eremin Reviewed-by: James Simmons Reviewed-by: Matt Ezell Reviewed-by: Oleg Drokin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index bbfee53cfcf5..845e49a52430 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -2521,12 +2521,13 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) return 0; failed: - if (ni) + if (ni) { lnet_ni_decref(ni); + rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni); + rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni); + } rej.ibr_version = version; - rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni); - rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni); kiblnd_reject(cmid, &rej); return -ECONNREFUSED; From 29ccf7590ec49647b3442b6b2c64c4406a931c80 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 10 Jun 2016 11:40:49 +1000 Subject: [PATCH 0434/1050] drm/amdgpu: fix warning with powerplay disabled. This just fixes a warning when you disable powerplay. Signed-off-by: Dave Airlie --- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 10b1be51318b..82256558e0f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -218,9 +218,9 @@ static int amdgpu_pp_hw_fini(void *handle) static void amdgpu_pp_late_fini(void *handle) { +#ifdef CONFIG_DRM_AMD_POWERPLAY struct amdgpu_device *adev = (struct amdgpu_device *)handle; -#ifdef CONFIG_DRM_AMD_POWERPLAY if (adev->pp_enabled) { amdgpu_pm_sysfs_fini(adev); amd_powerplay_fini(adev->powerplay.pp_handle); From 8017ea35d33f9e0950d369773ab48bcb1efb9ba0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 8 Jun 2016 21:50:50 +1000 Subject: [PATCH 0435/1050] powerpc/nohash: Fix build break with 64K pages Commit 74701d5947a6 "powerpc/mm: Rename function to indicate we are allocating fragments" renamed page_table_free() to pte_fragment_free(). One occurrence was mistyped as pte_fragment_fre(). This only breaks the nohash 64K page build, which is not the default or enabled in any defconfig. Fixes: 74701d5947a6 ("powerpc/mm: Rename function to indicate we are allocating fragments") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 0c12a3bfe2ab..069369f6414b 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -172,7 +172,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - pte_fragment_fre((unsigned long *)pte, 1); + pte_fragment_free((unsigned long *)pte, 1); } static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) From 418f8399a8bedf376ec13eb01088f04a76ebdd6f Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 10 Jun 2016 00:07:28 +0300 Subject: [PATCH 0436/1050] net/mlx5: Fix the size of modify QP mailbox Add 16 reserved bytes at the end of mlx5_modify_qp_mbox_in to match the hardware spec definition. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/qp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 64221027bf1f..1532dcf6fc5e 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -560,6 +560,7 @@ struct mlx5_modify_qp_mbox_in { __be32 optparam; u8 rsvd0[4]; struct mlx5_qp_context ctx; + u8 rsvd2[16]; }; struct mlx5_modify_qp_mbox_out { From 9cd3411c42c5d5ba55d6e745edfe7df53c1ffa41 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 10 Jun 2016 00:07:29 +0300 Subject: [PATCH 0437/1050] net/mlx5: Fix masking of reserved bits in XRCD number Mask the reserved bits when reading the number of newly created XRCD. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index b720a274220d..b82d65802d96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -418,7 +418,7 @@ int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) if (out.hdr.status) err = mlx5_cmd_status_to_err(&out.hdr); else - *xrcdn = be32_to_cpu(out.xrcdn); + *xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff; return err; } From 86d56a1a6b7352542661d8a9463758c7f285fce3 Mon Sep 17 00:00:00 2001 From: Shahar Klein Date: Fri, 10 Jun 2016 00:07:30 +0300 Subject: [PATCH 0438/1050] net/mlx5: Fix MLX5_CMD_OP_MAX to be defined correctly Having MLX5_CMD_OP_MAX on another file causes us to repeatedly miss accounting new commands added to the driver and hence there're no entries for them in debugfs. To solve that, we integrate it into the commands enum as the last entry. Fixes: 34a40e689393 ('net/mlx5_core: Introduce modify flow table command') Signed-off-by: Shahar Klein Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 2 -- include/linux/mlx5/mlx5_ifc.h | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 035abdf62cfe..51f0caf299d8 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1240,8 +1240,6 @@ struct mlx5_destroy_psv_out { u8 rsvd[8]; }; -#define MLX5_CMD_OP_MAX 0x920 - enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9a05cd7e5890..986a615f623c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -205,7 +205,8 @@ enum { MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, - MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c + MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, + MLX5_CMD_OP_MAX }; struct mlx5_ifc_flow_table_fields_supported_bits { From 2fee37a47cebc26d58eec5dafc8ba787a6ee5350 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:31 +0300 Subject: [PATCH 0439/1050] net/mlx5: Fix root flow table update When we destroy the last flow table we need to update the root_ft to NULL. It fixes an issue for when the last flow table is destroyed and recreated again, root_ft pointer will not be updated, as a result traffic will be dropped. Fixes: 2cc43b494a6c ('net/mlx5_core: Managing root flow table') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 8b5f0b2c0d5c..fa6fec1930f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1292,8 +1292,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft) ft->id); return err; } - root->root_ft = new_root_ft; } + root->root_ft = new_root_ft; return 0; } From 876d634d19e41603aab91455f2c52a78a28372d5 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:32 +0300 Subject: [PATCH 0440/1050] net/mlx5: Fix flow steering NIC capabilities check Flow steering infrastructure is currently used only on link layer ethernet, therefore the driver should initialize the flow steering when the device link layer is ethernet. In addition, add missing capability check before initializing the namespace of NIC RX flow tables. Fixes: 2530236303d9 ('net/mlx5_core: Flow steering tree initialization') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 +++++++++- include/linux/mlx5/device.h | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fa6fec1930f5..c1efa5517d17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1767,6 +1767,9 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev) void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + cleanup_root_ns(dev); cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns); @@ -1828,15 +1831,20 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) { int err = 0; + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + err = mlx5_init_fc_stats(dev); if (err) return err; - if (MLX5_CAP_GEN(dev, nic_flow_table)) { + if (MLX5_CAP_GEN(dev, nic_flow_table) && + MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { err = init_root_ns(dev); if (err) goto err; } + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { err = init_fdb_root_ns(dev); if (err) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 51f0caf299d8..73a48479892d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1367,6 +1367,12 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) +#define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap) + +#define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) From bd02ef8eec0b98abe6d5efe280c87903b2eb9874 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:33 +0300 Subject: [PATCH 0441/1050] net/mlx5: Fix E-Switch flow steering capabilities check Add missing capabilities check for E-Switch FDB and ACLs flow tables before creating their namespace in flow steering. Fixes: efdc810ba39d ('net/mlx5: Flow steering, Add vport ACL support') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c1efa5517d17..e912a3d2505e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1846,19 +1846,21 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) } if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { - err = init_fdb_root_ns(dev); - if (err) - goto err; - } - if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { - err = init_egress_acl_root_ns(dev); - if (err) - goto err; - } - if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { - err = init_ingress_acl_root_ns(dev); - if (err) - goto err; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { + err = init_fdb_root_ns(dev); + if (err) + goto err; + } + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = init_egress_acl_root_ns(dev); + if (err) + goto err; + } + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = init_ingress_acl_root_ns(dev); + if (err) + goto err; + } } return 0; From 3fe3d819d5015d56d0d7289ae16db5e612640c5b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 10 Jun 2016 00:07:34 +0300 Subject: [PATCH 0442/1050] net/mlx5: E-Switch, Use the correct free() function We must use kvfree() for something that could have been allocated with vzalloc(), do that. Fixes: 5742df0f7dbe ('net/mlx5: E-Switch, Introduce VST vport ingress/egress ACLs') Fixes: 86d722ad2c3b ('net/mlx5: Use flow steering infrastructure for mlx5_en') Signed-off-by: Or Gerlitz Reported-by: Ilya Lesokhin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b84a6918a700..537479641c8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -529,7 +529,7 @@ out: } } - kfree(flow_group_in); + kvfree(flow_group_in); return err; } @@ -1097,7 +1097,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, vport->egress.drop_grp = drop_grp; vport->egress.allowed_vlans_grp = vlan_grp; out: - kfree(flow_group_in); + kvfree(flow_group_in); if (err && !IS_ERR_OR_NULL(vlan_grp)) mlx5_destroy_flow_group(vlan_grp); if (err && !IS_ERR_OR_NULL(acl)) @@ -1259,7 +1259,7 @@ out: mlx5_destroy_flow_table(vport->ingress.acl); } - kfree(flow_group_in); + kvfree(flow_group_in); } static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, From 3f42ac6648723e906c1c10edc0c523aff29963cc Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 10 Jun 2016 00:07:35 +0300 Subject: [PATCH 0443/1050] net/mlx5: E-Switch, Use the correct error check on returned pointers The mlx5 flow-steering API (mlx5_create_flow_table/group/rule) never returns null pointer on error. Even if it was doing that, checking for IS_ERR_OR_NULL(p) and then returning PTR_ERR(p) would have cause bugs, since PTR_ERR(NULL) --> success, crash. To make things more robust and protect against related future bugs, convert all IS_ERR_OR_NULL checks on returned values to IS_ERR. Fixes: 5742df0f7dbe ('net/mlx5: E-Switch, Introduce VST vport ingress/egress ACLs') Fixes: 86d722ad2c3b ('net/mlx5: Use flow steering infrastructure for mlx5_en') Signed-off-by: Or Gerlitz Reported-by: Ilya Lesokhin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 537479641c8e..a350af221d15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -383,7 +383,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); - if (IS_ERR_OR_NULL(flow_rule)) { + if (IS_ERR(flow_rule)) { pr_warn( "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); @@ -457,7 +457,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); - if (IS_ERR_OR_NULL(fdb)) { + if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); goto out; @@ -474,7 +474,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); eth_broadcast_addr(dmac); g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; @@ -489,7 +489,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) eth_zero_addr(dmac); dmac[0] = 0x01; g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); goto out; @@ -506,7 +506,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); goto out; @@ -1060,7 +1060,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, return; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR_OR_NULL(acl)) { + if (IS_ERR(acl)) { err = PTR_ERR(acl); esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n", vport->vport, err); @@ -1075,7 +1075,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); vlan_grp = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(vlan_grp)) { + if (IS_ERR(vlan_grp)) { err = PTR_ERR(vlan_grp); esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n", vport->vport, err); @@ -1086,7 +1086,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); drop_grp = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(drop_grp)) { + if (IS_ERR(drop_grp)) { err = PTR_ERR(drop_grp); esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", vport->vport, err); @@ -1174,7 +1174,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, return; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR_OR_NULL(acl)) { + if (IS_ERR(acl)) { err = PTR_ERR(acl); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n", vport->vport, err); @@ -1192,7 +1192,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n", vport->vport, err); @@ -1207,7 +1207,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n", vport->vport, err); @@ -1223,7 +1223,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n", vport->vport, err); @@ -1236,7 +1236,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n", vport->vport, err); @@ -1363,7 +1363,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); - if (IS_ERR_OR_NULL(vport->ingress.allow_rule)) { + if (IS_ERR(vport->ingress.allow_rule)) { err = PTR_ERR(vport->ingress.allow_rule); pr_warn("vport[%d] configure ingress allow rule, err(%d)\n", vport->vport, err); @@ -1380,7 +1380,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); - if (IS_ERR_OR_NULL(vport->ingress.drop_rule)) { + if (IS_ERR(vport->ingress.drop_rule)) { err = PTR_ERR(vport->ingress.drop_rule); pr_warn("vport[%d] configure ingress drop rule, err(%d)\n", vport->vport, err); @@ -1439,7 +1439,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); - if (IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { + if (IS_ERR(vport->egress.allowed_vlan)) { err = PTR_ERR(vport->egress.allowed_vlan); pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n", vport->vport, err); @@ -1457,7 +1457,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); - if (IS_ERR_OR_NULL(vport->egress.drop_rule)) { + if (IS_ERR(vport->egress.drop_rule)) { err = PTR_ERR(vport->egress.drop_rule); pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n", vport->vport, err); From 25fff58cb24c8880090d8b8ef7746001a135e876 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 10 Jun 2016 00:07:36 +0300 Subject: [PATCH 0444/1050] net/mlx5: E-Switch, Fix vport enable flow Reorder vport enable flow to mark the vport as enabled before calling the vport change handler which was modified to handle the case for when vport is not enabled. This fixes the case for when the PF netdev is open before sriov is enabled, once sriov is enabled at esw_enable_vport, esw_vport_change_handle_locked didn't read the PF context since it thought the PF vport was not enabled. When we enable the vport, arming for events is not required anymore, since it's done on the vport change handle Fixes: 586cfa7f1d58 ('net/mlx5: E-Switch, Use vport event handler for vport cleanup') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a350af221d15..cfec20cffd26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1491,14 +1491,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, /* Sync with current vport context */ vport->enabled_events = enable_events; - esw_vport_change_handle_locked(vport); - vport->enabled = true; /* only PF is trusted by default */ vport->trusted = (vport_num) ? false : true; - - arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + esw_vport_change_handle_locked(vport); esw->enabled_vports++; esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); From 23898c763f4af6f5c80b0230b1ea788a0ce3cf73 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Fri, 10 Jun 2016 00:07:37 +0300 Subject: [PATCH 0445/1050] net/mlx5: E-Switch, Modify node guid on vf set MAC In RoCE, the RDMA-CM needs the node guid to establish connection between nodes. Today, the node guid exposed to mlx5 Ethernet VFs is zero, therefore RDMA-CM on the VF is broken. Whenever the administrator sets a MAC for a VF, derive the node guid from it and set it as well in the following way: MAC: e4:1d:2d:b3:f4:01 -> node_guid: e4:1d:2d:ff:fe:b3:f4:01 Fixes: 77256579c6b43 ('net/mlx5: E-Switch, Introduce Vport...') Signed-off-by: Noa Osherovich Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 23 ++++++++++- .../net/ethernet/mellanox/mlx5/core/vport.c | 38 +++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 9 ++++- include/linux/mlx5/vport.h | 2 + 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index cfec20cffd26..9b1855b199a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1725,11 +1725,24 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { - int err = 0; struct mlx5_vport *evport; + u64 node_guid; + int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; @@ -1753,11 +1766,17 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, return err; } + node_guid_gen_from_mac(&node_guid, mac); + err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid); + if (err) + mlx5_core_warn(esw->dev, + "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", + vport, err); + mutex_lock(&esw->state_lock); if (evport->enabled) err = esw_vport_ingress_config(esw, evport); mutex_unlock(&esw->state_lock); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b69dadcfb897..daf44cd4c566 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -508,6 +508,44 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_context; + u8 *guid; + void *in; + int err; + + if (!vport) + return -EINVAL; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EACCES; + if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) + return -ENOTSUPP; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.node_guid, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + + nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + guid = MLX5_ADDR_OF(nic_vport_context, nic_vport_context, + node_guid); + MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} + int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 986a615f623c..e955a2859009 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -501,7 +501,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x1b]; + u8 reserved_at_5[0x19]; + u8 nic_vport_node_guid_modify[0x1]; + u8 nic_vport_port_guid_modify[0x1]; u8 reserved_at_20[0x7e0]; }; @@ -4584,7 +4586,10 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_at_0[0x19]; + u8 reserved_at_0[0x16]; + u8 node_guid[0x1]; + u8 port_guid[0x1]; + u8 reserved_at_18[0x1]; u8 mtu[0x1]; u8 change_event[0x1]; u8 promisc[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 301da4a5e6bf..6c16c198f680 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -50,6 +50,8 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid); int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, From 62e3c24ac4f0ee307c41a3652636f88a3f8d165c Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 10 Jun 2016 00:07:38 +0300 Subject: [PATCH 0446/1050] net/mlx5: E-Switch, always set mc_promisc for allmulti vports Set the mc_promisc flag also in the case of adding new mc address to existing allmulti vport. Fixes: a35f71f27a61 ('net/mlx5: E-Switch, Implement promiscuous rx modes vf request handling') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9b1855b199a1..aebbd6ccb9fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -651,6 +651,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, esw_fdb_set_vport_rule(esw, mac, vport_idx); + iter_vaddr->mc_promisc = true; break; case MLX5_ACTION_DEL: if (!iter_vaddr) From 811afeaa3797bdf4eabed286100811b33005c9e0 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 10 Jun 2016 00:07:39 +0300 Subject: [PATCH 0447/1050] net/mlx5e: Use ndo_stop explicitly at shutdown flow The current implementation copies the flow of ndo_stop instead of calling it explicitly, Fixed it. Fixes: 5fc7197d3a25 ("net/mlx5: Add pci shutdown callback") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fd4392999eee..f5c8d5db25a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3192,10 +3192,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) flush_workqueue(priv->wq); if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { netif_device_detach(netdev); - mutex_lock(&priv->state_lock); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_close_locked(netdev); - mutex_unlock(&priv->state_lock); + mlx5e_close(netdev); } else { unregister_netdev(netdev); } From 0ca00fc1f808602137dc6d51f17747b3bb0fc34d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 10 Jun 2016 00:07:40 +0300 Subject: [PATCH 0448/1050] net/mlx5e: Fix blue flame quota logic Blue flame is a latency enhancement feature that allows the driver to write the packet data directly to the NIC's registers thus making the read of the packet data from host memory redundant. We maintain a quota for the blue flame which is reloaded whenever we identify that the hardware is processing send requests and processes them fast enough so by the time we post the next send request it was able to process all the pending ones. This indicates that the hardware is capable of processing more blue flame requests efficiently. The blue flame quota is decremented whenever we send using blue flame. The current code erroneously clears the budget if we did not use blue flame for the current post send operation and we fix it here. Fixes: 88a85f99e51f ('net/mlx5e: TX latency optimization to save DMA reads') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 229ab16fb8d3..b000ddc29553 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -317,7 +317,8 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) while ((sq->pc & wq->sz_m1) > sq->edge) mlx5e_send_nop(sq, false); - sq->bf_budget = bf ? sq->bf_budget - 1 : 0; + if (bf) + sq->bf_budget--; sq->stats.packets++; sq->stats.bytes += num_bytes; From 36194812a4063dd2a72070aec3ee7890d135a587 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 8 Jun 2016 19:55:50 +0530 Subject: [PATCH 0449/1050] powerpc/mm/radix: Update to tlb functions ric argument Radix invalidate control (RIC) is used to control which cache to flush using tlb instructions. When doing a PID flush, we currently flush everything including page walk cache. For address range flush, we flush only the TLB. In the next patch, we add support for flushing only the page walk cache. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 43 +++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 54efba2fd66e..71083621884e 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -18,16 +18,20 @@ static DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbiel_pid(unsigned long pid, int set) +#define RIC_FLUSH_TLB 0 +#define RIC_FLUSH_PWC 1 +#define RIC_FLUSH_ALL 2 + +static inline void __tlbiel_pid(unsigned long pid, int set, + unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rb |= set << PPC_BITLSHIFT(51); rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -39,25 +43,24 @@ static inline void __tlbiel_pid(unsigned long pid, int set) /* * We use 128 set in radix mode and 256 set in hpt mode. */ -static inline void _tlbiel_pid(unsigned long pid) +static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { - __tlbiel_pid(pid, set); + __tlbiel_pid(pid, set, ric); } return; } -static inline void _tlbie_pid(unsigned long pid) +static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -67,16 +70,15 @@ static inline void _tlbie_pid(unsigned long pid) } static inline void _tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -86,16 +88,15 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, } static inline void _tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -122,7 +123,7 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); @@ -135,7 +136,7 @@ void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); pid = mm ? mm->context.id : 0; if (pid != MMU_NO_CONTEXT) - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); preempt_enable(); } @@ -172,11 +173,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(pid); + _tlbie_pid(pid, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); } @@ -196,11 +197,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_va(vmaddr, pid, ap); + _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); bail: preempt_enable(); } @@ -224,7 +225,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(0); + _tlbie_pid(0, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } From a145abf12c9f7d30d8c330c9d8a97428cbf0589b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 8 Jun 2016 19:55:51 +0530 Subject: [PATCH 0450/1050] powerpc/mm/radix: Flush page walk cache when freeing page table Even though a tlb_flush() does a flush with invalidate all cache, we can end up doing an RCU page table free before calling tlb_flush(). That means we can have page walk cache entries even after we free the page table pages. This can result in us doing wrong page table walk. Avoid this by doing pwc flush on every page table free. We can't batch the pwc flush, because the rcu call back function where we free the page table pages doesn't have information of the mmu gather. Thus we have to do a pwc on every page table page freed. Note: I also removed the dummy tlb_flush_pgtable call functions for hash 32. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 1 - arch/powerpc/include/asm/book3s/64/pgalloc.h | 16 +++++++- .../include/asm/book3s/64/tlbflush-radix.h | 3 ++ arch/powerpc/include/asm/book3s/64/tlbflush.h | 14 +++++++ arch/powerpc/include/asm/book3s/pgalloc.h | 5 --- arch/powerpc/mm/tlb-radix.c | 41 +++++++++++++++++++ 6 files changed, 73 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index a2350194fc76..8e21bb492dca 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -102,7 +102,6 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - tlb_flush_pgtable(tlb, address); pgtable_page_dtor(table); pgtable_free_tlb(tlb, page_address(table), 0); } diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 488279edb1f0..26eb2cb80c4e 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -110,6 +110,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); } @@ -127,6 +132,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX); } @@ -198,7 +208,11 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - tlb_flush_pgtable(tlb, address); + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, table, 0); } diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 13ef38828dfe..3fa94fcac628 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -18,16 +18,19 @@ extern void radix__local_flush_tlb_mm(struct mm_struct *mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) #define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i) +#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index d98424ae356c..96e5769b18b0 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -72,5 +72,19 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) #endif /* CONFIG_SMP */ +/* + * flush the page walk cache for the address + */ +static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address) +{ + /* + * Flush the page table walk cache on freeing a page table. We already + * have marked the upper/higher level page table entry none by now. + * So it is safe to flush PWC here. + */ + if (!radix_enabled()) + return; + radix__flush_tlb_pwc(tlb, address); +} #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h index 54f591e9572e..c0a69ae92256 100644 --- a/arch/powerpc/include/asm/book3s/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/pgalloc.h @@ -4,11 +4,6 @@ #include extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_flush_pgtable(struct mmu_gather *tlb, - unsigned long address) -{ - -} #ifdef CONFIG_PPC64 #include diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 71083621884e..ab2f60e812e2 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -128,6 +128,21 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) } EXPORT_SYMBOL(radix__local_flush_tlb_mm); +void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (pid != MMU_NO_CONTEXT) + _tlbiel_pid(pid, RIC_FLUSH_PWC); + + preempt_enable(); +} +EXPORT_SYMBOL(radix__local_flush_tlb_pwc); + void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { @@ -183,6 +198,32 @@ no_context: } EXPORT_SYMBOL(radix__flush_tlb_mm); +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + + if (!mm_is_core_local(mm)) { + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + raw_spin_lock(&native_tlbie_lock); + _tlbie_pid(pid, RIC_FLUSH_PWC); + if (lock_tlbie) + raw_spin_unlock(&native_tlbie_lock); + } else + _tlbiel_pid(pid, RIC_FLUSH_PWC); +no_context: + preempt_enable(); +} +EXPORT_SYMBOL(radix__flush_tlb_pwc); + void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { From 143aaef8aae2b53a5ab2d0e9d48c4ff368e82b16 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 May 2016 15:47:06 +0300 Subject: [PATCH 0451/1050] drm/i915: Fix NULL pointer deference when out of PLLs in IVB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit f9476a6c6d0c ("drm/i915: Refactor platform specifics out of intel_get_shared_dpll()"), the ibx_get_dpll() function lacked an error check, that can lead to a NULL pointer dereference when trying to enable three pipes. BUG: unable to handle kernel NULL pointer dereference at 0000000000000068 IP: [] intel_reference_shared_dpll+0x15/0x100 [i915] PGD cec87067 PUD d30ce067 PMD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: snd_hda_intel i915 drm_kms_helper drm intel_gtt sch_fq_codel cfg80211 binfmt_misc i2c_algo_bit cfbfillrect syscopyarea cfbimgblt sysfillrect sysimgblt fb_sys_fops cfbcopyarea intel_rapl iosf_mbi x86_pkg_temp_thermal coretemp agpgart kvm_intel snd_hda_codec_hdmi kvm iTCO_wdt snd_hda_codec_realtek snd_hda_codec_generic irqbypass aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd psmouse pcspkr snd_hda_codec i2c_i801 snd_hwdep snd_hda_core snd_pcm snd_timer lpc_ich mfd_core snd soundcore wmi evdev tpm_tis tpm [last unloaded: drm] CPU: 3 PID: 5810 Comm: kms_flip Tainted: G U W 4.6.0-test+ #3 Hardware name: /DZ77BH-55K, BIOS BHZ7710H.86A.0100.2013.0517.0942 05/17/2013 task: ffff8800d3908040 ti: ffff8801166c8000 task.ti: ffff8801166c8000 RIP: 0010:[] [] intel_reference_shared_dpll+0x15/0x100 [i915] RSP: 0018:ffff8801166cba60 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000002 RDX: 0000000000000001 RSI: ffff8800d07f1bf8 RDI: 0000000000000000 RBP: ffff8801166cba88 R08: 0000000000000002 R09: ffff8800d32e5698 R10: 0000000000000001 R11: ffff8800cc89ac88 R12: ffff8800d07f1bf8 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f4c3fc8d8c0(0000) GS:ffff88011bcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000068 CR3: 00000000d3b4c000 CR4: 00000000001406e0 Stack: 0000000000000000 ffff8800d07f1bf8 0000000000000000 ffff8800d04c0000 0000000000000000 ffff8801166cbaa8 ffffffffa04823a7 ffff8800d07f1bf8 ffff8800d32e5698 ffff8801166cbab8 ffffffffa04840cf ffff8801166cbaf0 Call Trace: [] ibx_get_dpll+0x47/0xa0 [i915] [] intel_get_shared_dpll+0x1f/0x50 [i915] [] ironlake_crtc_compute_clock+0x280/0x430 [i915] [] intel_crtc_atomic_check+0x240/0x320 [i915] [] drm_atomic_helper_check_planes+0x14e/0x1d0 [drm_kms_helper] [] intel_atomic_check+0x5dc/0x1110 [i915] [] drm_atomic_check_only+0x14a/0x660 [drm] [] ? drm_atomic_set_crtc_for_connector+0x96/0x100 [drm] [] drm_atomic_commit+0x17/0x60 [drm] [] restore_fbdev_mode+0x237/0x260 [drm_kms_helper] [] ? drm_modeset_lock_all_ctx+0x9a/0xb0 [drm] [] drm_fb_helper_restore_fbdev_mode_unlocked+0x33/0x80 [drm_kms_helper] [] drm_fb_helper_set_par+0x2d/0x50 [drm_kms_helper] [] drm_fb_helper_hotplug_event+0xaa/0xf0 [drm_kms_helper] [] drm_fb_helper_restore_fbdev_mode_unlocked+0x56/0x80 [drm_kms_helper] [] intel_fbdev_restore_mode+0x22/0x80 [i915] [] i915_driver_lastclose+0xe/0x20 [i915] [] drm_lastclose+0x2e/0x130 [drm] [] drm_release+0x2ac/0x4b0 [drm] [] __fput+0xed/0x1f0 [] ____fput+0xe/0x10 [] task_work_run+0x76/0xb0 [] do_exit+0x3ab/0xc60 [] ? trace_hardirqs_on_caller+0x12f/0x1c0 [] do_group_exit+0x4e/0xc0 [] SyS_exit_group+0x14/0x20 [] entry_SYSCALL_64_fastpath+0x18/0xa8 Code: 14 80 48 8d 34 90 b8 01 00 00 00 d3 e0 09 04 b3 5b 41 5c 5d c3 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 49 89 fe 41 55 41 54 53 <44> 8b 67 68 48 89 f3 48 8b be 08 02 00 00 4c 8b 2e e8 15 9d fd RIP [] intel_reference_shared_dpll+0x15/0x100 [i915] RSP CR2: 0000000000000068 Cc: Ville Syrjälä Cc: drm-intel-fixes@lists.freedesktop.org Reported-by: Ville Syrjälä Fixes: f9476a6c6d0c ("drm/i915: Refactor platform specifics out of intel_get_shared_dpll()") Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Tested-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1463748426-5956-1-git-send-email-ander.conselvan.de.oliveira@intel.com (cherry picked from commit bb143165510661feda06fd99298b8b3a94af3046) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 3ac705936b04..baf6f5584cbd 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -366,6 +366,9 @@ ibx_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, DPLL_ID_PCH_PLL_B); } + if (!pll) + return NULL; + /* reference the pll */ intel_reference_shared_dpll(pll, crtc_state); From ca8bdaf13abbdcbb12ff12164aa2d5b522ec524d Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 8 Jun 2016 19:21:17 +0100 Subject: [PATCH 0452/1050] stmmac: fix parameter to dwmac4_set_umac_addr() The dwmac4_set_umac_addr() takes a struct mac_device_info as the first parameter, but is being passed a ioaddr instead from dwmac4_set_filter(). Fix the warning/bug by changing the first parameter. drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:159:46: warning: incorrect type in argument 1 (different address spaces) drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:159:46: expected struct mac_device_info *hw drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:159:46: got void [noderef] *ioaddr Note, only compile tested this as do not have any hardware with it in. Signed-off-by: Ben Dooks Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 4f7283d05588..44da877d2483 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -156,7 +156,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, struct netdev_hw_addr *ha; netdev_for_each_uc_addr(ha, dev) { - dwmac4_set_umac_addr(ioaddr, ha->addr, reg); + dwmac4_set_umac_addr(hw, ha->addr, reg); reg++; } } From 719c44d340beeecd22cbda91b00ef55585b3c1a0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 7 Jun 2016 12:06:34 -0400 Subject: [PATCH 0453/1050] packet: compat support for sock_fprog Socket option PACKET_FANOUT_DATA takes a struct sock_fprog as argument if PACKET_FANOUT has mode PACKET_FANOUT_CBPF. This structure contains a pointer into user memory. If userland is 32-bit and kernel is 64-bit the two disagree about the layout of struct sock_fprog. Add compat setsockopt support to convert a 32-bit compat_sock_fprog to a 64-bit sock_fprog. This is analogous to compat_sock_fprog support for SO_REUSEPORT added in commit 1957598840f4 ("soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF"). Reported-by: Daniel Borkmann Signed-off-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/compat.h | 1 + net/compat.c | 17 +++++++++++++++-- net/packet/af_packet.c | 25 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/include/net/compat.h b/include/net/compat.h index 48103cf94e97..13de0ccaa059 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -42,6 +42,7 @@ int compat_sock_get_timestampns(struct sock *, struct timespec __user *); int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *, struct sockaddr __user **, struct iovec **); +struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval); asmlinkage long compat_sys_sendmsg(int, struct compat_msghdr __user *, unsigned int); asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, diff --git a/net/compat.c b/net/compat.c index 1373947efb50..1cd2ec046164 100644 --- a/net/compat.c +++ b/net/compat.c @@ -309,8 +309,8 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) __scm_destroy(scm); } -static int do_set_attach_filter(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) +/* allocate a 64-bit sock_fprog on the user stack for duration of syscall. */ +struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval) { struct compat_sock_fprog __user *fprog32 = (struct compat_sock_fprog __user *)optval; struct sock_fprog __user *kfprog = compat_alloc_user_space(sizeof(struct sock_fprog)); @@ -323,6 +323,19 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, __get_user(ptr, &fprog32->filter) || __put_user(len, &kfprog->len) || __put_user(compat_ptr(ptr), &kfprog->filter)) + return NULL; + + return kfprog; +} +EXPORT_SYMBOL_GPL(get_compat_bpf_fprog); + +static int do_set_attach_filter(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock_fprog __user *kfprog; + + kfprog = get_compat_bpf_fprog(optval); + if (!kfprog) return -EFAULT; return sock_setsockopt(sock, level, optname, (char __user *)kfprog, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4040eb92d9c9..9bff6ef16fa7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,6 +93,7 @@ #include #endif #include +#include #include "internal.h" @@ -3940,6 +3941,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } +#ifdef CONFIG_COMPAT +static int compat_packet_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct packet_sock *po = pkt_sk(sock->sk); + + if (level != SOL_PACKET) + return -ENOPROTOOPT; + + if (optname == PACKET_FANOUT_DATA && + po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) { + optval = (char __user *)get_compat_bpf_fprog(optval); + if (!optval) + return -EFAULT; + optlen = sizeof(struct sock_fprog); + } + + return packet_setsockopt(sock, level, optname, optval, optlen); +} +#endif + static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { @@ -4416,6 +4438,9 @@ static const struct proto_ops packet_ops = { .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, .getsockopt = packet_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_packet_setsockopt, +#endif .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, From 50219538ffc0493a2b451a3aa0191138ef8bfe9d Mon Sep 17 00:00:00 2001 From: Shrikrishna Khare Date: Wed, 8 Jun 2016 07:40:53 -0700 Subject: [PATCH 0454/1050] vmxnet3: segCnt can be 1 for LRO packets The device emulation may send segCnt of 1 for LRO packets. Signed-off-by: Shrikrishna Khare Signed-off-by: Jin Heo Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index db8022ae415b..08885bc8d6db 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1369,7 +1369,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd; segCnt = rcdlro->segCnt; - BUG_ON(segCnt <= 1); + WARN_ON_ONCE(segCnt == 0); mss = rcdlro->mss; if (unlikely(segCnt <= 1)) segCnt = 0; diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index c4825392d64b..3d2b64e63408 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.7.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.8.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040700 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040800 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ From 36a5fc5c439c7a95cb9b64414fad4067da6ae3ce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Jun 2016 18:08:43 +0100 Subject: [PATCH 0455/1050] drm/i915: Silence "unexpected child device config size" for VBT on 845g MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My old 845g complains that the child_device_size inside its VBT, version 110, is incorrect. Let's fiddle with the version matching such that it works with this VBT (i.e. treat BIOS v110 as having the same size as v108). Fixes [drm:intel_bios_init] *ERROR* Unexpected child device config size 27 (expected 33 for VBT version 110) Whether this is correct, no one knows - but it works for this particular machine. Signed-off-by: Chris Wilson Acked-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1464800923-6054-1-git-send-email-chris@chris-wilson.co.uk (cherry picked from commit fa05178c5dc3d1a3ad370f101cad01cf9dd3bbf9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_bios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index b235b6e88ead..051b0db07efb 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1187,7 +1187,7 @@ parse_device_mapping(struct drm_i915_private *dev_priv, } if (bdb->version < 106) { expected_size = 22; - } else if (bdb->version < 109) { + } else if (bdb->version < 111) { expected_size = 27; } else if (bdb->version < 195) { BUILD_BUG_ON(sizeof(struct old_child_dev_config) != 33); From fff7660d1e4f47dc6372ce2bd31a7b8cba0da340 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Jun 2016 08:27:50 +0100 Subject: [PATCH 0456/1050] drm/i915: Only ignore eDP ports that are connected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the VBT says that a certain port should be eDP (and hence fused off from HDMI), but in reality it isn't, we need to try and acquire the HDMI connection instead. So only trust the VBT edp setting if we can connect to an eDP device on that port. Fixes: d2182a6608 (drm/i915: Don't register HDMI connectors for eDP ports on VLV/CHV) References: https://bugs.freedesktop.org/show_bug.cgi?id=96288 Signed-off-by: Chris Wilson Tested-by: Phidias Chiang Cc: Ville Syrjälä Cc: Jani Nikula Cc: Daniel Vetter Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1464766070-31623-1-git-send-email-chris@chris-wilson.co.uk (cherry picked from commit 457c52d87e5dac9a4cf1a6a287e60ea7645067d4) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 20 ++++++++++---------- drivers/gpu/drm/i915/intel_dp.c | 13 ++++++------- drivers/gpu/drm/i915/intel_drv.h | 2 +- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2113f401f0ba..996e0c37293c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14554,6 +14554,8 @@ static void intel_setup_outputs(struct drm_device *dev) if (I915_READ(PCH_DP_D) & DP_DETECTED) intel_dp_init(dev, PCH_DP_D, PORT_D); } else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { + bool has_edp; + /* * The DP_DETECTED bit is the latched state of the DDC * SDA pin at boot. However since eDP doesn't require DDC @@ -14563,19 +14565,17 @@ static void intel_setup_outputs(struct drm_device *dev) * eDP ports. Consult the VBT as well as DP_DETECTED to * detect eDP ports. */ - if (I915_READ(VLV_HDMIB) & SDVO_DETECTED && - !intel_dp_is_edp(dev, PORT_B)) + has_edp = intel_dp_is_edp(dev, PORT_B); + if (I915_READ(VLV_DP_B) & DP_DETECTED || has_edp) + has_edp &= intel_dp_init(dev, VLV_DP_B, PORT_B); + if (I915_READ(VLV_HDMIB) & SDVO_DETECTED && !has_edp) intel_hdmi_init(dev, VLV_HDMIB, PORT_B); - if (I915_READ(VLV_DP_B) & DP_DETECTED || - intel_dp_is_edp(dev, PORT_B)) - intel_dp_init(dev, VLV_DP_B, PORT_B); - if (I915_READ(VLV_HDMIC) & SDVO_DETECTED && - !intel_dp_is_edp(dev, PORT_C)) + has_edp = intel_dp_is_edp(dev, PORT_C); + if (I915_READ(VLV_DP_C) & DP_DETECTED || has_edp) + has_edp &= intel_dp_init(dev, VLV_DP_C, PORT_C); + if (I915_READ(VLV_HDMIC) & SDVO_DETECTED && !has_edp) intel_hdmi_init(dev, VLV_HDMIC, PORT_C); - if (I915_READ(VLV_DP_C) & DP_DETECTED || - intel_dp_is_edp(dev, PORT_C)) - intel_dp_init(dev, VLV_DP_C, PORT_C); if (IS_CHERRYVIEW(dev)) { /* eDP not supported on port D, so don't check VBT */ diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index f192f58708c2..2991326842f5 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5923,9 +5923,9 @@ fail: return false; } -void -intel_dp_init(struct drm_device *dev, - i915_reg_t output_reg, enum port port) +bool intel_dp_init(struct drm_device *dev, + i915_reg_t output_reg, + enum port port) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_digital_port *intel_dig_port; @@ -5935,7 +5935,7 @@ intel_dp_init(struct drm_device *dev, intel_dig_port = kzalloc(sizeof(*intel_dig_port), GFP_KERNEL); if (!intel_dig_port) - return; + return false; intel_connector = intel_connector_alloc(); if (!intel_connector) @@ -5992,7 +5992,7 @@ intel_dp_init(struct drm_device *dev, if (!intel_dp_init_connector(intel_dig_port, intel_connector)) goto err_init_connector; - return; + return true; err_init_connector: drm_encoder_cleanup(encoder); @@ -6000,8 +6000,7 @@ err_encoder_init: kfree(intel_connector); err_connector_alloc: kfree(intel_dig_port); - - return; + return false; } void intel_dp_mst_suspend(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a28b4aac1e02..4a24b0067a3a 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1284,7 +1284,7 @@ void intel_csr_ucode_suspend(struct drm_i915_private *); void intel_csr_ucode_resume(struct drm_i915_private *); /* intel_dp.c */ -void intel_dp_init(struct drm_device *dev, i915_reg_t output_reg, enum port port); +bool intel_dp_init(struct drm_device *dev, i915_reg_t output_reg, enum port port); bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector); void intel_dp_set_link_params(struct intel_dp *intel_dp, From a5aac5ab876ad95b7f5e8d862afb07248ee9cae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 3 Jun 2016 12:17:43 +0300 Subject: [PATCH 0457/1050] drm/i915: Check VBT for port presence in addition to the strap on VLV/CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently some CHV boards failed to hook up the port presence straps for HDMI ports as well (earlier we assumed this problem only affected eDP ports). So let's check the VBT in addition to the strap, and if either one claims that the port is present go ahead and register the relevant connector. While at it, change port D to register DP before HDMI as we do for ports B and C since commit 457c52d87e5d ("drm/i915: Only ignore eDP ports that are connected") Also print a debug message when we register a HDMI connector to aid in diagnosing missing/incorrect ports. We already had such a print for DP/eDP. v2: Improve the comment in the code a bit, note the port D change in the commit message Cc: Radoslav Duda Tested-by: Radoslav Duda Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96321 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1464945463-14364-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit 22f35042593c2b369861f0b9740efb8065a42db0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_bios.c | 39 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_display.c | 30 ++++++++++++++------- drivers/gpu/drm/i915/intel_hdmi.c | 3 +++ 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5faacc6e548d..7c334e902266 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3481,6 +3481,7 @@ int intel_bios_init(struct drm_i915_private *dev_priv); bool intel_bios_is_valid_vbt(const void *buf, size_t size); bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv); bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin); +bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port); bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port); bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port); bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port); diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 051b0db07efb..519818596e2a 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1545,6 +1545,45 @@ bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin) return false; } +/** + * intel_bios_is_port_present - is the specified digital port present + * @dev_priv: i915 device instance + * @port: port to check + * + * Return true if the device in %port is present. + */ +bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port) +{ + static const struct { + u16 dp, hdmi; + } port_mapping[] = { + [PORT_B] = { DVO_PORT_DPB, DVO_PORT_HDMIB, }, + [PORT_C] = { DVO_PORT_DPC, DVO_PORT_HDMIC, }, + [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, }, + [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, }, + }; + int i; + + /* FIXME maybe deal with port A as well? */ + if (WARN_ON(port == PORT_A) || port >= ARRAY_SIZE(port_mapping)) + return false; + + if (!dev_priv->vbt.child_dev_num) + return false; + + for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { + const union child_device_config *p_child = + &dev_priv->vbt.child_dev[i]; + if ((p_child->common.dvo_port == port_mapping[port].dp || + p_child->common.dvo_port == port_mapping[port].hdmi) && + (p_child->common.device_type & (DEVICE_TYPE_TMDS_DVI_SIGNALING | + DEVICE_TYPE_DISPLAYPORT_OUTPUT))) + return true; + } + + return false; +} + /** * intel_bios_is_port_edp - is the device in given port eDP * @dev_priv: i915 device instance diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 996e0c37293c..718f56525b96 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14554,7 +14554,7 @@ static void intel_setup_outputs(struct drm_device *dev) if (I915_READ(PCH_DP_D) & DP_DETECTED) intel_dp_init(dev, PCH_DP_D, PORT_D); } else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { - bool has_edp; + bool has_edp, has_port; /* * The DP_DETECTED bit is the latched state of the DDC @@ -14564,25 +14564,37 @@ static void intel_setup_outputs(struct drm_device *dev) * Thus we can't rely on the DP_DETECTED bit alone to detect * eDP ports. Consult the VBT as well as DP_DETECTED to * detect eDP ports. + * + * Sadly the straps seem to be missing sometimes even for HDMI + * ports (eg. on Voyo V3 - CHT x7-Z8700), so check both strap + * and VBT for the presence of the port. Additionally we can't + * trust the port type the VBT declares as we've seen at least + * HDMI ports that the VBT claim are DP or eDP. */ has_edp = intel_dp_is_edp(dev, PORT_B); - if (I915_READ(VLV_DP_B) & DP_DETECTED || has_edp) + has_port = intel_bios_is_port_present(dev_priv, PORT_B); + if (I915_READ(VLV_DP_B) & DP_DETECTED || has_port) has_edp &= intel_dp_init(dev, VLV_DP_B, PORT_B); - if (I915_READ(VLV_HDMIB) & SDVO_DETECTED && !has_edp) + if ((I915_READ(VLV_HDMIB) & SDVO_DETECTED || has_port) && !has_edp) intel_hdmi_init(dev, VLV_HDMIB, PORT_B); has_edp = intel_dp_is_edp(dev, PORT_C); - if (I915_READ(VLV_DP_C) & DP_DETECTED || has_edp) + has_port = intel_bios_is_port_present(dev_priv, PORT_C); + if (I915_READ(VLV_DP_C) & DP_DETECTED || has_port) has_edp &= intel_dp_init(dev, VLV_DP_C, PORT_C); - if (I915_READ(VLV_HDMIC) & SDVO_DETECTED && !has_edp) + if ((I915_READ(VLV_HDMIC) & SDVO_DETECTED || has_port) && !has_edp) intel_hdmi_init(dev, VLV_HDMIC, PORT_C); if (IS_CHERRYVIEW(dev)) { - /* eDP not supported on port D, so don't check VBT */ - if (I915_READ(CHV_HDMID) & SDVO_DETECTED) - intel_hdmi_init(dev, CHV_HDMID, PORT_D); - if (I915_READ(CHV_DP_D) & DP_DETECTED) + /* + * eDP not supported on port D, + * so no need to worry about it + */ + has_port = intel_bios_is_port_present(dev_priv, PORT_D); + if (I915_READ(CHV_DP_D) & DP_DETECTED || has_port) intel_dp_init(dev, CHV_DP_D, PORT_D); + if (I915_READ(CHV_HDMID) & SDVO_DETECTED || has_port) + intel_hdmi_init(dev, CHV_HDMID, PORT_D); } intel_dsi_init(dev); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 2c3bd9c2573e..a8844702d11b 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -2142,6 +2142,9 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, enum port port = intel_dig_port->port; uint8_t alternate_ddc_pin; + DRM_DEBUG_KMS("Adding HDMI connector on port %c\n", + port_name(port)); + if (WARN(intel_dig_port->max_lanes < 4, "Not enough lanes (%d) for HDMI on port %c\n", intel_dig_port->max_lanes, port_name(port))) From 356d27bbfe332de0f2f78e55636e581960c9774e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 May 2016 12:08:34 +0300 Subject: [PATCH 0458/1050] drm/i915: Extract physical display dimensions from VBT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VBT has these mysterious H/V image sizes as part of the display timings. Looking at some dumps those appear to be the physical dimensions in mm. Which makes sense since the timing descriptor matches the format used by EDID detailed timing descriptor, which defines these as "H/V Addressable Video Image Size in mm". So let's use that information from the panel fixed mode to get the physical dimensions for LVDS/eDP/DSI displays. And with that we can fill out the display_info so that userspace can get at it via GetConnector. v2: Use (hi<<8)|lo instead of broken (hi<<4)+lo Handle LVDS and eDP too Cc: Stephen Just Tested-by: Stephen Just Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96255 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1464685714-30507-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola (cherry picked from commit df457245b5b7515cf97763ebd8975229e34d4cf3) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_bios.c | 5 +++++ drivers/gpu/drm/i915/intel_dp.c | 5 ++++- drivers/gpu/drm/i915/intel_dsi.c | 3 +++ drivers/gpu/drm/i915/intel_lvds.c | 2 ++ drivers/gpu/drm/i915/intel_vbt_defs.h | 7 ++++--- 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 519818596e2a..b9022fa053d6 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -139,6 +139,11 @@ fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode, else panel_fixed_mode->flags |= DRM_MODE_FLAG_NVSYNC; + panel_fixed_mode->width_mm = (dvo_timing->himage_hi << 8) | + dvo_timing->himage_lo; + panel_fixed_mode->height_mm = (dvo_timing->vimage_hi << 8) | + dvo_timing->vimage_lo; + /* Some VBTs have bogus h/vtotal values */ if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal) panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2991326842f5..ffe5f8430957 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5725,8 +5725,11 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, if (!fixed_mode && dev_priv->vbt.lfp_lvds_vbt_mode) { fixed_mode = drm_mode_duplicate(dev, dev_priv->vbt.lfp_lvds_vbt_mode); - if (fixed_mode) + if (fixed_mode) { fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; + connector->display_info.width_mm = fixed_mode->width_mm; + connector->display_info.height_mm = fixed_mode->height_mm; + } } mutex_unlock(&dev->mode_config.mutex); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 366ad6c67ce4..4756ef639648 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1545,6 +1545,9 @@ void intel_dsi_init(struct drm_device *dev) goto err; } + connector->display_info.width_mm = fixed_mode->width_mm; + connector->display_info.height_mm = fixed_mode->height_mm; + intel_panel_init(&intel_connector->panel, fixed_mode, NULL); intel_dsi_add_properties(intel_connector); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index bc53c0dd34d0..96281e628d2a 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -1082,6 +1082,8 @@ void intel_lvds_init(struct drm_device *dev) fixed_mode = drm_mode_duplicate(dev, dev_priv->vbt.lfp_lvds_vbt_mode); if (fixed_mode) { fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; + connector->display_info.width_mm = fixed_mode->width_mm; + connector->display_info.height_mm = fixed_mode->height_mm; goto out; } } diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index c15051de8023..44fb0b35eed3 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -403,9 +403,10 @@ struct lvds_dvo_timing { u8 vsync_off:4; u8 rsvd0:6; u8 hsync_off_hi:2; - u8 h_image; - u8 v_image; - u8 max_hv; + u8 himage_lo; + u8 vimage_lo; + u8 vimage_hi:4; + u8 himage_hi:4; u8 h_border; u8 v_border; u8 rsvd1:3; From 0487c44d1e1070b636ba3f3a12494ec456c2d005 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 10 Jun 2016 09:22:31 +0200 Subject: [PATCH 0459/1050] KVM: s390: ignore IBC if zero Looks like we forgot about the special IBC value of 0 meaning "no IBC". Let's fix that, otherwise it gets rounded up and suddenly an IBC is active with the lowest possible machine. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Fixes: commit 053dd2308d81 ("KVM: s390: force ibc into valid range") Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6d8ec3ac9dd8..c9ae53924a69 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -657,7 +657,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) kvm->arch.model.cpuid = proc->cpuid; lowest_ibc = sclp.ibc >> 16 & 0xfff; unblocked_ibc = sclp.ibc & 0xfff; - if (lowest_ibc) { + if (lowest_ibc && proc->ibc) { if (proc->ibc > unblocked_ibc) kvm->arch.model.ibc = unblocked_ibc; else if (proc->ibc < lowest_ibc) From 9ec6de19235889ab0118e970ee732cb33c9efc06 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Fri, 6 May 2016 16:33:06 +0300 Subject: [PATCH 0460/1050] KVM: s390: Add stats for PEI events Add partial execution intercepted events in kvm_stats_debugfs. Signed-off-by: Alexander Yarygin Acked-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/intercept.c | 2 ++ arch/s390/kvm/kvm-s390.c | 1 + 3 files changed, 4 insertions(+) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 37b9017c6a96..ac82e8eb936d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -245,6 +245,7 @@ struct kvm_vcpu_stat { u32 exit_stop_request; u32 exit_validity; u32 exit_instruction; + u32 exit_pei; u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_poll_invalid; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 2e6b54e4d3f9..252157181302 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -341,6 +341,8 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) static int handle_partial_execution(struct kvm_vcpu *vcpu) { + vcpu->stat.exit_pei++; + if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */ return handle_mvpg_pei(vcpu); if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c9ae53924a69..43f2a2b80490 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -61,6 +61,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_external_request", VCPU_STAT(exit_external_request) }, { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_pei", VCPU_STAT(exit_pei) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, From 06a84db74c3572cde79eb1b04f301399eafb8226 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 2 May 2016 15:27:34 +0300 Subject: [PATCH 0461/1050] iwlwifi: mvm: increase scan timeout to 20 seconds The 16 seconds timeout we were using turned out to be too short. Recalculations by system show that the total time in both bands should be < 18.5 seconds, even in the slowest cases (e.g. DCM P2P with DTIM=2). Rounding it up to 20 seconds for a bit more safety. Fixes: 728e825f81b1 ("iwlwifi: mvm: add a scan timeout for regular scans") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 6f609dd5c222..e78fc567ff7d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1222,7 +1222,7 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) return -EIO; } -#define SCAN_TIMEOUT (16 * HZ) +#define SCAN_TIMEOUT (20 * HZ) void iwl_mvm_scan_timeout(unsigned long data) { From 7d6a1ab6a2db180122dee8db6c201f2dcf677813 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 15 May 2016 10:20:29 +0300 Subject: [PATCH 0462/1050] iwlwifi: mvm: fix RCU splat in TKIP's update_key The commit below mistakenly changed an rcu_dereference_check to a rcu_dereference_protected which introduced the following RCU warning: [ INFO: suspicious RCU usage. ] 4.6.0-rc7-next-20160513-dbg-00004-g8de8b92-dirty #655 Not tainted ------------------------------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h:1069 suspicious rcu_dereference_protected() usage! Call Trace: [] lockdep_rcu_suspicious+0xf7/0x100 [] iwl_mvm_get_key_sta.part.0+0x5d/0x80 [iwlmvm] [] iwl_mvm_update_tkip_key+0xd3/0x162 [iwlmvm] [] iwl_mvm_mac_update_tkip_key+0x17/0x19 [iwlmvm] [] ieee80211_tkip_decrypt_data+0x22c/0x24b [mac80211] [] ieee80211_crypto_tkip_decrypt+0xc5/0x110 [mac80211] [] ieee80211_rx_handlers+0x9bb/0x1fe1 [mac80211] Fixes: 13303c0fb148 ("iwlwifi: mvm: use helpers to get iwl_mvm_sta") Reported-by: Sergey Senozhatsky Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index fea4d3437e2f..0454bfe0ef6c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1852,12 +1852,18 @@ static struct iwl_mvm_sta *iwl_mvm_get_key_sta(struct iwl_mvm *mvm, mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) { u8 sta_id = mvmvif->ap_sta_id; + sta = rcu_dereference_check(mvm->fw_id_to_mac_id[sta_id], + lockdep_is_held(&mvm->mutex)); + /* * It is possible that the 'sta' parameter is NULL, * for example when a GTK is removed - the sta_id will then * be the AP ID, and no station was passed by mac80211. */ - return iwl_mvm_sta_from_staid_protected(mvm, sta_id); + if (IS_ERR_OR_NULL(sta)) + return NULL; + + return iwl_mvm_sta_from_mac80211(sta); } return NULL; From 1f9788f335d7c3145bcb59bd570c5b9ef7203ef4 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 16 May 2016 14:34:20 +0300 Subject: [PATCH 0463/1050] iwlwifi: mvm: fix potential NULL-dereference in iwl_mvm_reorder() We try to access sta before we check for IS_ERR_OR_NULL(), so we may end up accessing a NULL pointer. To prevent that, move the conversion from sta to mvm_sta below the check. Fixes: b915c10174fb ("iwlwifi: mvm: add reorder buffer per queue") Reported-by: Dan Carpenter Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index ac2c5718e454..2c61516d06ff 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -581,7 +581,7 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, struct iwl_rx_mpdu_desc *desc) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); + struct iwl_mvm_sta *mvm_sta; struct iwl_mvm_baid_data *baid_data; struct iwl_mvm_reorder_buffer *buffer; struct sk_buff *tail; @@ -604,6 +604,8 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, if (WARN_ON(IS_ERR_OR_NULL(sta))) return false; + mvm_sta = iwl_mvm_sta_from_mac80211(sta); + /* not a data packet */ if (!ieee80211_is_data_qos(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) From aa950524d501afa28869b7f56e539fd9e744dd9f Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Wed, 1 Jun 2016 00:28:09 +0300 Subject: [PATCH 0464/1050] iwlwifi: mvm: set the encryption type of an IGTK key The FW expect the driver to set the encryption algorithm type when installing the IGTK key in the HW. Currently when installing CMAC IGTK key we don't set the algorithm type and as a result the FW fails to calculate the MIC of multicast management frames. Fix it. Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 0454bfe0ef6c..b23ab4a4504f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1961,6 +1961,14 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, struct ieee80211_key_seq seq; const u8 *pn; + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_AES_CMAC: + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_CCM); + break; + default: + return -EINVAL; + } + memcpy(igtk_cmd.IGTK, keyconf->key, keyconf->keylen); ieee80211_get_key_rx_seq(keyconf, 0, &seq); pn = seq.aes_cmac.pn; From 280a3efa82fccc9532c968a77e5162cb9f0af497 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Jun 2016 14:46:37 +0200 Subject: [PATCH 0465/1050] iwlwifi: mvm: fix a few firmware capability checks My cleanup in "iwlwifi: prepare for higher API/CAPA bits" accidentally inverted a few tests - fix them. Fixes: 859d914c8f5c ("iwlwifi: prepare for higher API/CAPA bits") Reported-by: Sara Sharon Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index e5f267b21316..18a8474b5760 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3851,8 +3851,8 @@ static int iwl_mvm_mac_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - if (fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) + if (!fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) return -ENOENT; mutex_lock(&mvm->mutex); @@ -3898,8 +3898,8 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw, struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - if (fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) + if (!fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) return; /* if beacon filtering isn't on mac80211 does it anyway */ From aaee8c3c5cce2d9107310dd9f3026b4f901d441c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 May 2016 15:54:04 -0700 Subject: [PATCH 0466/1050] x86/entry/traps: Don't force in_interrupt() to return true in IST handlers Forcing in_interrupt() to return true if we're not in a bona fide interrupt confuses the softirq code. This fixes warnings like: NOHZ: local_softirq_pending 282 ... which can happen when running things like selftests/x86. This will change perf's static percpu buffer usage in IST context. I think this is okay, and it's changing the behavior to match historical (pre-4.0) behavior. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 959274753857 ("x86, traps: Track entry into and exit from IST context") Link: http://lkml.kernel.org/r/cdc215f94d118d691d73df35275022331156fb45.1464130360.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d1590486204a..00f03d82e69a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -96,6 +96,12 @@ static inline void cond_local_irq_disable(struct pt_regs *regs) local_irq_disable(); } +/* + * In IST context, we explicitly disable preemption. This serves two + * purposes: it makes it much less likely that we would accidentally + * schedule in IST context and it will force a warning if we somehow + * manage to schedule by accident. + */ void ist_enter(struct pt_regs *regs) { if (user_mode(regs)) { @@ -110,13 +116,7 @@ void ist_enter(struct pt_regs *regs) rcu_nmi_enter(); } - /* - * We are atomic because we're on the IST stack; or we're on - * x86_32, in which case we still shouldn't schedule; or we're - * on x86_64 and entered from user mode, in which case we're - * still atomic unless ist_begin_non_atomic is called. - */ - preempt_count_add(HARDIRQ_OFFSET); + preempt_disable(); /* This code is a bit fragile. Test it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); @@ -124,7 +124,7 @@ void ist_enter(struct pt_regs *regs) void ist_exit(struct pt_regs *regs) { - preempt_count_sub(HARDIRQ_OFFSET); + preempt_enable_no_resched(); if (!user_mode(regs)) rcu_nmi_exit(); @@ -155,7 +155,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) BUG_ON((unsigned long)(current_top_of_stack() - current_stack_pointer()) >= THREAD_SIZE); - preempt_count_sub(HARDIRQ_OFFSET); + preempt_enable_no_resched(); } /** @@ -165,7 +165,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) */ void ist_end_non_atomic(void) { - preempt_count_add(HARDIRQ_OFFSET); + preempt_disable(); } static nokprobe_inline int From ef2bf4997f7da6efa8540d9cf726c44bf2b863af Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 27 May 2016 09:45:49 -0700 Subject: [PATCH 0467/1050] pwm: Improve args checking in pwm_apply_state() It seems like in the process of refactoring pwm_config() to utilize the newly-introduced pwm_apply_state() API, some args/bounds checking was dropped. In particular, I noted that we are now allowing invalid period selections, e.g.: # echo 1 > /sys/class/pwm/pwmchip0/export # cat /sys/class/pwm/pwmchip0/pwm1/period 100 # echo 101 > /sys/class/pwm/pwmchip0/pwm1/duty_cycle [... driver may or may not reject the value, or trigger some logic bug ...] It's better to see: # echo 1 > /sys/class/pwm/pwmchip0/export # cat /sys/class/pwm/pwmchip0/pwm1/period 100 # echo 101 > /sys/class/pwm/pwmchip0/pwm1/duty_cycle -bash: echo: write error: Invalid argument This patch reintroduces some bounds checks in both pwm_config() (for its signed parameters; we don't want to convert negative values into large unsigned values) and in pwm_apply_state() (which fix the above described behavior, as well as other potential API misuses). Fixes: 5ec803edcb70 ("pwm: Add core infrastructure to allow atomic updates") Signed-off-by: Brian Norris Acked-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 3 ++- include/linux/pwm.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index dba3843c53b8..ed337a8c34ab 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -457,7 +457,8 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) { int err; - if (!pwm) + if (!pwm || !state || !state->period || + state->duty_cycle > state->period) return -EINVAL; if (!memcmp(state, &pwm->state, sizeof(*state))) diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 17018f3c066e..908b67c847cd 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -235,6 +235,9 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns, if (!pwm) return -EINVAL; + if (duty_ns < 0 || period_ns < 0) + return -EINVAL; + pwm_get_state(pwm, &state); if (state.duty_cycle == duty_ns && state.period == period_ns) return 0; From fe5aa34d6eb9c4d34071845f70f3714b41c8a77d Mon Sep 17 00:00:00 2001 From: Ryo Kodama Date: Wed, 8 Jun 2016 10:58:23 +0900 Subject: [PATCH 0468/1050] pwm: sysfs: Get return value from pwm_apply_state() This patch adds to check the return value from pwm_apply_state() used in enable_store(). The error of enable_store() doesn't work if the return value doesn't received. Signed-off-by: Ryo Kodama Signed-off-by: Yoshihiro Shimoda Fixes: 39100ceea79f ("pwm: Switch to the atomic API") Signed-off-by: Thierry Reding --- drivers/pwm/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index d98599249a05..01695d48dd54 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -152,7 +152,7 @@ static ssize_t enable_store(struct device *child, goto unlock; } - pwm_apply_state(pwm, &state); + ret = pwm_apply_state(pwm, &state); unlock: mutex_unlock(&export->lock); From 9d98bcec731756b8688b59ec998707924d716d7b Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Wed, 8 Jun 2016 14:59:52 +0800 Subject: [PATCH 0469/1050] x86/ioapic: Fix incorrect pointers in ioapic_setup_resources() On a 4-socket Brickland system, hot-removing one ioapic is fine. Hot-removing the 2nd one causes panic in mp_unregister_ioapic() while calling release_resource(). It is because the iomem_res pointer has already been released when removing the first ioapic. To explain the use of &res[num] here: res is assigned to ioapic_resources, and later in ioapic_insert_resources() we do: struct resource *r = ioapic_resources; for_each_ioapic(i) { insert_resource(&iomem_resource, r); r++; } Here 'r' is treated as an arry of 'struct resource', and the r++ ensures that each element of the array is inserted separately. Thus we should call release_resouce() on each element at &res[num]. Fix it by assigning the correct pointers to ioapics[i].iomem_res in ioapic_setup_resources(). Signed-off-by: Rui Wang Signed-off-by: Thomas Gleixner Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1465369193-4816-3-git-send-email-rui.y.wang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 84e33ff5a6d5..446702ed99dc 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2588,8 +2588,8 @@ static struct resource * __init ioapic_setup_resources(void) res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; + ioapics[i].iomem_res = &res[num]; num++; - ioapics[i].iomem_res = res; } ioapic_resources = res; From ba65dc5ef16f82fba77869cecf7a7d515f61446b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2016 11:32:47 -0400 Subject: [PATCH 0470/1050] much milder d_walk() race d_walk() relies upon the tree not getting rearranged under it without rename_lock being touched. And we do grab rename_lock around the places that change the tree topology. Unfortunately, branch reordering is just as bad from d_walk() POV and we have two places that do it without touching rename_lock - one in handling of cursors (for ramfs-style directories) and another in autofs. autofs one is a separate story; this commit deals with the cursors. * mark cursor dentries explicitly at allocation time * make __dentry_kill() leave ->d_child.next pointing to the next non-cursor sibling, making sure that it won't be moved around unnoticed before the parent is relocked on ascend-to-parent path in d_walk(). * make d_walk() skip cursors explicitly; strictly speaking it's not necessary (all callbacks we pass to d_walk() are no-ops on cursors), but it makes analysis easier. Signed-off-by: Al Viro --- fs/dcache.c | 58 +++++++++++++++++++++++++++++++++++++----- fs/internal.h | 1 + fs/libfs.c | 4 +-- include/linux/dcache.h | 1 + 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 817c243c1ff1..b7eddfd35aa5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -507,6 +507,44 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) +{ + struct dentry *next; + /* + * Inform d_walk() and shrink_dentry_list() that we are no longer + * attached to the dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (unlikely(list_empty(&dentry->d_child))) + return; + __list_del_entry(&dentry->d_child); + /* + * Cursors can move around the list of children. While we'd been + * a normal list member, it didn't matter - ->d_child.next would've + * been updated. However, from now on it won't be and for the + * things like d_walk() it might end up with a nasty surprise. + * Normally d_walk() doesn't care about cursors moving around - + * ->d_lock on parent prevents that and since a cursor has no children + * of its own, we get through it without ever unlocking the parent. + * There is one exception, though - if we ascend from a child that + * gets killed as soon as we unlock it, the next sibling is found + * using the value left in its ->d_child.next. And if _that_ + * pointed to a cursor, and cursor got moved (e.g. by lseek()) + * before d_walk() regains parent->d_lock, we'll end up skipping + * everything the cursor had been moved past. + * + * Solution: make sure that the pointer left behind in ->d_child.next + * points to something that won't be moving around. I.e. skip the + * cursors. + */ + while (dentry->d_child.next != &parent->d_subdirs) { + next = list_entry(dentry->d_child.next, struct dentry, d_child); + if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) + break; + dentry->d_child.next = next->d_child.next; + } +} + static void __dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; @@ -532,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry) } /* if it was on the hash then remove it */ __d_drop(dentry); - __list_del_entry(&dentry->d_child); - /* - * Inform d_walk() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; + dentry_unlist(dentry, parent); if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1203,6 +1236,9 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; + if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) + continue; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ret = enter(data, dentry); @@ -1651,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_cursor(struct dentry * parent) +{ + struct dentry *dentry = __d_alloc(parent->d_sb, NULL); + if (dentry) { + dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; + dentry->d_parent = dget(parent); + } + return dentry; +} + /** * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) * @sb: the superblock diff --git a/fs/internal.h b/fs/internal.h index b71deeecea17..f57ced528cde 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -130,6 +130,7 @@ extern int invalidate_inodes(struct super_block *, bool); extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); extern int d_set_mounted(struct dentry *dentry); extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); +extern struct dentry *d_alloc_cursor(struct dentry *); /* * read_write.c diff --git a/fs/libfs.c b/fs/libfs.c index 3db2721144c2..cedeacbae303 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -71,9 +71,7 @@ EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { - static struct qstr cursor_name = QSTR_INIT(".", 1); - - file->private_data = d_alloc(file->f_path.dentry, &cursor_name); + file->private_data = d_alloc_cursor(file->f_path.dentry); return file->private_data ? 0 : -ENOMEM; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 484c8792da82..bcd0c64e3ed8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -212,6 +212,7 @@ struct dentry_operations { #define DCACHE_OP_REAL 0x08000000 #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ +#define DCACHE_DENTRY_CURSOR 0x20000000 extern seqlock_t rename_lock; From e54ad7f1ee263ffa5a2de9c609d58dfa27b21cd9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 1 Jun 2016 11:55:05 +0200 Subject: [PATCH 0471/1050] proc: prevent stacking filesystems on top This prevents stacking filesystems (ecryptfs and overlayfs) from using procfs as lower filesystem. There is too much magic going on inside procfs, and there is no good reason to stack stuff on top of procfs. (For example, procfs does access checks in VFS open handlers, and ecryptfs by design calls open handlers from a kernel thread that doesn't drop privileges or so.) Signed-off-by: Jann Horn Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/proc/root.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/proc/root.c b/fs/proc/root.c index 361ab4ee42fc..ec649c92d270 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -121,6 +121,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, if (IS_ERR(sb)) return ERR_CAST(sb); + /* + * procfs isn't actually a stacking filesystem; however, there is + * too much magic going on inside it to permit stacking things on + * top of it + */ + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + if (!proc_parse_options(options, ns)) { deactivate_locked_super(sb); return ERR_PTR(-EINVAL); From 2f36db71009304b3f0b95afacd8eba1f9f046b87 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 1 Jun 2016 11:55:06 +0200 Subject: [PATCH 0472/1050] ecryptfs: forbid opening files without mmap handler This prevents users from triggering a stack overflow through a recursive invocation of pagefault handling that involves mapping procfs files into virtual memory. Signed-off-by: Jann Horn Acked-by: Tyler Hicks Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/ecryptfs/kthread.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 866bb18efefe..e818f5ac7a26 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "ecryptfs_kernel.h" struct ecryptfs_open_req { @@ -147,7 +148,7 @@ int ecryptfs_privileged_open(struct file **lower_file, flags |= IS_RDONLY(d_inode(lower_dentry)) ? O_RDONLY : O_RDWR; (*lower_file) = dentry_open(&req.path, flags, cred); if (!IS_ERR(*lower_file)) - goto out; + goto have_file; if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; @@ -165,8 +166,16 @@ int ecryptfs_privileged_open(struct file **lower_file, mutex_unlock(&ecryptfs_kthread_ctl.mux); wake_up(&ecryptfs_kthread_ctl.wait); wait_for_completion(&req.done); - if (IS_ERR(*lower_file)) + if (IS_ERR(*lower_file)) { rc = PTR_ERR(*lower_file); + goto out; + } +have_file: + if ((*lower_file)->f_op->mmap == NULL) { + fput(*lower_file); + *lower_file = NULL; + rc = -EMEDIUMTYPE; + } out: return rc; } From 29d6455178a09e1dc340380c582b13356227e8df Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 1 Jun 2016 11:55:07 +0200 Subject: [PATCH 0473/1050] sched: panic on corrupted stack end Until now, hitting this BUG_ON caused a recursive oops (because oops handling involves do_exit(), which calls into the scheduler, which in turn raises an oops), which caused stuff below the stack to be overwritten until a panic happened (e.g. via an oops in interrupt context, caused by the overwritten CPU index in the thread_info). Just panic directly. Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- kernel/sched/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d1f7149f8704..11546a6ed5df 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3047,7 +3047,8 @@ static noinline void __schedule_bug(struct task_struct *prev) static inline void schedule_debug(struct task_struct *prev) { #ifdef CONFIG_SCHED_STACK_END_CHECK - BUG_ON(task_stack_end_corrupted(prev)); + if (task_stack_end_corrupted(prev)) + panic("corrupted stack end detected inside scheduler\n"); #endif if (unlikely(in_atomic_preempt_off())) { From a44323e2a8f342848bb77e8e04fcd85fcb91b3b4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 May 2016 15:13:02 -0700 Subject: [PATCH 0474/1050] uvc: Forward compat ioctls to their handlers directly The current code goes through a lot of indirection just to call a known handler. Simplify it: just call the handlers directly. Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski --- drivers/media/usb/uvc/uvc_v4l2.c | 39 +++++++++++++++----------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c index d7723ce772b3..12690c1ea8f8 100644 --- a/drivers/media/usb/uvc/uvc_v4l2.c +++ b/drivers/media/usb/uvc/uvc_v4l2.c @@ -1408,47 +1408,44 @@ static int uvc_v4l2_put_xu_query(const struct uvc_xu_control_query *kp, static long uvc_v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg) { + struct uvc_fh *handle = file->private_data; union { struct uvc_xu_control_mapping xmap; struct uvc_xu_control_query xqry; } karg; void __user *up = compat_ptr(arg); - mm_segment_t old_fs; long ret; switch (cmd) { case UVCIOC_CTRL_MAP32: - cmd = UVCIOC_CTRL_MAP; ret = uvc_v4l2_get_xu_mapping(&karg.xmap, up); + if (ret) + return ret; + ret = uvc_ioctl_ctrl_map(handle->chain, &karg.xmap); + if (ret) + return ret; + ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up); + if (ret) + return ret; + break; case UVCIOC_CTRL_QUERY32: - cmd = UVCIOC_CTRL_QUERY; ret = uvc_v4l2_get_xu_query(&karg.xqry, up); + if (ret) + return ret; + ret = uvc_xu_ctrl_query(handle->chain, &karg.xqry); + if (ret) + return ret; + ret = uvc_v4l2_put_xu_query(&karg.xqry, up); + if (ret) + return ret; break; default: return -ENOIOCTLCMD; } - old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = video_ioctl2(file, cmd, (unsigned long)&karg); - set_fs(old_fs); - - if (ret < 0) - return ret; - - switch (cmd) { - case UVCIOC_CTRL_MAP: - ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up); - break; - - case UVCIOC_CTRL_QUERY: - ret = uvc_v4l2_put_xu_query(&karg.xqry, up); - break; - } - return ret; } #endif From f89dec72e98b34b3be66bb1ef1be62974c0f8483 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 11 May 2016 17:41:27 -0700 Subject: [PATCH 0475/1050] uvc_v4l2: Simplify compat ioctl implementation The uvc compat ioctl implementation seems to have copied user data for no good reason. Remove a bunch of copies. Signed-off-by: Andy Lutomirski --- drivers/media/usb/uvc/uvc_v4l2.c | 58 ++------------------------------ 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c index 12690c1ea8f8..c04bc6afb965 100644 --- a/drivers/media/usb/uvc/uvc_v4l2.c +++ b/drivers/media/usb/uvc/uvc_v4l2.c @@ -1274,8 +1274,6 @@ struct uvc_xu_control_mapping32 { static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp, const struct uvc_xu_control_mapping32 __user *up) { - struct uvc_menu_info __user *umenus; - struct uvc_menu_info __user *kmenus; compat_caddr_t p; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || @@ -1292,17 +1290,7 @@ static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp, if (__get_user(p, &up->menu_info)) return -EFAULT; - umenus = compat_ptr(p); - if (!access_ok(VERIFY_READ, umenus, kp->menu_count * sizeof(*umenus))) - return -EFAULT; - - kmenus = compat_alloc_user_space(kp->menu_count * sizeof(*kmenus)); - if (kmenus == NULL) - return -EFAULT; - kp->menu_info = kmenus; - - if (copy_in_user(kmenus, umenus, kp->menu_count * sizeof(*umenus))) - return -EFAULT; + kp->menu_info = compat_ptr(p); return 0; } @@ -1310,10 +1298,6 @@ static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp, static int uvc_v4l2_put_xu_mapping(const struct uvc_xu_control_mapping *kp, struct uvc_xu_control_mapping32 __user *up) { - struct uvc_menu_info __user *umenus; - struct uvc_menu_info __user *kmenus = kp->menu_info; - compat_caddr_t p; - if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || __copy_to_user(up, kp, offsetof(typeof(*up), menu_info)) || __put_user(kp->menu_count, &up->menu_count)) @@ -1322,16 +1306,6 @@ static int uvc_v4l2_put_xu_mapping(const struct uvc_xu_control_mapping *kp, if (__clear_user(up->reserved, sizeof(up->reserved))) return -EFAULT; - if (kp->menu_count == 0) - return 0; - - if (get_user(p, &up->menu_info)) - return -EFAULT; - umenus = compat_ptr(p); - - if (copy_in_user(umenus, kmenus, kp->menu_count * sizeof(*umenus))) - return -EFAULT; - return 0; } @@ -1346,8 +1320,6 @@ struct uvc_xu_control_query32 { static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp, const struct uvc_xu_control_query32 __user *up) { - u8 __user *udata; - u8 __user *kdata; compat_caddr_t p; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || @@ -1361,17 +1333,7 @@ static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp, if (__get_user(p, &up->data)) return -EFAULT; - udata = compat_ptr(p); - if (!access_ok(VERIFY_READ, udata, kp->size)) - return -EFAULT; - - kdata = compat_alloc_user_space(kp->size); - if (kdata == NULL) - return -EFAULT; - kp->data = kdata; - - if (copy_in_user(kdata, udata, kp->size)) - return -EFAULT; + kp->data = compat_ptr(p); return 0; } @@ -1379,26 +1341,10 @@ static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp, static int uvc_v4l2_put_xu_query(const struct uvc_xu_control_query *kp, struct uvc_xu_control_query32 __user *up) { - u8 __user *udata; - u8 __user *kdata = kp->data; - compat_caddr_t p; - if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || __copy_to_user(up, kp, offsetof(typeof(*up), data))) return -EFAULT; - if (kp->size == 0) - return 0; - - if (get_user(p, &up->data)) - return -EFAULT; - udata = compat_ptr(p); - if (!access_ok(VERIFY_READ, udata, kp->size)) - return -EFAULT; - - if (copy_in_user(udata, kdata, kp->size)) - return -EFAULT; - return 0; } From 69f1804a9ab602701217a8c23d371f8f36f8b57a Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Mon, 9 May 2016 00:07:46 -0400 Subject: [PATCH 0476/1050] mei: don't use wake_up_interruptible for wr_ctrl wr_ctrl waiters are none interruptible, so should be waken up with call to wake_up and not to wake_up_interruptible. This fixes commit: 7ff4bdd ("mei: fix waiting for wr_ctrl for corner cases.") Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index eed254da63a8..641c1a566687 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -730,7 +730,7 @@ static void mei_cl_wake_all(struct mei_cl *cl) /* synchronized under device mutex */ if (waitqueue_active(&cl->wait)) { cl_dbg(dev, cl, "Waking up ctrl write clients!\n"); - wake_up_interruptible(&cl->wait); + wake_up(&cl->wait); } } From fc0f7e3317c5f406e6d5520209b4689a4ffecfdf Mon Sep 17 00:00:00 2001 From: Manfred Schlaegl Date: Mon, 6 Jun 2016 10:47:47 +0200 Subject: [PATCH 0477/1050] net: phy: smsc: reintroduced unconditional soft reset We detected some problems using the smsc lan8720a in combination with i.MX28 and tracked this down to commit 21009686662f ("net: phy: smsc: move smsc_phy_config_init reset part in a soft_reset function") With 2100968666 the generic soft reset is replaced by a specific function which handles power down state correctly. But additionally the soft reset itself got conditional and is therefore also only performed if the phy is in power down state. This patch keeps the conditional wake up from power down, but re-introduces the unconditional soft reset using the generic soft reset function. It was tested on linux-4.1.25 and linux-4.7.0-rc2. Signed-off-by: Manfred Schlaegl Signed-off-by: David S. Miller --- drivers/net/phy/smsc.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 2e21e9366f76..b62c4aaee40b 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -75,22 +75,13 @@ static int smsc_phy_reset(struct phy_device *phydev) * in all capable mode before using it. */ if ((rc & MII_LAN83C185_MODE_MASK) == MII_LAN83C185_MODE_POWERDOWN) { - int timeout = 50000; - - /* set "all capable" mode and reset the phy */ + /* set "all capable" mode */ rc |= MII_LAN83C185_MODE_ALL; phy_write(phydev, MII_LAN83C185_SPECIAL_MODES, rc); - phy_write(phydev, MII_BMCR, BMCR_RESET); - - /* wait end of reset (max 500 ms) */ - do { - udelay(10); - if (timeout-- == 0) - return -1; - rc = phy_read(phydev, MII_BMCR); - } while (rc & BMCR_RESET); } - return 0; + + /* reset the phy */ + return genphy_soft_reset(phydev); } static int lan911x_config_init(struct phy_device *phydev) From 56fae404fb2c306db0a35dad0d16fa24c65678f3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 7 Jun 2016 12:06:58 +0300 Subject: [PATCH 0478/1050] bridge: Fix incorrect re-injection of STP packets Commit 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") fixed incorrect usage of NF_HOOK's return value by consuming packets in okfn via br_pass_frame_up(). However, this function re-injects packets to the Rx path with skb->dev set to the bridge device, which breaks kernel's STP, as all STP packets appear to originate from the bridge device itself. Instead, if STP is enabled and bridge isn't a 802.1ad bridge, then learn packet's SMAC and inject it back to the Rx path for further processing by the packet handlers. The patch also makes netfilter's behavior consistent with regards to packets destined to the Bridge Group Address, as no hook registered at LOCAL_IN will ever be called, regardless if STP is enabled or not. Cc: Florian Westphal Cc: Shmulik Ladkani Cc: Toshiaki Makita Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/bridge/br_input.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 160797722228..43d2cd862bc2 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -213,8 +213,7 @@ drop: } EXPORT_SYMBOL_GPL(br_handle_frame_finish); -/* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +static void __br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -222,6 +221,14 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu /* check if vlan is allowed, to avoid spoofing */ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); +} + +/* note: already called with rcu_read_lock */ +static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + + __br_handle_local_finish(skb); BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; br_pass_frame_up(skb); @@ -274,7 +281,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) if (p->br->stp_enabled == BR_NO_STP || fwd_mask & (1u << dest[5])) goto forward; - break; + *pskb = skb; + __br_handle_local_finish(skb); + return RX_HANDLER_PASS; case 0x01: /* IEEE MAC (Pause) */ goto drop; From c3ec5e5ce9cea1f369a5a8ad69d6471680796bc6 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 9 Jun 2016 18:05:09 +0100 Subject: [PATCH 0479/1050] net: diag: add missing declarations The functions inet_diag_msg_common_fill and inet_diag_msg_attrs_fill seem to have been missed from the include/linux/inet_diag.h header file. Add them to fix the following warnings: net/ipv4/inet_diag.c:69:6: warning: symbol 'inet_diag_msg_common_fill' was not declared. Should it be static? net/ipv4/inet_diag.c:108:5: warning: symbol 'inet_diag_msg_attrs_fill' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 6 ++++++ net/sctp/sctp_diag.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 7c27fa1030e8..feb04ea20f11 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -52,6 +52,12 @@ struct sock *inet_diag_find_one_icsk(struct net *net, int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); +void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); + +int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns); + extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); #endif /* _INET_DIAG_H_ */ diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 1ce724b87618..f69edcf219e5 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -3,12 +3,6 @@ #include #include -extern void inet_diag_msg_common_fill(struct inet_diag_msg *r, - struct sock *sk); -extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); - static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info); From 0b392be9a86560dae3af2e7528f226ff465ab549 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 10 Jun 2016 12:11:06 +0100 Subject: [PATCH 0480/1050] net: ipconfig: avoid warning by making ic_addrservaddr static The symbol ic_addrservaddr is not static, but has no declaration to match so make it static to fix the following warning: net/ipv4/ipconfig.c:130:8: warning: symbol 'ic_addrservaddr' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 2ed9dd2b5f2f..eccf9fd190c0 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,7 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ -__be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ __be32 ic_servaddr = NONE; /* Boot server IP address */ From 562c5a70400c75f50d99de631666ac7cc2f03958 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:27:58 +0200 Subject: [PATCH 0481/1050] net: mediatek: add missing return code check The code fails to check if the scratch memory was properly allocated. Add this check and return with an error if the allocation failed. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 4763252bbf85..11cd7304e497 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -495,6 +495,9 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE, GFP_KERNEL); + if (unlikely(!eth->scratch_head)) + return -ENOMEM; + dma_addr = dma_map_single(eth->dev, eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE, DMA_FROM_DEVICE); From 605e4fe476956c67ced8518247e6c9601a31b868 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:27:59 +0200 Subject: [PATCH 0482/1050] net: mediatek: fix missing free of scratch memory Scratch memory gets allocated in mtk_init_fq_dma() but the corresponding code to free it is missing inside mtk_dma_free() causing a memory leak. With this patch applied, we can run ifconfig up/down several thousand times without any problems. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 18 +++++++++++++----- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 ++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 11cd7304e497..fb8b17db7fa2 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -481,14 +481,14 @@ static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd, /* the qdma core needs scratch memory to be setup */ static int mtk_init_fq_dma(struct mtk_eth *eth) { - dma_addr_t phy_ring_head, phy_ring_tail; + dma_addr_t phy_ring_tail; int cnt = MTK_DMA_SIZE; dma_addr_t dma_addr; int i; eth->scratch_ring = dma_alloc_coherent(eth->dev, cnt * sizeof(struct mtk_tx_dma), - &phy_ring_head, + ð->phy_scratch_ring, GFP_ATOMIC | __GFP_ZERO); if (unlikely(!eth->scratch_ring)) return -ENOMEM; @@ -505,19 +505,19 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) return -ENOMEM; memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt); - phy_ring_tail = phy_ring_head + + phy_ring_tail = eth->phy_scratch_ring + (sizeof(struct mtk_tx_dma) * (cnt - 1)); for (i = 0; i < cnt; i++) { eth->scratch_ring[i].txd1 = (dma_addr + (i * MTK_QDMA_PAGE_SIZE)); if (i < cnt - 1) - eth->scratch_ring[i].txd2 = (phy_ring_head + + eth->scratch_ring[i].txd2 = (eth->phy_scratch_ring + ((i + 1) * sizeof(struct mtk_tx_dma))); eth->scratch_ring[i].txd3 = TX_DMA_SDL(MTK_QDMA_PAGE_SIZE); } - mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD); + mtk_w32(eth, eth->phy_scratch_ring, MTK_QDMA_FQ_HEAD); mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL); mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT); mtk_w32(eth, MTK_QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN); @@ -1210,6 +1210,14 @@ static void mtk_dma_free(struct mtk_eth *eth) for (i = 0; i < MTK_MAC_COUNT; i++) if (eth->netdev[i]) netdev_reset_queue(eth->netdev[i]); + if (eth->scratch_ring) { + dma_free_coherent(eth->dev, + MTK_DMA_SIZE * sizeof(struct mtk_tx_dma), + eth->scratch_ring, + eth->phy_scratch_ring); + eth->scratch_ring = NULL; + eth->phy_scratch_ring = 0; + } mtk_tx_clean(eth); mtk_rx_clean(eth); kfree(eth->scratch_head); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index eed626d56ea4..57f7e8a3dbe2 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -357,6 +357,7 @@ struct mtk_rx_ring { * @rx_ring: Pointer to the memore holding info about the RX ring * @rx_napi: The NAPI struct * @scratch_ring: Newer SoCs need memory for a second HW managed TX ring + * @phy_scratch_ring: physical address of scratch_ring * @scratch_head: The scratch memory that scratch_ring points to. * @clk_ethif: The ethif clock * @clk_esw: The switch clock @@ -384,6 +385,7 @@ struct mtk_eth { struct mtk_rx_ring rx_ring; struct napi_struct rx_napi; struct mtk_tx_dma *scratch_ring; + dma_addr_t phy_scratch_ring; void *scratch_head; struct clk *clk_ethif; struct clk *clk_esw; From 2fae723cefb8bfe712371978288aa0a70b54802e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:00 +0200 Subject: [PATCH 0483/1050] net: mediatek: invalid buffer lookup in mtk_tx_map() The lookup of the tx_buffer in the error path inside mtk_tx_map() uses the wrong descriptor pointer. This looks like a copy & paste error. Change the code to use the correct pointer. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index fb8b17db7fa2..8c3e54387948 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -674,7 +674,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, err_dma: do { - tx_buf = mtk_desc_to_tx_buf(ring, txd); + tx_buf = mtk_desc_to_tx_buf(ring, itxd); /* unmap dma */ mtk_tx_unmap(&dev->dev, tx_buf); From 94321a9fc9f5b6c6e949cc7c69741538f556ab74 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:01 +0200 Subject: [PATCH 0484/1050] net: mediatek: dropped rx packets are not being counted properly There are two places inside mtk_poll_rx where rx_dropped is not being incremented properly. Fix this by adding the missing code to increment the counter. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8c3e54387948..b6d3c217722d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -829,6 +829,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) { skb_free_frag(new_data); + netdev->stats.rx_dropped++; goto release_desc; } @@ -836,6 +837,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, skb = build_skb(data, ring->frag_size); if (unlikely(!skb)) { put_page(virt_to_head_page(new_data)); + netdev->stats.rx_dropped++; goto release_desc; } skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); From 6675086d04e7c0748cd5884f7c8611b5f0836250 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:02 +0200 Subject: [PATCH 0485/1050] net: mediatek: add next data pointer coherency protection The QDMA engine can fail to update the register pointing to the next TX descriptor if this bit does not get set in the QDMA configuration register. Not setting this bit can result in invalid values inside the TX rings registers which will causes TX stalls. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b6d3c217722d..f30c2e474b87 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1282,7 +1282,7 @@ static int mtk_start_dma(struct mtk_eth *eth) mtk_w32(eth, MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN | MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS | - MTK_RX_BT_32DWORDS, + MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO, MTK_QDMA_GLO_CFG); return 0; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 57f7e8a3dbe2..a5eb7c62306b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -91,6 +91,7 @@ #define MTK_QDMA_GLO_CFG 0x1A04 #define MTK_RX_2B_OFFSET BIT(31) #define MTK_RX_BT_32DWORDS (3 << 11) +#define MTK_NDP_CO_PRO BIT(10) #define MTK_TX_WB_DDONE BIT(6) #define MTK_DMA_SIZE_16DWORDS (2 << 4) #define MTK_RX_DMA_BUSY BIT(3) From 2ff0bb61646f286fa97db2904491974302a14f1f Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:03 +0200 Subject: [PATCH 0486/1050] net: mediatek: disable all interrupts during probe The current code only disables those IRQs that we will later use. To ensure that we have a predefined state, we really want to disable all IRQs. Change the code to disable all IRQs to achieve this. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index f30c2e474b87..e3dadae9f6dd 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1396,7 +1396,7 @@ static int __init mtk_hw_init(struct mtk_eth *eth) /* disable delay and normal interrupt */ mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); - mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT); + mtk_irq_disable(eth, ~0); mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); mtk_w32(eth, 0, MTK_RST_GL); From 04698cccb1de54d5d97fda2e4a1c6ca365da0f70 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:04 +0200 Subject: [PATCH 0487/1050] net: mediatek: fix threshold value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic to calculate the threshold value for stopping the TX queue is bad. Currently it will always use 1/2 of the rings size, which is way too much. Set the threshold to MAX_SKB_FRAGS. This makes sure that the queue is stopped when there is not enough room to accept an additional segment.  Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e3dadae9f6dd..032939d211a7 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1033,8 +1033,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) atomic_set(&ring->free_count, MTK_DMA_SIZE - 2); ring->next_free = &ring->dma[0]; ring->last_free = &ring->dma[MTK_DMA_SIZE - 2]; - ring->thresh = max((unsigned long)MTK_DMA_SIZE >> 2, - MAX_SKB_FRAGS); + ring->thresh = MAX_SKB_FRAGS; /* make sure that all changes to the dma ring are flushed before we * continue From eaadf9fd3f6390f6ecbd0dfbd5997a0486fc9d5e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:05 +0200 Subject: [PATCH 0488/1050] net: mediatek: increase watchdog_timeo During stress testing, after reducing the threshold value, we have seen TX timeouts that were caused by the watchdog_timeo value being too low. Increase the value to 5 * HZ which is a value commonly used by many other drivers. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 032939d211a7..3b3ba2e55bdc 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1709,7 +1709,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET; SET_NETDEV_DEV(eth->netdev[id], eth->dev); - eth->netdev[id]->watchdog_timeo = HZ; + eth->netdev[id]->watchdog_timeo = 5 * HZ; eth->netdev[id]->netdev_ops = &mtk_netdev_ops; eth->netdev[id]->base_addr = (unsigned long)eth->base; eth->netdev[id]->vlan_features = MTK_HW_FEATURES & From 12c97c13ea7174db5b5dc4a1ef91d4e9245bb569 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:06 +0200 Subject: [PATCH 0489/1050] net: mediatek: fix off by one in the TX ring allocation The TX ring setup has an off by one error causing it to not utilise all descriptors. This has the side effect that we need to reset the next pointer at runtime to make it work. Fix the off by one and remove the code fixing the ring at runtime. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3b3ba2e55bdc..c097e9e3926c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -926,7 +926,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again) } mtk_tx_unmap(eth->dev, tx_buf); - ring->last_free->txd2 = next_cpu; ring->last_free = desc; atomic_inc(&ring->free_count); @@ -1032,7 +1031,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) atomic_set(&ring->free_count, MTK_DMA_SIZE - 2); ring->next_free = &ring->dma[0]; - ring->last_free = &ring->dma[MTK_DMA_SIZE - 2]; + ring->last_free = &ring->dma[MTK_DMA_SIZE - 1]; ring->thresh = MAX_SKB_FRAGS; /* make sure that all changes to the dma ring are flushed before we From ad3cba989e8b1bbefe078eece29f0e8d8aaea1d6 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:07 +0200 Subject: [PATCH 0490/1050] net: mediatek: only wake the queue if it is stopped The current code unconditionally wakes up the queue at the end of each tx_poll action. Change the code to only wake up the queues if any of them have actually been stopped before. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c097e9e3926c..40e37b158a69 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -704,6 +704,20 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb) return nfrags; } +static int mtk_queue_stopped(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->netdev[i]) + continue; + if (netif_queue_stopped(eth->netdev[i])) + return 1; + } + + return 0; +} + static void mtk_wake_queue(struct mtk_eth *eth) { int i; @@ -950,7 +964,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again) if (!total) return 0; - if (atomic_read(&ring->free_count) > ring->thresh) + if (mtk_queue_stopped(eth) && + (atomic_read(&ring->free_count) > ring->thresh)) mtk_wake_queue(eth); return total; From 82c6544dddc6c4bd940917af2f987dee6be0fc17 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:08 +0200 Subject: [PATCH 0491/1050] net: mediatek: remove superfluous queue wake up call The code checks if the queue should be stopped because we are below the threshold of free descriptors only to check if it should be started again. If we do end up in a state where we are at the threshold limit, it makes more sense to just stop the queue and wait for the next IRQ to trigger the TX housekeeping again. There is no rush in enqueuing the next packet, it needs to wait for all the others in the queue to be dispatched first anyway. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 40e37b158a69..d1cdc2d76151 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -783,12 +783,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0) goto drop; - if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) { + if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) mtk_stop_queue(eth); - if (unlikely(atomic_read(&ring->free_count) > - ring->thresh)) - mtk_wake_queue(eth); - } + spin_unlock_irqrestore(ð->page_lock, flags); return NETDEV_TX_OK; From a2f27217e4e60e663b5b971b0ccb287a9548b04e Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Fri, 10 Jun 2016 16:53:05 +0200 Subject: [PATCH 0492/1050] net: au1000_eth: fix PHY detection Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541 ("phy: Add API for {un}registering an mdio device to a bus.") broke PHY detection on this driver with a copy-paste bug: The code is looking 32 times for a PHY at address 0. Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have their (autodetected) PHYs at address 31. Cc: Andrew Lunn Signed-off-by: Manuel Lauss Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index e0fb0f1122db..d8c77e8e25ed 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -508,13 +508,12 @@ static int au1000_mii_probe(struct net_device *dev) /* find the first (lowest address) PHY * on the current MAC's MII bus */ - for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) - if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { - phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); - if (!aup->phy_search_highest_addr) - /* break out with first one found */ - break; - } + for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { + phydev = mdiobus_get_phy(aup->mii_bus, phy_addr); + if (phydev && !aup->phy_search_highest_addr) + /* break out with first one found */ + break; + } if (aup->phy1_search_mac0) { /* try harder to find a PHY */ From 86c5fe4c932a8ef2d32f8b3d32cc9f0476fc54f3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 10 Jun 2016 23:34:24 -0700 Subject: [PATCH 0493/1050] Revert "net: au1000_eth: fix PHY detection" This reverts commit a2f27217e4e60e663b5b971b0ccb287a9548b04e. I applied the wrong version of this. Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index d8c77e8e25ed..e0fb0f1122db 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -508,12 +508,13 @@ static int au1000_mii_probe(struct net_device *dev) /* find the first (lowest address) PHY * on the current MAC's MII bus */ - for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { - phydev = mdiobus_get_phy(aup->mii_bus, phy_addr); - if (phydev && !aup->phy_search_highest_addr) - /* break out with first one found */ - break; - } + for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) + if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { + phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); + if (!aup->phy_search_highest_addr) + /* break out with first one found */ + break; + } if (aup->phy1_search_mac0) { /* try harder to find a PHY */ From 92ca8241533009e4e05a9f3999a75389678af094 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Sat, 11 Jun 2016 00:13:04 +0200 Subject: [PATCH 0494/1050] net: au1000_eth: fix PHY detection Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541 ("phy: Add API for {un}registering an mdio device to a bus.") broke PHY detection on this driver with a copy-paste bug: The code is looking 32 times for a PHY at address 0. Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have their (autodetected) PHYs at address 31. Cc: Andrew Lunn Signed-off-by: Manuel Lauss Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index e0fb0f1122db..20760e10211a 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -509,8 +509,8 @@ static int au1000_mii_probe(struct net_device *dev) * on the current MAC's MII bus */ for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) - if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { - phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); + if (mdiobus_get_phy(aup->mii_bus, phy_addr)) { + phydev = mdiobus_get_phy(aup->mii_bus, phy_addr); if (!aup->phy_search_highest_addr) /* break out with first one found */ break; From 6e85dbe4b461e59fa3cad6f6235cb47fa4c6a629 Mon Sep 17 00:00:00 2001 From: Crestez Dan Leonard Date: Fri, 3 Jun 2016 21:30:24 +0300 Subject: [PATCH 0495/1050] iio: inv_mpu6050: Fix use-after-free in ACPI code In some cases this can result in incorrectly returning a negative value from asus_acpi_get_sensor_info and the AK8963 magnetometer failing to show up. Note cpm is an alias for buffer.pointer which isn't apparent in this patch on it's own. Cc: Srinivas Pandruvada Signed-off-by: Crestez Dan Leonard Acked-by: Daniel Baluta Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c index f62b8bd9ad7e..dd6fc6d21f9d 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c @@ -56,6 +56,7 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev, int i; acpi_status status; union acpi_object *cpm; + int ret; status = acpi_evaluate_object(adev->handle, "CNF0", NULL, &buffer); if (ACPI_FAILURE(status)) @@ -82,10 +83,10 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev, } } } - + ret = cpm->package.count; kfree(buffer.pointer); - return cpm->package.count; + return ret; } static int acpi_i2c_check_resource(struct acpi_resource *ares, void *data) From 7e982555d89cc84b1fa23b5d54c7ffd9f7753908 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 30 May 2016 15:50:24 +0200 Subject: [PATCH 0496/1050] staging: iio: fix ad7606_spi regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As pointed out by Geert Uytterhoeven, the patch was incorrect and breaks the driver, which was fortunately pointed out by this gcc warning: drivers/staging/iio/adc/ad7606_spi.c: In function ‘ad7606_spi_read_block’: drivers/staging/iio/adc/ad7606_spi.c:34: warning: ‘data’ is used uninitialized in this function The effect of the patch is that the data is copied into a random memory location (from the uninitialized pointer) instead of being byteswapped in place. This adds the initialization for the 'data' variable back to restore the original behavior. Cc: Ksenija Stanojevic Fixes: 87787e5ef727 ("Staging: iio: Fix sparse endian warning") Signed-off-by: Arnd Bergmann Acked-by: Geert Uytterhoeven Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/ad7606_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/adc/ad7606_spi.c b/drivers/staging/iio/adc/ad7606_spi.c index 825da0769936..9587fa86dc69 100644 --- a/drivers/staging/iio/adc/ad7606_spi.c +++ b/drivers/staging/iio/adc/ad7606_spi.c @@ -21,7 +21,7 @@ static int ad7606_spi_read_block(struct device *dev, { struct spi_device *spi = to_spi_device(dev); int i, ret; - unsigned short *data; + unsigned short *data = buf; __be16 *bdata = buf; ret = spi_read(spi, buf, count * 2); From f4070a19142d5ee06f0da0cef56a0e78995f172c Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 1 Jun 2016 20:25:54 +0100 Subject: [PATCH 0497/1050] staging: iio: ad5933: fix order of cycle conditions Correctly handle the settling time cycles value. The else branch is an impossible condition, > 1022 in the else branch of > 511. Flipping the order. Based on the Table 13 at the bottom of Page 25 of the Data Sheet: http://www.analog.com/media/en/technical-documentation/data-sheets/AD5933.pdf Signed-off-by: Luis de Bethencourt Reviewed-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- drivers/staging/iio/impedance-analyzer/ad5933.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index 9f43976f4ef2..170ac980abcb 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -444,10 +444,10 @@ static ssize_t ad5933_store(struct device *dev, st->settling_cycles = val; /* 2x, 4x handling, see datasheet */ - if (val > 511) - val = (val >> 1) | (1 << 9); - else if (val > 1022) + if (val > 1022) val = (val >> 2) | (3 << 9); + else if (val > 511) + val = (val >> 1) | (1 << 9); dat = cpu_to_be16(val); ret = ad5933_i2c_write(st->client, From 86ef7f9cbfd564377028098cf20cc1c3ec2c776d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Jun 2016 20:40:24 -0700 Subject: [PATCH 0498/1050] ipconfig: Protect ic_addrservaddr with IPCONFIG_DYNAMIC. >> net/ipv4/ipconfig.c:130:15: warning: 'ic_addrservaddr' defined but not used [-Wunused-variable] static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ Reported-by: kbuild test robot Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index eccf9fd190c0..1d71c40eaaf3 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,9 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ +#ifdef IPCONFIG_DYNAMIC static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +#endif __be32 ic_servaddr = NONE; /* Boot server IP address */ From 5edb56491d4812c42175980759da53388e5d86f5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Jun 2016 07:20:35 -0700 Subject: [PATCH 0499/1050] Linux 4.7-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8d1301ab59fd..b409076c7c18 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From ea01a18494b3d7a91b2f1f2a6a5aaef4741bc294 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jun 2016 11:24:46 -0400 Subject: [PATCH 0500/1050] autofs races * make autofs4_expire_indirect() skip the dentries being in process of expiry * do *not* mess with list_move(); making sure that dentry with AUTOFS_INF_EXPIRING are not picked for expiry is enough. * do not remove NO_RCU when we set EXPIRING, don't bother with smp_mb() there. Clear it at the same time we clear EXPIRING. Makes a bunch of tests simpler. * rename NO_RCU to WANT_EXPIRE, which is what it really is. Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 8 ++++++-- fs/autofs4/expire.c | 27 ++++++++------------------- fs/autofs4/root.c | 2 +- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index f0d268b97d19..a439548de785 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -70,9 +70,13 @@ struct autofs_info { }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */ -#define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered +#define AUTOFS_INF_WANT_EXPIRE (1<<1) /* the dentry is being considered * for expiry, so RCU_walk is - * not permitted + * not permitted. If it progresses to + * actual expiry attempt, the flag is + * not cleared when EXPIRING is set - + * in that case it gets cleared only + * when it comes to clearing EXPIRING. */ #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 9510d8d2e9cd..b493909e7492 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -316,19 +316,17 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, if (ino->flags & AUTOFS_INF_PENDING) goto out; if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { - ino->flags |= AUTOFS_INF_NO_RCU; + ino->flags |= AUTOFS_INF_WANT_EXPIRE; spin_unlock(&sbi->fs_lock); synchronize_rcu(); spin_lock(&sbi->fs_lock); if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { ino->flags |= AUTOFS_INF_EXPIRING; - smp_mb(); - ino->flags &= ~AUTOFS_INF_NO_RCU; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); return root; } - ino->flags &= ~AUTOFS_INF_NO_RCU; + ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; } out: spin_unlock(&sbi->fs_lock); @@ -446,7 +444,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, while ((dentry = get_next_positive_subdir(dentry, root))) { spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); - if (ino->flags & AUTOFS_INF_NO_RCU) + if (ino->flags & AUTOFS_INF_WANT_EXPIRE) expired = NULL; else expired = should_expire(dentry, mnt, timeout, how); @@ -455,7 +453,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, continue; } ino = autofs4_dentry_ino(expired); - ino->flags |= AUTOFS_INF_NO_RCU; + ino->flags |= AUTOFS_INF_WANT_EXPIRE; spin_unlock(&sbi->fs_lock); synchronize_rcu(); spin_lock(&sbi->fs_lock); @@ -465,7 +463,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, goto found; } - ino->flags &= ~AUTOFS_INF_NO_RCU; + ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; if (expired != dentry) dput(expired); spin_unlock(&sbi->fs_lock); @@ -475,17 +473,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, found: pr_debug("returning %p %pd\n", expired, expired); ino->flags |= AUTOFS_INF_EXPIRING; - smp_mb(); - ino->flags &= ~AUTOFS_INF_NO_RCU; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); - spin_lock(&sbi->lookup_lock); - spin_lock(&expired->d_parent->d_lock); - spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&expired->d_parent->d_subdirs, &expired->d_child); - spin_unlock(&expired->d_lock); - spin_unlock(&expired->d_parent->d_lock); - spin_unlock(&sbi->lookup_lock); return expired; } @@ -496,7 +485,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) int status; /* Block on any pending expire */ - if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))) + if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) return 0; if (rcu_walk) return -ECHILD; @@ -554,7 +543,7 @@ int autofs4_expire_run(struct super_block *sb, ino = autofs4_dentry_ino(dentry); /* avoid rapid-fire expire attempts if expiry fails */ ino->last_used = now; - ino->flags &= ~AUTOFS_INF_EXPIRING; + ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); @@ -583,7 +572,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, spin_lock(&sbi->fs_lock); /* avoid rapid-fire expire attempts if expiry fails */ ino->last_used = now; - ino->flags &= ~AUTOFS_INF_EXPIRING; + ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); dput(dentry); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 78bd80298528..3767f6641af1 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -458,7 +458,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) */ struct inode *inode; - if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)) + if (ino->flags & AUTOFS_INF_WANT_EXPIRE) return 0; if (d_mountpoint(dentry)) return 0; From cbdf451164785c9cf5acd5d2983c1e7c778df4c1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 12 Jun 2016 16:21:47 -0700 Subject: [PATCH 0501/1050] net_sched: prio: properly report out of memory errors At Qdisc creation or change time, prio_tune() creates missing pfifo qdiscs but does not return an error code if one qdisc could not be allocated. Leaving a qdisc in non operational state without telling user anything about this problem is not good. Also, testing if we replace something different than noop_qdisc a second time makes no sense so I removed useless code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_prio.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 4b0a82191bc4..071718bccdab 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -202,26 +202,18 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) sch_tree_unlock(sch); for (i = 0; i < q->bands; i++) { - if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child, *old; + struct Qdisc *child; - child = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, - TC_H_MAKE(sch->handle, i + 1)); - if (child) { - sch_tree_lock(sch); - old = q->queues[i]; - q->queues[i] = child; + if (q->queues[i] != &noop_qdisc) + continue; - if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); - qdisc_destroy(old); - } - sch_tree_unlock(sch); - } - } + child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); + if (!child) + return -ENOMEM; + sch_tree_lock(sch); + q->queues[i] = child; + sch_tree_unlock(sch); } return 0; } From d941ebe88a411aa281cc80477a93feb931a1b50b Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Sat, 11 Jun 2016 01:11:54 +0300 Subject: [PATCH 0502/1050] net: ethernet: ti: cpsw: use destroy ctlr to destroy channels There is no reason to destroy channels that are destroyed while cpdma_ctlr destroy. In this case no need to remember how much channels where created and destroy them by one, as cpdma_ctlr destroys all of them. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index e6bb0ecb12c7..53190894f17a 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2505,8 +2505,6 @@ static int cpsw_probe(struct platform_device *pdev) clean_ale_ret: cpsw_ale_destroy(priv->ale); clean_dma_ret: - cpdma_chan_destroy(priv->txch); - cpdma_chan_destroy(priv->rxch); cpdma_ctlr_destroy(priv->dma); clean_runtime_disable_ret: pm_runtime_disable(&pdev->dev); @@ -2534,8 +2532,6 @@ static int cpsw_remove(struct platform_device *pdev) unregister_netdev(ndev); cpsw_ale_destroy(priv->ale); - cpdma_chan_destroy(priv->txch); - cpdma_chan_destroy(priv->rxch); cpdma_ctlr_destroy(priv->dma); pm_runtime_disable(&pdev->dev); device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device); From d16c0d722d09496a03222dc27ee3071b7b1051e5 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 24 May 2016 08:35:38 -0500 Subject: [PATCH 0503/1050] ARM: OMAP: DRA7: powerdomain data: Set L3init and L4per to ON As per the latest revision F of public TRM for DRA7/AM57xx SoCs SPRUHZ6F[1] (April 2016), L4Per and L3init power domains now operate in always "ON" mode due to asymmetric aging limitations. Update the same [1] http://www.ti.com/lit/pdf/spruhz6 Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/powerdomains7xx_data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c index 0ec2d00f4237..8ea447ed4dc4 100644 --- a/arch/arm/mach-omap2/powerdomains7xx_data.c +++ b/arch/arm/mach-omap2/powerdomains7xx_data.c @@ -111,7 +111,7 @@ static struct powerdomain l4per_7xx_pwrdm = { .name = "l4per_pwrdm", .prcm_offs = DRA7XX_PRM_L4PER_INST, .prcm_partition = DRA7XX_PRM_PARTITION, - .pwrsts = PWRSTS_RET_ON, + .pwrsts = PWRSTS_ON, .pwrsts_logic_ret = PWRSTS_RET, .banks = 2, .pwrsts_mem_ret = { @@ -260,7 +260,7 @@ static struct powerdomain l3init_7xx_pwrdm = { .name = "l3init_pwrdm", .prcm_offs = DRA7XX_PRM_L3INIT_INST, .prcm_partition = DRA7XX_PRM_PARTITION, - .pwrsts = PWRSTS_RET_ON, + .pwrsts = PWRSTS_ON, .pwrsts_logic_ret = PWRSTS_RET, .banks = 3, .pwrsts_mem_ret = { From 9ffb668f268c79f2f58b56bbd63208440b31260f Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 24 May 2016 08:35:39 -0500 Subject: [PATCH 0504/1050] ARM: OMAP: DRA7: powerdomain data: Remove unused pwrsts_logic_ret As per the latest revision F of public TRM for DRA7/AM57xx SoCs SPRUHZ6F[1] (April 2016), with the exception of MPU power domain (and CPUx sub power domains), all other power domains can either operate in "ON" mode OR in some cases, "OFF" mode. For these power states, the logic retention state is basically ignored by PRCM and does not require to be programmed. [1] http://www.ti.com/lit/pdf/spruhz6 Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/powerdomains7xx_data.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c index 8ea447ed4dc4..88107b449710 100644 --- a/arch/arm/mach-omap2/powerdomains7xx_data.c +++ b/arch/arm/mach-omap2/powerdomains7xx_data.c @@ -36,7 +36,6 @@ static struct powerdomain iva_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_IVA_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 4, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* hwa_mem */ @@ -76,7 +75,6 @@ static struct powerdomain ipu_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_IPU_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 2, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* aessmem */ @@ -95,7 +93,6 @@ static struct powerdomain dss_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_DSS_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 1, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* dss_mem */ @@ -112,7 +109,6 @@ static struct powerdomain l4per_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_L4PER_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, - .pwrsts_logic_ret = PWRSTS_RET, .banks = 2, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* nonretained_bank */ @@ -161,7 +157,6 @@ static struct powerdomain core_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_CORE_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, - .pwrsts_logic_ret = PWRSTS_RET, .banks = 5, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* core_nret_bank */ @@ -226,7 +221,6 @@ static struct powerdomain vpe_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_VPE_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 1, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* vpe_bank */ @@ -261,7 +255,6 @@ static struct powerdomain l3init_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_L3INIT_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, - .pwrsts_logic_ret = PWRSTS_RET, .banks = 3, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* gmac_bank */ From 6b41d44862e8f3a4b95102c6ff6cad3fccc7994b Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 24 May 2016 08:35:40 -0500 Subject: [PATCH 0505/1050] ARM: OMAP: DRA7: powerdomain data: Remove unused pwrsts_mem_ret As per the latest revision F of public TRM for DRA7/AM57xx SoCs SPRUHZ6F[1] (April 2016), with the exception of MPU power domain, all other power domains do not have memories capable of retention since they all operate in either "ON" or "OFF" mode. For these power states, the retention state for memories are basically ignored by PRCM and does not require to be programmed. [1] http://www.ti.com/lit/pdf/spruhz6 Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/powerdomains7xx_data.c | 65 ---------------------- 1 file changed, 65 deletions(-) diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c index 88107b449710..eb350a673133 100644 --- a/arch/arm/mach-omap2/powerdomains7xx_data.c +++ b/arch/arm/mach-omap2/powerdomains7xx_data.c @@ -37,12 +37,6 @@ static struct powerdomain iva_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 4, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* hwa_mem */ - [1] = PWRSTS_OFF_RET, /* sl2_mem */ - [2] = PWRSTS_OFF_RET, /* tcm1_mem */ - [3] = PWRSTS_OFF_RET, /* tcm2_mem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* hwa_mem */ [1] = PWRSTS_ON, /* sl2_mem */ @@ -76,10 +70,6 @@ static struct powerdomain ipu_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 2, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* aessmem */ - [1] = PWRSTS_OFF_RET, /* periphmem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* aessmem */ [1] = PWRSTS_ON, /* periphmem */ @@ -94,9 +84,6 @@ static struct powerdomain dss_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* dss_mem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* dss_mem */ }, @@ -110,10 +97,6 @@ static struct powerdomain l4per_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 2, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* nonretained_bank */ - [1] = PWRSTS_OFF_RET, /* retained_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* nonretained_bank */ [1] = PWRSTS_ON, /* retained_bank */ @@ -128,9 +111,6 @@ static struct powerdomain gpu_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* gpu_mem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* gpu_mem */ }, @@ -144,8 +124,6 @@ static struct powerdomain wkupaon_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 1, - .pwrsts_mem_ret = { - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* wkup_bank */ }, @@ -158,13 +136,6 @@ static struct powerdomain core_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 5, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* core_nret_bank */ - [1] = PWRSTS_OFF_RET, /* core_ocmram */ - [2] = PWRSTS_OFF_RET, /* core_other_bank */ - [3] = PWRSTS_OFF_RET, /* ipu_l2ram */ - [4] = PWRSTS_OFF_RET, /* ipu_unicache */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* core_nret_bank */ [1] = PWRSTS_ON, /* core_ocmram */ @@ -222,9 +193,6 @@ static struct powerdomain vpe_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* vpe_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* vpe_bank */ }, @@ -256,11 +224,6 @@ static struct powerdomain l3init_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 3, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* gmac_bank */ - [1] = PWRSTS_OFF_RET, /* l3init_bank1 */ - [2] = PWRSTS_OFF_RET, /* l3init_bank2 */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* gmac_bank */ [1] = PWRSTS_ON, /* l3init_bank1 */ @@ -276,9 +239,6 @@ static struct powerdomain eve3_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve3_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve3_bank */ }, @@ -292,9 +252,6 @@ static struct powerdomain emu_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* emu_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* emu_bank */ }, @@ -307,11 +264,6 @@ static struct powerdomain dsp2_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 3, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* dsp2_edma */ - [1] = PWRSTS_OFF_RET, /* dsp2_l1 */ - [2] = PWRSTS_OFF_RET, /* dsp2_l2 */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* dsp2_edma */ [1] = PWRSTS_ON, /* dsp2_l1 */ @@ -327,11 +279,6 @@ static struct powerdomain dsp1_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 3, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* dsp1_edma */ - [1] = PWRSTS_OFF_RET, /* dsp1_l1 */ - [2] = PWRSTS_OFF_RET, /* dsp1_l2 */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* dsp1_edma */ [1] = PWRSTS_ON, /* dsp1_l1 */ @@ -347,9 +294,6 @@ static struct powerdomain cam_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* vip_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* vip_bank */ }, @@ -363,9 +307,6 @@ static struct powerdomain eve4_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve4_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve4_bank */ }, @@ -379,9 +320,6 @@ static struct powerdomain eve2_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve2_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve2_bank */ }, @@ -395,9 +333,6 @@ static struct powerdomain eve1_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve1_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve1_bank */ }, From 3bfbb4d1a480cc17f6ccfce13b76eb6c0dbeaf8c Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Jun 2016 11:23:15 +0100 Subject: [PATCH 0506/1050] regulator: qcom_smd: add list_voltage callback This patch adds support to list_voltage callback, so that consumers like mmc core, can get information of supported voltage range. Without this patch there is no way for mmc core to know this voltage range. Signed-off-by: Srinivas Kandagatla Signed-off-by: Mark Brown --- drivers/regulator/qcom_smd-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index 6c7fe4778793..526bf23dcb49 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -152,6 +152,7 @@ static const struct regulator_ops rpm_smps_ldo_ops_fixed = { .enable = rpm_reg_enable, .disable = rpm_reg_disable, .is_enabled = rpm_reg_is_enabled, + .list_voltage = regulator_list_voltage_linear_range, .get_voltage = rpm_reg_get_voltage, .set_voltage = rpm_reg_set_voltage, From 9aeb26cfc2abc96be42b9df2d0f2dc5d805084ff Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 3 Jun 2016 11:50:30 +0100 Subject: [PATCH 0507/1050] iommu/arm-smmu: Wire up map_sg for arm-smmu-v3 The map_sg callback is missing from arm_smmu_ops, but is required by iommu.h. Similarly to most other IOMMU drivers, connect it to default_iommu_map_sg. Cc: Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 94b68213c50d..5f6b3bcab078 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1941,6 +1941,7 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = arm_smmu_attach_dev, .map = arm_smmu_map, .unmap = arm_smmu_unmap, + .map_sg = default_iommu_map_sg, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, From 975f57fdff1d0eb9816806cabd27162a8a1a4038 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 10 Jun 2016 16:47:02 +1000 Subject: [PATCH 0508/1050] crypto: vmx - Fix ABI detection When calling ppc-xlate.pl, we pass it either linux-ppc64 or linux-ppc64le. The script however was expecting linux64le, a result of its OpenSSL origins. This means we aren't obeying the ppc64le ABIv2 rules. Fix this by checking for linux-ppc64le. Fixes: 5ca55738201c ("crypto: vmx - comply with ABIs that specify vrsave as reserved.") Cc: stable@vger.kernel.org Signed-off-by: Anton Blanchard Signed-off-by: Herbert Xu --- drivers/crypto/vmx/ppc-xlate.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl index 9f4994cabcc7..b18e67d0e065 100644 --- a/drivers/crypto/vmx/ppc-xlate.pl +++ b/drivers/crypto/vmx/ppc-xlate.pl @@ -141,7 +141,7 @@ my $vmr = sub { # Some ABIs specify vrsave, special-purpose register #256, as reserved # for system use. -my $no_vrsave = ($flavour =~ /aix|linux64le/); +my $no_vrsave = ($flavour =~ /linux-ppc64le/); my $mtspr = sub { my ($f,$idx,$ra) = @_; if ($idx == 256 && $no_vrsave) { From 12d3f49e1ffbbf8cbbb60acae5a21103c5c841ac Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 10 Jun 2016 16:47:03 +1000 Subject: [PATCH 0509/1050] crypto: vmx - Increase priority of aes-cbc cipher All of the VMX AES ciphers (AES, AES-CBC and AES-CTR) are set at priority 1000. Unfortunately this means we never use AES-CBC and AES-CTR, because the base AES-CBC cipher that is implemented on top of AES inherits its priority. To fix this, AES-CBC and AES-CTR have to be a higher priority. Set them to 2000. Testing on a POWER8 with: cryptsetup benchmark --cipher aes --key-size 256 Shows decryption speed increase from 402.4 MB/s to 3069.2 MB/s, over 7x faster. Thanks to Mike Strosaker for helping me debug this issue. Fixes: 8c755ace357c ("crypto: vmx - Adding CBC routines for VMX module") Cc: stable@vger.kernel.org Signed-off-by: Anton Blanchard Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aes_cbc.c | 2 +- drivers/crypto/vmx/aes_ctr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 495577b6d31b..94ad5c0adbcb 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = { .cra_name = "cbc(aes)", .cra_driver_name = "p8_aes_cbc", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = &crypto_blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 0a3c1b04cf3c..38ed10d761d0 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = { .cra_name = "ctr(aes)", .cra_driver_name = "p8_aes_ctr", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = &crypto_blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, From 19ced623db2fe91604d69f7d86b03144c5107739 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 8 Jun 2016 14:56:39 +0200 Subject: [PATCH 0510/1050] crypto: ux500 - memmove the right size The hash buffer is really HASH_BLOCK_SIZE bytes, someone must have thought that memmove takes n*u32 words by mistake. Tests work as good/bad as before after this patch. Cc: Joakim Bech Cc: stable@vger.kernel.org Reported-by: David Binderman Signed-off-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/crypto/ux500/hash/hash_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 574e87c7f2b8..9acccad26928 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data *device_data, &device_data->state); memmove(req_ctx->state.buffer, device_data->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_resume_state() failed!\n", @@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data *device_data, memmove(device_data->state.buffer, req_ctx->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_save_state() failed!\n", __func__); From 053ae6499a5634c0dc5fa18437e1af3d2f2ec98e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 13 Jun 2016 13:48:53 +0800 Subject: [PATCH 0511/1050] gpio: 104-idi-48: Fix missing spin_lock_init for ack_lock Fixes: 9ae482104cb9 ("gpio: 104-idi-48: Clear pending interrupt once in IRQ handler") Signed-off-by: Axel Lin Acked-by: William Breathitt Gray Signed-off-by: Linus Walleij --- drivers/gpio/gpio-104-idi-48.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c index 6c75c83baf5a..2d2763ea1a68 100644 --- a/drivers/gpio/gpio-104-idi-48.c +++ b/drivers/gpio/gpio-104-idi-48.c @@ -247,6 +247,7 @@ static int idi_48_probe(struct device *dev, unsigned int id) idi48gpio->irq = irq[id]; spin_lock_init(&idi48gpio->lock); + spin_lock_init(&idi48gpio->ack_lock); dev_set_drvdata(dev, idi48gpio); From ae4ea9a2460c7fee2ae8feeb4dfe96f5f6c3e562 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Fri, 10 Jun 2016 04:31:52 +0000 Subject: [PATCH 0512/1050] ipmi: Remove smi_msg from waiting_rcv_msgs list before handle_one_recv_msg() Commit 7ea0ed2b5be8 ("ipmi: Make the message handler easier to use for SMI interfaces") changed handle_new_recv_msgs() to call handle_one_recv_msg() for a smi_msg while the smi_msg is still connected to waiting_rcv_msgs list. That could lead to following list corruption problems: 1) low-level function treats smi_msg as not connected to list handle_one_recv_msg() could end up calling smi_send(), which assumes the msg is not connected to list. For example, the following sequence could corrupt list by doing list_add_tail() for the entry still connected to other list. handle_new_recv_msgs() msg = list_entry(waiting_rcv_msgs) handle_one_recv_msg(msg) handle_ipmb_get_msg_cmd(msg) smi_send(msg) spin_lock(xmit_msgs_lock) list_add_tail(msg) spin_unlock(xmit_msgs_lock) 2) race between multiple handle_new_recv_msgs() instances handle_new_recv_msgs() once releases waiting_rcv_msgs_lock before calling handle_one_recv_msg() then retakes the lock and list_del() it. If others call handle_new_recv_msgs() during the window shown below list_del() will be done twice for the same smi_msg. handle_new_recv_msgs() spin_lock(waiting_rcv_msgs_lock) msg = list_entry(waiting_rcv_msgs) spin_unlock(waiting_rcv_msgs_lock) | | handle_one_recv_msg(msg) | spin_lock(waiting_rcv_msgs_lock) list_del(msg) spin_unlock(waiting_rcv_msgs_lock) Fixes: 7ea0ed2b5be8 ("ipmi: Make the message handler easier to use for SMI interfaces") Signed-off-by: Jun'ichi Nomura [Added a comment to describe why this works.] Signed-off-by: Corey Minyard Cc: stable@vger.kernel.org # 3.19 Tested-by: Ye Feng --- drivers/char/ipmi/ipmi_msghandler.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 94fb407d8561..44b1bd6baa38 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3820,6 +3820,7 @@ static void handle_new_recv_msgs(ipmi_smi_t intf) while (!list_empty(&intf->waiting_rcv_msgs)) { smi_msg = list_entry(intf->waiting_rcv_msgs.next, struct ipmi_smi_msg, link); + list_del(&smi_msg->link); if (!run_to_completion) spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock, flags); @@ -3829,11 +3830,14 @@ static void handle_new_recv_msgs(ipmi_smi_t intf) if (rv > 0) { /* * To preserve message order, quit if we - * can't handle a message. + * can't handle a message. Add the message + * back at the head, this is safe because this + * tasklet is the only thing that pulls the + * messages. */ + list_add(&smi_msg->link, &intf->waiting_rcv_msgs); break; } else { - list_del(&smi_msg->link); if (rv == 0) /* Message handled */ ipmi_free_smi_msg(smi_msg); From 1c343f7b0e177e8ca7f4d4a5dd1fa790f85abbcc Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 13 Jun 2016 13:14:56 +0200 Subject: [PATCH 0513/1050] KVM: s390/mm: Fix CMMA reset during reboot commit 1e133ab296f ("s390/mm: split arch/s390/mm/pgtable.c") factored out the page table handling code from __gmap_zap and __s390_reset_cmma into ptep_zap_unused and added a simple flag that tells which one of the function (reset or not) is to be made. This also changed the behaviour, as it also zaps unused page table entries on reset. Turns out that this is wrong as s390_reset_cmma uses the page walker, which DOES NOT take the ptl lock. The most simple fix is to not do the zapping part on reset (which uses the walker) Signed-off-by: Christian Borntraeger Fixes: 1e133ab296f ("s390/mm: split arch/s390/mm/pgtable.c") Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4324b87f9398..9f0ce0e6eeb4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -437,7 +437,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); pte = *ptep; - if (pte_swap(pte) && + if (!reset && pte_swap(pte) && ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || (pgstev & _PGSTE_GPS_ZERO))) { ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); From ccaa2c12fba72f3e547d18e66820e2e6c5883113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 7 Jun 2016 17:43:04 -0400 Subject: [PATCH 0514/1050] drm/radeon: do not hard reset GPU while freezing on r600/r700 family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seems r600/r700 does not like hard reset while freezing for hibernation (regression due to 274ad65c9d02bdcbee9bae045517864c3521d530 which itself is a fix for hibernation on some GPU families). Until i can debug further issue with r600, let just disable this for r600/r700 as they are very similar family and bug affecting one likely affect the other. Reviewed-by: Christian König Signed-off-by: Jérôme Glisse Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e721e6b2766e..e61c7636864d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1631,7 +1631,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, radeon_agp_suspend(rdev); pci_save_state(dev->pdev); - if (freeze && rdev->family >= CHIP_R600) { + if (freeze && rdev->family >= CHIP_CEDAR) { rdev->asic->asic_reset(rdev, true); pci_restore_state(dev->pdev); } else if (suspend) { From 9ef8537e68941d858924a3eacee5a1945767cbab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 13 Jun 2016 16:09:53 +0200 Subject: [PATCH 0515/1050] drm/radeon: don't use fractional dividers on RS[78]80 if SS is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seems to cause problems for some older hardware. Kudos to Thom Kouwenhoven for working a lot with the PLLs and figuring this out. Reviewed-by: Alex Deucher Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/atombios_crtc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 2e216e2ea78c..259cd6e6d71c 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -589,7 +589,8 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev) || ASIC_IS_DCE8(rdev)) radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV; /* use frac fb div on RS780/RS880 */ - if ((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880)) + if (((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880)) + && !radeon_crtc->ss_enabled) radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV; if (ASIC_IS_DCE32(rdev) && mode->clock > 165000) radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV; @@ -626,7 +627,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, if (radeon_crtc->ss.refdiv) { radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV; radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv; - if (ASIC_IS_AVIVO(rdev)) + if (rdev->family >= CHIP_RV770) radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV; } } From 048765ad5af7c8939603b4c6cb96293ffa05e00d Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Sat, 11 Jun 2016 02:51:32 -0400 Subject: [PATCH 0516/1050] amdgpu: fix asic initialization for virtualized environments (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When executing in a PCI passthrough based virtuzliation environemnt, the hypervisor will usually attempt to send a PCIe bus reset signal to the ASIC when the VM reboots. In this scenario, the card is not correctly initialized, but we still consider it to be posted. Therefore, in a passthrough based environemnt we should always post the card to guarantee it is in a good state for driver initialization. However, if we are operating in SR-IOV mode it is up to the GIM driver to manage the asic state, therefore we should not post the card (and shouldn't be able to do it either). v2: add missing semi-colon Reviewed-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++++++++++++++++- drivers/gpu/drm/amd/amdgpu/cik.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/vi.c | 15 +++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 01c36b8d6222..70af26d97d28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1822,6 +1822,8 @@ struct amdgpu_asic_funcs { /* MM block clocks */ int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk); int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk); + /* query virtual capabilities */ + u32 (*get_virtual_caps)(struct amdgpu_device *adev); }; /* @@ -1916,8 +1918,12 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device); /* GPU virtualization */ +#define AMDGPU_VIRT_CAPS_SRIOV_EN (1 << 0) +#define AMDGPU_VIRT_CAPS_IS_VF (1 << 1) struct amdgpu_virtualization { bool supports_sr_iov; + bool is_virtual; + u32 caps; }; /* @@ -2206,6 +2212,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev)) #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d)) #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec)) +#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev))) #define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev)) #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev)) #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 964f31404f17..66482b429458 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1385,6 +1385,15 @@ static int amdgpu_resume(struct amdgpu_device *adev) return 0; } +static bool amdgpu_device_is_virtual(void) +{ +#ifdef CONFIG_X86 + return boot_cpu_has(X86_FEATURE_HYPERVISOR); +#else + return false; +#endif +} + /** * amdgpu_device_init - initialize the driver * @@ -1519,8 +1528,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->virtualization.supports_sr_iov = amdgpu_atombios_has_gpu_virtualization_table(adev); + /* Check if we are executing in a virtualized environment */ + adev->virtualization.is_virtual = amdgpu_device_is_virtual(); + adev->virtualization.caps = amdgpu_asic_get_virtual_caps(adev); + /* Post card if necessary */ - if (!amdgpu_card_posted(adev)) { + if (!amdgpu_card_posted(adev) || + (adev->virtualization.is_virtual && + !adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN)) { if (!adev->bios) { dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 07bc795a4ca9..910431808542 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -962,6 +962,12 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, return true; } +static u32 cik_get_virtual_caps(struct amdgpu_device *adev) +{ + /* CIK does not support SR-IOV */ + return 0; +} + static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { {mmGRBM_STATUS, false}, {mmGB_ADDR_CONFIG, false}, @@ -2007,6 +2013,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, .set_vce_clocks = &cik_set_vce_clocks, + .get_virtual_caps = &cik_get_virtual_caps, /* these should be moved to their own ip modules */ .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter, .wait_for_mc_idle = &gmc_v7_0_mc_wait_for_idle, diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 2c88d0b66cf3..a65c96029476 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -421,6 +421,20 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev, return true; } +static u32 vi_get_virtual_caps(struct amdgpu_device *adev) +{ + u32 caps = 0; + u32 reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER); + + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE)) + caps |= AMDGPU_VIRT_CAPS_SRIOV_EN; + + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER)) + caps |= AMDGPU_VIRT_CAPS_IS_VF; + + return caps; +} + static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = { {mmGB_MACROTILE_MODE7, true}, }; @@ -1118,6 +1132,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks, .set_vce_clocks = &vi_set_vce_clocks, + .get_virtual_caps = &vi_get_virtual_caps, /* these should be moved to their own ip modules */ .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, .wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle, From 05082b8bbd1a0ffc74235449c4b8930a8c240f85 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2016 15:37:34 -0400 Subject: [PATCH 0517/1050] drm/radeon: fix asic initialization for virtualized environments When executing in a PCI passthrough based virtuzliation environment, the hypervisor will usually attempt to send a PCIe bus reset signal to the ASIC when the VM reboots. In this scenario, the card is not correctly initialized, but we still consider it to be posted. Therefore, in a passthrough based environemnt we should always post the card to guarantee it is in a good state for driver initialization. Ported from amdgpu commit: amdgpu: fix asic initialization for virtualized environments Cc: Andres Rodriguez Cc: Alex Williamson Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_device.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e61c7636864d..21c44b2293bc 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -630,6 +630,23 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) /* * GPU helpers function. */ + +/** + * radeon_device_is_virtual - check if we are running is a virtual environment + * + * Check if the asic has been passed through to a VM (all asics). + * Used at driver startup. + * Returns true if virtual or false if not. + */ +static bool radeon_device_is_virtual(void) +{ +#ifdef CONFIG_X86 + return boot_cpu_has(X86_FEATURE_HYPERVISOR); +#else + return false; +#endif +} + /** * radeon_card_posted - check if the hw has already been initialized * @@ -643,6 +660,10 @@ bool radeon_card_posted(struct radeon_device *rdev) { uint32_t reg; + /* for pass through, always force asic_init */ + if (radeon_device_is_virtual()) + return false; + /* required for EFI mode on macbook2,1 which uses an r5xx asic */ if (efi_enabled(EFI_BOOT) && (rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) && From 8b18300c13a1e08e152f6b6a430faac84f986231 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2016 18:26:24 -0400 Subject: [PATCH 0518/1050] drm/amdgpu/gfx7: fix broken condition check Wrong operator. Reported-by: David Binderman Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 8c6ad1e72f02..fc8ff4d3ccf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4833,7 +4833,7 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev, case 2: for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - if ((ring->me == me_id) & (ring->pipe == pipe_id)) + if ((ring->me == me_id) && (ring->pipe == pipe_id)) amdgpu_fence_process(ring); } break; From b046302a1dbbc49102da50000a22f4c69b1d700e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 6 Jun 2016 10:51:40 -0300 Subject: [PATCH 0519/1050] MAINTAINERS: Add myself as reviewer of ARM FSL/NXP I would like to help reviewing FSL/NXP ARM architecture patches. Signed-off-by: Fabio Estevam Acked-by: Shawn Guo Signed-off-by: Olof Johansson --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7304d2e37a98..79a344505861 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1159,6 +1159,7 @@ F: arch/arm/mach-footbridge/ ARM/FREESCALE IMX / MXC ARM ARCHITECTURE M: Shawn Guo M: Sascha Hauer +R: Fabio Estevam L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git From 7c4021d403ca72ce52d39c17d8154974521a82be Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2016 18:59:17 -0400 Subject: [PATCH 0520/1050] Revert "drm/amdgpu: add pipeline sync while vmid switch in same ctx" This reverts commit 2ba272d7bde27e1db2cf1c6cee49b01b7ea08989. The issue fixed by this patch is specific to compute rings and the previous patch was enough. Additionally, this patch as been traced to strange behavior on some CZ systems so we might as well drop it. --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 ++------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 70af26d97d28..e055d5be1c3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -799,7 +799,6 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; - int vmid; }; /* @@ -937,8 +936,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size, - bool vmid_switch); + uint32_t oa_base, uint32_t oa_size); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 7a0b1e50f293..34e35423b78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -122,7 +122,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; - int vmid = 0, old_vmid = ring->vmid; struct fence *hwf; uint64_t ctx; @@ -136,11 +135,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (job) { vm = job->vm; ctx = job->ctx; - vmid = job->vm_id; } else { vm = NULL; ctx = 0; - vmid = 0; } if (!ring->ready) { @@ -166,8 +163,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr, job->gds_base, job->gds_size, job->gws_base, job->gws_size, - job->oa_base, job->oa_size, - (ring->current_ctx == ctx) && (old_vmid != vmid)); + job->oa_base, job->oa_size); if (r) { amdgpu_ring_undo(ring); return r; @@ -184,6 +180,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; + /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) continue; @@ -191,7 +188,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, need_ctx_switch); need_ctx_switch = false; - ring->vmid = vmid; } if (ring->funcs->emit_hdp_invalidate) @@ -202,7 +198,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vm_id) amdgpu_vm_reset_id(adev, job->vm_id); - ring->vmid = old_vmid; amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 62a4c127620f..9f36ed30ba11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -298,8 +298,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size, - bool vmid_switch) + uint32_t oa_base, uint32_t oa_size) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; @@ -313,7 +312,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, int r; if (ring->funcs->emit_pipeline_sync && ( - pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch)) + pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || + ring->type == AMDGPU_RING_TYPE_COMPUTE)) amdgpu_ring_emit_pipeline_sync(ring); if (ring->funcs->emit_vm_flush && From 7bc364097a89a0a9a5e5e4989d6b3e6fb2027a9e Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 10 May 2016 12:39:44 +0200 Subject: [PATCH 0521/1050] mcb: Acquire reference to device in probe mcb_probe() does not aqcuire a reference to the probed device but drops one when removing the device. As it is actually using the device, it should grab a reference via get_device(). This could lead to a panic found with a rmmod/modprobe stress test Signed-off-by: Johannes Thumshirn Reported-by: Andreas Werner Tested-by: Andreas Werner Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index b73c6e7d28e4..f5923c253f7e 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -66,6 +66,7 @@ static int mcb_probe(struct device *dev) if (!found_id) return -ENODEV; + get_device(dev); return mdrv->probe(mdev, found_id); } From 4d2ec8575357d4afc965564e2e910a72fe608d39 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 10 May 2016 12:39:45 +0200 Subject: [PATCH 0522/1050] mcb: Acquire reference to carrier module in core Acquire a reference to the carrier's kernel module in bus code, so it can't be removed from the kernel while it still has a bus and thus possibly devices attached to it. Signed-off-by: Johannes Thumshirn Reported-by: Andreas Werner Tested-by: Andreas Werner Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-core.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index f5923c253f7e..6f2c8522e14a 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -61,22 +61,36 @@ static int mcb_probe(struct device *dev) struct mcb_driver *mdrv = to_mcb_driver(dev->driver); struct mcb_device *mdev = to_mcb_device(dev); const struct mcb_device_id *found_id; + struct module *carrier_mod; + int ret; found_id = mcb_match_id(mdrv->id_table, mdev); if (!found_id) return -ENODEV; + carrier_mod = mdev->dev.parent->driver->owner; + if (!try_module_get(carrier_mod)) + return -EINVAL; + get_device(dev); - return mdrv->probe(mdev, found_id); + ret = mdrv->probe(mdev, found_id); + if (ret) + module_put(carrier_mod); + + return ret; } static int mcb_remove(struct device *dev) { struct mcb_driver *mdrv = to_mcb_driver(dev->driver); struct mcb_device *mdev = to_mcb_device(dev); + struct module *carrier_mod; mdrv->remove(mdev); + carrier_mod = mdev->dev.parent->driver->owner; + module_put(carrier_mod); + put_device(&mdev->dev); return 0; From 8550e2fa34f077c8a87cf1ba2453102bbbc9ade9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 8 Jun 2016 19:55:55 +0530 Subject: [PATCH 0523/1050] powerpc/mm/hash: Use the correct PPP mask when updating HPTE With commit e58e87adc8bf9 "powerpc/mm: Update _PAGE_KERNEL_RO" we now use all the three PPP bits. The top bit is now used to have a PPP value of 0b110 which will be mapped to kernel read only. When updating the hpte entry use right mask such that we update the 63rd bit (top 'P' bit) too. Prior to e58e87adc8bf we didn't support KERNEL_RO at all (it was == KERNEL_RW), so this isn't a regression as such. Fixes: e58e87adc8bf ("powerpc/mm: Update _PAGE_KERNEL_RO") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 + arch/powerpc/mm/hash_native_64.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 290157e8d5b2..74839f24f412 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -88,6 +88,7 @@ #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_PP ASM_CONST(0x0000000000000003) +#define HPTE_R_PPP ASM_CONST(0x8000000000000003) #define HPTE_R_N ASM_CONST(0x0000000000000004) #define HPTE_R_G ASM_CONST(0x0000000000000008) #define HPTE_R_M ASM_CONST(0x0000000000000010) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 40e05e7f43de..f8a871a72985 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -316,8 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> hit\n"); /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N | HPTE_R_C))); } native_unlock_hpte(hptep); @@ -385,8 +385,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N))); + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N))); /* * Ensure it is out of the tlb too. Bolted entries base and * actual page size will be same. From ea1d39a31d3b1b6060b6e83e5a29c069a124c68a Mon Sep 17 00:00:00 2001 From: Oscar Date: Tue, 14 Jun 2016 14:14:35 +0800 Subject: [PATCH 0524/1050] usb: common: otg-fsm: add license to usb-otg-fsm Fix warning about tainted kernel because usb-otg-fsm has no license. WARNING: with this patch usb-otg-fsm module can be loaded but then the kernel will hang. Tested with a udoo quad board. Cc: #v4.1+ Signed-off-by: Oscar Signed-off-by: Peter Chen --- drivers/usb/common/usb-otg-fsm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index 9059b7dc185e..2f537bbdda09 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -21,6 +21,7 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -450,3 +451,4 @@ int otg_statemachine(struct otg_fsm *fsm) return fsm->state_changed; } EXPORT_SYMBOL_GPL(otg_statemachine); +MODULE_LICENSE("GPL"); From ad022c87187b50f50937bd9bcd1ef312442a89af Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 12 Jun 2016 23:37:53 +0200 Subject: [PATCH 0525/1050] Revert "mtd: switch ubi_open_volume_path() to vfs_stat()" This reverts commit 322ea0bbf3003df17b6253f76e572c37d79a6810. vfs_stat() can only be used on user supplied buffers. UBI's kapi.c is the API to the kernel and therefore vfs_stat() is inappropriate. This solves the problem that mounting any UBIFS will immediately fail with -EINVAL. Cc: Al Viro Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/kapi.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 348dbbcbedc8..cc6fa0115c9a 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -301,24 +301,27 @@ EXPORT_SYMBOL_GPL(ubi_open_volume_nm); */ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) { - int error, ubi_num, vol_id; - struct kstat stat; + int error, ubi_num, vol_id, mod; + struct inode *inode; + struct path path; dbg_gen("open volume %s, mode %d", pathname, mode); if (!pathname || !*pathname) return ERR_PTR(-EINVAL); - error = vfs_stat(pathname, &stat); + error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) return ERR_PTR(error); - if (!S_ISCHR(stat.mode)) + inode = d_backing_inode(path.dentry); + mod = inode->i_mode; + ubi_num = ubi_major2num(imajor(inode)); + vol_id = iminor(inode) - 1; + path_put(&path); + + if (!S_ISCHR(mod)) return ERR_PTR(-EINVAL); - - ubi_num = ubi_major2num(MAJOR(stat.rdev)); - vol_id = MINOR(stat.rdev) - 1; - if (vol_id >= 0 && ubi_num >= 0) return ubi_open_volume(ubi_num, vol_id, mode); return ERR_PTR(-ENODEV); From 1a498ec45eeabcb246c3c3f5822ed9ac1b4f70d8 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 13 Jun 2016 00:49:03 +0200 Subject: [PATCH 0526/1050] Revert "mtd: switch open_mtd_by_chdev() to use of vfs_stat()" This reverts commit 87f15d4add758fb7fc76655721af94be57a4c17d. vfs_stat() can only be used on user supplied buffers. Cc: Al Viro Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/build.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 16baeb51b2bd..7091fca0fb44 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1147,19 +1147,22 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) */ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) { - struct kstat stat; - int err, minor; + int err, major, minor, mode; + struct path path; /* Probably this is an MTD character device node path */ - err = vfs_stat(mtd_dev, &stat); + err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path); if (err) return ERR_PTR(err); /* MTD device number is defined by the major / minor numbers */ - if (MAJOR(stat.rdev) != MTD_CHAR_MAJOR || !S_ISCHR(stat.mode)) + major = imajor(d_backing_inode(path.dentry)); + minor = iminor(d_backing_inode(path.dentry)); + mode = d_backing_inode(path.dentry)->i_mode; + path_put(&path); + if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode)) return ERR_PTR(-EINVAL); - minor = MINOR(stat.rdev); if (minor & 1) /* * Just do not think the "/dev/mtdrX" devices support is need, From 61edc3f3b51d2d3948029197cfff6fef7d94e939 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 13 Jun 2016 00:49:04 +0200 Subject: [PATCH 0527/1050] ubi: Don't bypass ->getattr() Directly accessing inode fields bypasses ->getattr() and can cause problems when the underlying filesystem does not have the default ->getattr() implementation. So instead of obtaining the backing inode via d_backing_inode() use vfs_getattr() and obtain what we need from the kstat struct. Cc: Al Viro Reported-by: Al Viro Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/build.c | 16 ++++++++++------ drivers/mtd/ubi/kapi.c | 17 ++++++++++------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 7091fca0fb44..ef3618299494 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1147,22 +1147,26 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) */ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) { - int err, major, minor, mode; + int err, minor; struct path path; + struct kstat stat; /* Probably this is an MTD character device node path */ err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path); if (err) return ERR_PTR(err); - /* MTD device number is defined by the major / minor numbers */ - major = imajor(d_backing_inode(path.dentry)); - minor = iminor(d_backing_inode(path.dentry)); - mode = d_backing_inode(path.dentry)->i_mode; + err = vfs_getattr(&path, &stat); path_put(&path); - if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode)) + if (err) + return ERR_PTR(err); + + /* MTD device number is defined by the major / minor numbers */ + if (MAJOR(stat.rdev) != MTD_CHAR_MAJOR || !S_ISCHR(stat.mode)) return ERR_PTR(-EINVAL); + minor = MINOR(stat.rdev); + if (minor & 1) /* * Just do not think the "/dev/mtdrX" devices support is need, diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index cc6fa0115c9a..a9e2cef7c95c 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -301,9 +301,9 @@ EXPORT_SYMBOL_GPL(ubi_open_volume_nm); */ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) { - int error, ubi_num, vol_id, mod; - struct inode *inode; + int error, ubi_num, vol_id; struct path path; + struct kstat stat; dbg_gen("open volume %s, mode %d", pathname, mode); @@ -314,14 +314,17 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) if (error) return ERR_PTR(error); - inode = d_backing_inode(path.dentry); - mod = inode->i_mode; - ubi_num = ubi_major2num(imajor(inode)); - vol_id = iminor(inode) - 1; + error = vfs_getattr(&path, &stat); path_put(&path); + if (error) + return ERR_PTR(error); - if (!S_ISCHR(mod)) + if (!S_ISCHR(stat.mode)) return ERR_PTR(-EINVAL); + + ubi_num = ubi_major2num(MAJOR(stat.rdev)); + vol_id = MINOR(stat.rdev) - 1; + if (vol_id >= 0 && ubi_num >= 0) return ubi_open_volume(ubi_num, vol_id, mode); return ERR_PTR(-ENODEV); From cc51846ba81ca179a3be20f6313e3b72531888c1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 17 May 2016 11:12:32 +0200 Subject: [PATCH 0528/1050] pwm: atmel-hlcdc: Fix default PWM polarity The PWM device exposed by the HLCDC IP is configured with an inverted polarity by default. Registering the PWM chip with the normal polarity was not a problem before commit 42e8992c58d4 ("pwm: Add core infrastructure to allow atomic updates") because the ->set_polarity() hook was called no matter the current polarity state, but this is no longer the case. Signed-off-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-atmel-hlcdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index f994c7eaf41c..14fc011faa32 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -272,7 +272,7 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) chip->chip.of_pwm_n_cells = 3; chip->chip.can_sleep = 1; - ret = pwmchip_add(&chip->chip); + ret = pwmchip_add_with_polarity(&chip->chip, PWM_POLARITY_INVERSED); if (ret) { clk_disable_unprepare(hlcdc->periph_clk); return ret; From b7fa30c9cc48c4f55663420472505d3b4f6e1705 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Jun 2016 15:07:50 +0200 Subject: [PATCH 0529/1050] sched/fair: Fix post_init_entity_util_avg() serialization Chris Wilson reported a divide by 0 at: post_init_entity_util_avg(): > 725 if (cfs_rq->avg.util_avg != 0) { > 726 sa->util_avg = cfs_rq->avg.util_avg * se->load.weight; > -> 727 sa->util_avg /= (cfs_rq->avg.load_avg + 1); > 728 > 729 if (sa->util_avg > cap) > 730 sa->util_avg = cap; > 731 } else { Which given the lack of serialization, and the code generated from update_cfs_rq_load_avg() is entirely possible: if (atomic_long_read(&cfs_rq->removed_load_avg)) { s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0); sa->load_avg = max_t(long, sa->load_avg - r, 0); sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0); removed_load = 1; } turns into: ffffffff81087064: 49 8b 85 98 00 00 00 mov 0x98(%r13),%rax ffffffff8108706b: 48 85 c0 test %rax,%rax ffffffff8108706e: 74 40 je ffffffff810870b0 ffffffff81087070: 4c 89 f8 mov %r15,%rax ffffffff81087073: 49 87 85 98 00 00 00 xchg %rax,0x98(%r13) ffffffff8108707a: 49 29 45 70 sub %rax,0x70(%r13) ffffffff8108707e: 4c 89 f9 mov %r15,%rcx ffffffff81087081: bb 01 00 00 00 mov $0x1,%ebx ffffffff81087086: 49 83 7d 70 00 cmpq $0x0,0x70(%r13) ffffffff8108708b: 49 0f 49 4d 70 cmovns 0x70(%r13),%rcx Which you'll note ends up with 'sa->load_avg - r' in memory at ffffffff8108707a. By calling post_init_entity_util_avg() under rq->lock we're sure to be fully serialized against PELT updates and cannot observe intermediate state like this. Reported-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Cc: Andrey Ryabinin Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yuyang Du Cc: bsegall@google.com Cc: morten.rasmussen@arm.com Cc: pjt@google.com Cc: steve.muckle@linaro.org Fixes: 2b8c41daba32 ("sched/fair: Initiate a new task's util avg to a bounded value") Link: http://lkml.kernel.org/r/20160609130750.GQ30909@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 3 +-- kernel/sched/fair.c | 8 +++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 017d5394f5dc..13d0896aff87 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2535,10 +2535,9 @@ void wake_up_new_task(struct task_struct *p) */ set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); #endif - /* Post initialize new task's util average when its cfs_rq is set */ + rq = __task_rq_lock(p, &rf); post_init_entity_util_avg(&p->se); - rq = __task_rq_lock(p, &rf); activate_task(rq, p, 0); p->on_rq = TASK_ON_RQ_QUEUED; trace_sched_wakeup_new(p); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 218f8e83db73..4e33ad12bb68 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8496,8 +8496,9 @@ void free_fair_sched_group(struct task_group *tg) int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) { - struct cfs_rq *cfs_rq; struct sched_entity *se; + struct cfs_rq *cfs_rq; + struct rq *rq; int i; tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); @@ -8512,6 +8513,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) init_cfs_bandwidth(tg_cfs_bandwidth(tg)); for_each_possible_cpu(i) { + rq = cpu_rq(i); + cfs_rq = kzalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, cpu_to_node(i)); if (!cfs_rq) @@ -8525,7 +8528,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) init_cfs_rq(cfs_rq); init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); init_entity_runnable_average(se); + + raw_spin_lock_irq(&rq->lock); post_init_entity_util_avg(se); + raw_spin_unlock_irq(&rq->lock); } return 1; From 797179bc4fe06c89e47a9f36f886f68640b423f8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:43 +0100 Subject: [PATCH 0530/1050] MIPS: KVM: Fix modular KVM under QEMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copy __kvm_mips_vcpu_run() into unmapped memory, so that we can never get a TLB refill exception in it when KVM is built as a module. This was observed to happen with the host MIPS kernel running under QEMU, due to a not entirely transparent optimisation in the QEMU TLB handling where TLB entries replaced with TLBWR are copied to a separate part of the TLB array. Code in those pages continue to be executable, but those mappings persist only until the next ASID switch, even if they are marked global. An ASID switch happens in __kvm_mips_vcpu_run() at exception level after switching to the guest exception base. Subsequent TLB mapped kernel instructions just prior to switching to the guest trigger a TLB refill exception, which enters the guest exception handlers without updating EPC. This appears as a guest triggered TLB refill on a host kernel mapped (host KSeg2) address, which is not handled correctly as user (guest) mode accesses to kernel (host) segments always generate address error exceptions. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 1 + arch/mips/kvm/interrupt.h | 1 + arch/mips/kvm/locore.S | 1 + arch/mips/kvm/mips.c | 11 ++++++++++- 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 6733ac575da4..2d5bb133d11a 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -338,6 +338,7 @@ struct kvm_mips_tlb { #define KVM_MIPS_GUEST_TLB_SIZE 64 struct kvm_vcpu_arch { void *host_ebase, *guest_ebase; + int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); unsigned long host_stack; unsigned long host_gp; diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h index 4ab4bdfad703..2143884709e4 100644 --- a/arch/mips/kvm/interrupt.h +++ b/arch/mips/kvm/interrupt.h @@ -28,6 +28,7 @@ #define MIPS_EXC_MAX 12 /* XXXSL More to follow */ +extern char __kvm_mips_vcpu_run_end[]; extern char mips32_exception[], mips32_exceptionEnd[]; extern char mips32_GuestException[], mips32_GuestExceptionEnd[]; diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index 3ef03009de5f..828fcfc1cd7f 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -202,6 +202,7 @@ FEXPORT(__kvm_mips_load_k0k1) /* Jump to guest */ eret +EXPORT(__kvm_mips_vcpu_run_end) VECTOR(MIPSX(exception), unknown) /* Find out what mode we came from and jump to the proper handler. */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index dc052fb5c7a2..44da5259f390 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -315,6 +315,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) memcpy(gebase + offset, mips32_GuestException, mips32_GuestExceptionEnd - mips32_GuestException); +#ifdef MODULE + offset += mips32_GuestExceptionEnd - mips32_GuestException; + memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run, + __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run); + vcpu->arch.vcpu_run = gebase + offset; +#else + vcpu->arch.vcpu_run = __kvm_mips_vcpu_run; +#endif + /* Invalidate the icache for these ranges */ local_flush_icache_range((unsigned long)gebase, (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); @@ -404,7 +413,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Disable hardware page table walking while in guest */ htw_stop(); - r = __kvm_mips_vcpu_run(run, vcpu); + r = vcpu->arch.vcpu_run(run, vcpu); /* Re-enable HTW before enabling interrupts */ htw_start(); From 7f5a1ddc792901249c2060e165bcb3ca779cde35 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:44 +0100 Subject: [PATCH 0531/1050] MIPS: KVM: Include bit 31 in segment matches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When faulting guest addresses are matched against guest segments with the KVM_GUEST_KSEGX() macro, change the mask to 0xe0000000 so as to include bit 31. This is mainly for safety's sake, as it prevents a rogue BadVAddr in the host kseg2/kseg3 segments (e.g. 0xC*******) after a TLB exception from matching the guest kseg0 segment (e.g. 0x4*******), triggering an internal KVM error instead of allowing the corresponding guest kseg0 page to be mapped into the host vmalloc space. Such a rogue BadVAddr was observed to happen with the host MIPS kernel running under QEMU with KVM built as a module, due to a not entirely transparent optimisation in the QEMU TLB handling. This has already been worked around properly in a previous commit. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 2d5bb133d11a..36a391d289aa 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -74,7 +74,7 @@ #define KVM_GUEST_KUSEG 0x00000000UL #define KVM_GUEST_KSEG0 0x40000000UL #define KVM_GUEST_KSEG23 0x60000000UL -#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0x60000000) +#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0xe0000000) #define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) #define KVM_GUEST_CKSEG0ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0) From cc81e9486202345d6ca56495cf8b5f3d03fbc563 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:45 +0100 Subject: [PATCH 0532/1050] MIPS: KVM: Don't unwind PC when emulating CACHE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CACHE instruction is emulated by kvm_mips_emulate_cache(), the PC is first updated to point to the next instruction, and afterwards it falls through the "dont_update_pc" label, which rewinds the PC back to its original address. This works when dynamic translation of emulated instructions is enabled, since the CACHE instruction is replaced with a SYNCI which works without trapping, however when dynamic translation is disabled the guest hangs on CACHE instructions as they always trap and are never stepped over. Roughly swap the meanings of the "done" and "dont_update_pc" to match kvm_mips_emulate_CP0(), so that "done" will roll back the PC on failure, and "dont_update_pc" won't change PC at all (for the sake of exceptions that have already modified the PC). Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 396df6eb0a12..52bec0fe2fbb 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1666,7 +1666,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; preempt_enable(); - goto dont_update_pc; + goto done; } @@ -1694,16 +1694,20 @@ skip_fault: kvm_err("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; - preempt_enable(); - goto dont_update_pc; } preempt_enable(); +done: + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) + vcpu->arch.pc = curr_pc; dont_update_pc: - /* Rollback PC */ - vcpu->arch.pc = curr_pc; -done: + /* + * This is for exceptions whose emulation updates the PC, so do not + * overwrite the PC under any circumstances + */ + return er; } From 6df82a7b88dc9b0b519765562b005ef9196d812a Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:46 +0100 Subject: [PATCH 0533/1050] MIPS: KVM: Fix CACHE triggered exception emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When emulating TLB miss / invalid exceptions during CACHE instruction emulation, be sure to set up the correct PC and host_cp0_badvaddr state for the kvm_mips_emlulate_tlb*_ld() function to pick up for guest EPC and BadVAddr. PC needs to be rewound otherwise the guest EPC will end up pointing at the next instruction after the faulting CACHE instruction. host_cp0_badvaddr must be set because guest CACHE instructions trap with a Coprocessor Unusable exception, which doesn't update the host BadVAddr as a TLB exception would. This doesn't tend to get hit when dynamic translation of emulated instructions is enabled, since only the first execution of each CACHE instruction actually goes through this code path, with subsequent executions hitting the SYNCI instruction that it gets replaced with. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 52bec0fe2fbb..645c8a1982a7 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1636,6 +1636,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, if (index < 0) { vcpu->arch.host_cp0_entryhi = (va & VPN2_MASK); vcpu->arch.host_cp0_badvaddr = va; + vcpu->arch.pc = curr_pc; er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, vcpu); preempt_enable(); @@ -1647,6 +1648,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, * invalid exception to the guest */ if (!TLB_IS_VALID(*tlb, va)) { + vcpu->arch.host_cp0_badvaddr = va; + vcpu->arch.pc = curr_pc; er = kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu); preempt_enable(); From 6d9fe44bd73d567d04d3a68a2d2fa521ab9532f2 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 13 Jun 2016 17:46:49 +0000 Subject: [PATCH 0534/1050] spi: sun4i: fix FIFO limit When testing SPI without DMA I noticed that filling the FIFO on the spi controller causes timeout. Always leave room for one byte in the FIFO. Signed-off-by: Michal Suchanek Acked-by: Maxime Ripard Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-sun4i.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index 1ddd9e2309b6..e7e4aecb3295 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -179,7 +179,10 @@ static int sun4i_spi_transfer_one(struct spi_master *master, /* We don't support transfer larger than the FIFO */ if (tfr->len > SUN4I_FIFO_DEPTH) - return -EINVAL; + return -EMSGSIZE; + + if (tfr->tx_buf && tfr->len >= SUN4I_FIFO_DEPTH) + return -EMSGSIZE; reinit_completion(&sspi->done); sspi->tx_buf = tfr->tx_buf; @@ -269,8 +272,12 @@ static int sun4i_spi_transfer_one(struct spi_master *master, sun4i_spi_write(sspi, SUN4I_BURST_CNT_REG, SUN4I_BURST_CNT(tfr->len)); sun4i_spi_write(sspi, SUN4I_XMIT_CNT_REG, SUN4I_XMIT_CNT(tx_len)); - /* Fill the TX FIFO */ - sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH); + /* + * Fill the TX FIFO + * Filling the FIFO fully causes timeout for some reason + * at least on spi2 on A10s + */ + sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH - 1); /* Enable the interrupts */ sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, SUN4I_INT_CTL_TC); From 719bd6542044efd9b338a53dba1bef45f40ca169 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 13 Jun 2016 17:46:49 +0000 Subject: [PATCH 0535/1050] spi: sunxi: fix transfer timeout The trasfer timeout is fixed at 1000 ms. Reading a 4Mbyte flash over 1MHz SPI bus takes way longer than that. Calculate the timeout from the actual time the transfer is supposed to take and multiply by 2 for good measure. Signed-off-by: Michal Suchanek Acked-by: Maxime Ripard Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-sun4i.c | 10 +++++++++- drivers/spi/spi-sun6i.c | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index e7e4aecb3295..cf007f3b83ec 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -173,6 +173,7 @@ static int sun4i_spi_transfer_one(struct spi_master *master, { struct sun4i_spi *sspi = spi_master_get_devdata(master); unsigned int mclk_rate, div, timeout; + unsigned int start, end, tx_time; unsigned int tx_len = 0; int ret = 0; u32 reg; @@ -286,9 +287,16 @@ static int sun4i_spi_transfer_one(struct spi_master *master, reg = sun4i_spi_read(sspi, SUN4I_CTL_REG); sun4i_spi_write(sspi, SUN4I_CTL_REG, reg | SUN4I_CTL_XCH); + tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U); + start = jiffies; timeout = wait_for_completion_timeout(&sspi->done, - msecs_to_jiffies(1000)); + msecs_to_jiffies(tx_time)); + end = jiffies; if (!timeout) { + dev_warn(&master->dev, + "%s: timeout transferring %u bytes@%iHz for %i(%i)ms", + dev_name(&spi->dev), tfr->len, tfr->speed_hz, + jiffies_to_msecs(end - start), tx_time); ret = -ETIMEDOUT; goto out; } diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c index 42e2c4bd690a..7fce79a60608 100644 --- a/drivers/spi/spi-sun6i.c +++ b/drivers/spi/spi-sun6i.c @@ -160,6 +160,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master, { struct sun6i_spi *sspi = spi_master_get_devdata(master); unsigned int mclk_rate, div, timeout; + unsigned int start, end, tx_time; unsigned int tx_len = 0; int ret = 0; u32 reg; @@ -269,9 +270,16 @@ static int sun6i_spi_transfer_one(struct spi_master *master, reg = sun6i_spi_read(sspi, SUN6I_TFR_CTL_REG); sun6i_spi_write(sspi, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH); + tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U); + start = jiffies; timeout = wait_for_completion_timeout(&sspi->done, - msecs_to_jiffies(1000)); + msecs_to_jiffies(tx_time)); + end = jiffies; if (!timeout) { + dev_warn(&master->dev, + "%s: timeout transferring %u bytes@%iHz for %i(%i)ms", + dev_name(&spi->dev), tfr->len, tfr->speed_hz, + jiffies_to_msecs(end - start), tx_time); ret = -ETIMEDOUT; goto out; } From 4b2312bd0592708c85ed94368c874819e7013309 Mon Sep 17 00:00:00 2001 From: Harvey Hunt Date: Mon, 23 May 2016 12:05:52 +0100 Subject: [PATCH 0536/1050] irqchip/mips-gic: Fix IRQs in gic_dev_domain When allocating a new device IRQ, gic_dev_domain_alloc() correctly calls irq_domain_set_hwirq_and_chip(), but gic_irq_domain_alloc() does not. This means that gic_irq_domain believes all IRQs from the dev domain have an hwirq of 0 and creates incorrect mappings in the linear_revmap. As gic_irq_domain is a parent of the gic_dev_domain, this leads to an inability to boot on devices with a GIC. Excerpt of the error: [ 2.297649] irq 0: nobody cared (try booting with the "irqpoll" option) ... [ 2.436963] handlers: [ 2.439492] Disabling IRQ #0 Fix this by calling irq_domain_set_hwirq_and_chip() for both the dev and irq domain. Now that we are modifying the parent domain, be sure to clear it up in case of an allocation error. Fixes: c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain") Fixes: 2af70a962070 ("irqchip/mips-gic: Add a IPI hierarchy domain") Signed-off-by: Harvey Hunt Tested-by: Govindraj Raja # On Pistachio SoC Reviewed-by: Matt Redfearn Cc: linux-mips@linux-mips.org Cc: Qais Yousef Cc: jason@lakedaemon.net Cc: marc.zyngier@arm.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1464001552-31174-1-git-send-email-harvey.hunt@imgtec.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mips-gic.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 3b5e10aa48ab..8a4adbeb2b8c 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -746,6 +746,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, /* verify that it doesn't conflict with an IPI irq */ if (test_bit(spec->hwirq, ipi_resrv)) return -EBUSY; + + hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq); + + return irq_domain_set_hwirq_and_chip(d, virq, hwirq, + &gic_level_irq_controller, + NULL); } else { base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs); if (base_hwirq == gic_shared_intrs) { @@ -867,10 +873,14 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq, &gic_level_irq_controller, NULL); if (ret) - return ret; + goto error; } return 0; + +error: + irq_domain_free_irqs_parent(d, virq, nr_irqs); + return ret; } void gic_dev_domain_free(struct irq_domain *d, unsigned int virq, From dcfc47248d3f7d28df6f531e6426b933de94370d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jun 2016 23:06:53 +0900 Subject: [PATCH 0537/1050] kprobes/x86: Clear TF bit in fault on single-stepping Fix kprobe_fault_handler() to clear the TF (trap flag) bit of the flags register in the case of a fault fixup on single-stepping. If we put a kprobe on the instruction which caused a page fault (e.g. actual mov instructions in copy_user_*), that fault happens on the single-stepping buffer. In this case, kprobes resets running instance so that the CPU can retry execution on the original ip address. However, current code forgets to reset the TF bit. Since this fault happens with TF bit set for enabling single-stepping, when it retries, it causes a debug exception and kprobes can not handle it because it already reset itself. On the most of x86-64 platform, it can be easily reproduced by using kprobe tracer. E.g. # cd /sys/kernel/debug/tracing # echo p copy_user_enhanced_fast_string+5 > kprobe_events # echo 1 > events/kprobes/enable And you'll see a kernel panic on do_debug(), since the debug trap is not handled by kprobes. To fix this problem, we just need to clear the TF bit when resetting running kprobe. Signed-off-by: Masami Hiramatsu Reviewed-by: Ananth N Mavinakayanahalli Acked-by: Steven Rostedt Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: systemtap@sourceware.org Cc: stable@vger.kernel.org # All the way back to ancient kernels Link: http://lkml.kernel.org/r/20160611140648.25885.37482.stgit@devbox [ Updated the comments. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 38cf7a741250..7847e5c0e0b5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -961,7 +961,19 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * normal page fault. */ regs->ip = (unsigned long)cur->addr; + /* + * Trap flag (TF) has been set here because this fault + * happened where the single stepping will be done. + * So clear it by resetting the current kprobe: + */ + regs->flags &= ~X86_EFLAGS_TF; + + /* + * If the TF flag was set before the kprobe hit, + * don't touch it: + */ regs->flags |= kcb->kprobe_old_flags; + if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else From eda8dca519269c92a0771668b3d5678792de7b78 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Jun 2016 02:32:09 -0500 Subject: [PATCH 0538/1050] sched/debug: Fix deadlock when enabling sched events I see a hang when enabling sched events: echo 1 > /sys/kernel/debug/tracing/events/sched/enable The printk buffer shows: BUG: spinlock recursion on CPU#1, swapper/1/0 lock: 0xffff88007d5d8c00, .magic: dead4ead, .owner: swapper/1/0, .owner_cpu: 1 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.7.0-rc2+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 ... Call Trace: [] dump_stack+0x85/0xc2 [] spin_dump+0x78/0xc0 [] do_raw_spin_lock+0x11a/0x150 [] _raw_spin_lock+0x61/0x80 [] ? try_to_wake_up+0x256/0x4e0 [] try_to_wake_up+0x256/0x4e0 [] ? _raw_spin_unlock_irqrestore+0x4a/0x80 [] wake_up_process+0x15/0x20 [] insert_work+0x84/0xc0 [] __queue_work+0x18f/0x660 [] queue_work_on+0x46/0x90 [] drm_fb_helper_dirty.isra.11+0xcb/0xe0 [drm_kms_helper] [] drm_fb_helper_sys_imageblit+0x30/0x40 [drm_kms_helper] [] soft_cursor+0x1ad/0x230 [] bit_cursor+0x649/0x680 [] ? update_attr.isra.2+0x90/0x90 [] fbcon_cursor+0x14a/0x1c0 [] hide_cursor+0x28/0x90 [] vt_console_print+0x3bf/0x3f0 [] call_console_drivers.constprop.24+0x183/0x200 [] console_unlock+0x3d4/0x610 [] vprintk_emit+0x3c5/0x610 [] vprintk_default+0x29/0x40 [] printk+0x57/0x73 [] enqueue_entity+0xc2e/0xc70 [] enqueue_task_fair+0x59/0xab0 [] ? kvm_sched_clock_read+0x9/0x20 [] ? sched_clock+0x9/0x10 [] activate_task+0x5c/0xa0 [] ttwu_do_activate+0x54/0xb0 [] sched_ttwu_pending+0x7a/0xb0 [] scheduler_ipi+0x61/0x170 [] smp_trace_reschedule_interrupt+0x4f/0x2a0 [] trace_reschedule_interrupt+0x96/0xa0 [] ? native_safe_halt+0x6/0x10 [] ? trace_hardirqs_on+0xd/0x10 [] default_idle+0x20/0x1a0 [] arch_cpu_idle+0xf/0x20 [] default_idle_call+0x2f/0x50 [] cpu_startup_entry+0x37e/0x450 [] start_secondary+0x160/0x1a0 Note the hang only occurs when echoing the above from a physical serial console, not from an ssh session. The bug is caused by a deadlock where the task is trying to grab the rq lock twice because printk()'s aren't safe in sched code. Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Matt Fleming Cc: Mel Gorman Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default") Link: http://lkml.kernel.org/r/20160613073209.gdvdybiruljbkn3p@treble Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4e33ad12bb68..a2348deab7a3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3246,7 +3246,7 @@ static inline void check_schedstat_required(void) trace_sched_stat_iowait_enabled() || trace_sched_stat_blocked_enabled() || trace_sched_stat_runtime_enabled()) { - pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, " + printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, " "stat_blocked and stat_runtime require the " "kernel parameter schedstats=enabled or " "kernel.sched_schedstats=1\n"); From 57675cb976eff977aefb428e68e4e0236d48a9ff Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 9 Jun 2016 15:20:05 +0300 Subject: [PATCH 0539/1050] kernel/sysrq, watchdog, sched/core: Reset watchdog on all CPUs while processing sysrq-w Lengthy output of sysrq-w may take a lot of time on slow serial console. Currently we reset NMI-watchdog on the current CPU to avoid spurious lockup messages. Sometimes this doesn't work since softlockup watchdog might trigger on another CPU which is waiting for an IPI to proceed. We reset softlockup watchdogs on all CPUs, but we do this only after listing all tasks, and this may be too late on a busy system. So, reset watchdogs CPUs earlier, in for_each_process_thread() loop. Signed-off-by: Andrey Ryabinin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Link: http://lkml.kernel.org/r/1465474805-14641-1-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 13d0896aff87..4135ac83cb65 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5147,14 +5147,16 @@ void show_state_filter(unsigned long state_filter) /* * reset the NMI-timeout, listing all files on a slow * console might take a lot of time: + * Also, reset softlockup watchdogs on all CPUs, because + * another CPU might be blocked waiting for us to process + * an IPI. */ touch_nmi_watchdog(); + touch_all_softlockup_watchdogs(); if (!state_filter || (p->state & state_filter)) sched_show_task(p); } - touch_all_softlockup_watchdogs(); - #ifdef CONFIG_SCHED_DEBUG if (!state_filter) sysrq_sched_debug_show(); From e50525bef593c3dd0564df676c567d77f7c20322 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Thu, 9 Jun 2016 11:33:55 +0530 Subject: [PATCH 0540/1050] ath10k: fix deadlock while processing rx_in_ord_ind commit 5c86d97bcc1d ("ath10k: combine txrx and replenish task") introduced deadlock while processing rx in order indication message for qca6174 based devices. While merging replenish and txrx tasklets, replenish task should be called out of htt rx ring locking since it is also try to acquire the same lock. Unfortunately this issue is not exposed by other solutions (qca988x, qca99x0 & qca4019), as rx_in_ord_ind message is specific to qca6174 based devices. This patch fixes ============================================= [ INFO: possible recursive locking detected ] 4.7.0-rc2-wt-ath+ #1353 Tainted: G E --------------------------------------------- swapper/3/0 is trying to acquire lock: (&(&htt->rx_ring.lock)->rlock){+.-...}, at: [] ath10k_htt_rx_msdu_buff_replenish+0x29/0x90 [ath10k_core] but task is already holding lock: (&(&htt->rx_ring.lock)->rlock){+.-...}, at: [] ath10k_htt_txrx_compl_task+0x21b/0x250 [ath10k_core] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&htt->rx_ring.lock)->rlock); lock(&(&htt->rx_ring.lock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by swapper/3/0: #0: (&(&htt->rx_ring.lock)->rlock){+.-...}, at: [] ath10k_htt_txrx_compl_task+0x21b/0x250 [ath10k_core] Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=119151 Fixes: 5c86d97bcc1d ("ath10k: combine txrx and replenish task") Reported-by: Mike Lothian Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/htt_rx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index cc979a4faeb0..813cdd2621a1 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1904,7 +1904,6 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) return; } } - ath10k_htt_rx_msdu_buff_replenish(htt); } static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, From e024111f6946f45cf1559a8c6fd48d2d0f696d07 Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 7 Jun 2016 15:47:07 +0300 Subject: [PATCH 0541/1050] ath9k: fix GPIO mask for AR9462 and AR9565 The incorrect GPIO mask cause kernel warning, when AR9462 access GPIO11. Also fix the mask for AR9565. WARNING: CPU: 1 PID: 199 at ../drivers/net/wireless/ath/ath9k/hw.c:2778 ath9k_hw_gpio_get+0x1a9/0x1b0 [ath9k_hw] CPU: 1 PID: 199 Comm: kworker/u16:9 Not tainted 4.7.0-rc1-next-20160530+ #5 Hardware name: Acer TravelMate P243/BA40_HC, BIOS V1.01 04/20/2012 Workqueue: events_power_efficient rfkill_poll 0000000000000000 ffff88002cf73d28 ffffffff813b8ddc 0000000000000000 0000000000000000 ffff88002cf73d68 ffffffff8107a331 00000ada00000086 ffff880148d9c018 000000000000000b ffff880147e68720 0000000000000200 Call Trace: [] dump_stack+0x63/0x87 [] __warn+0xd1/0xf0 [] warn_slowpath_null+0x1d/0x20 [] ath9k_hw_gpio_get+0x1a9/0x1b0 [ath9k_hw] [] ath9k_rfkill_poll_state+0x34/0x60 [ath9k] [] ieee80211_rfkill_poll+0x33/0x40 [mac80211] [] cfg80211_rfkill_poll+0x2a/0xc0 [cfg80211] [] rfkill_poll+0x24/0x50 [] process_one_work+0x153/0x3f0 [] worker_thread+0x12b/0x4b0 [] ? rescuer_thread+0x340/0x340 [] kthread+0xc9/0xe0 [] ret_from_fork+0x1f/0x40 [] ? kthread_park+0x60/0x60 Fixes: a01ab81b09c5 ("ath9k: define correct GPIO numbers and bits mask") Reported-by: Sudip Mukherjee Tested-by: Sudip Mukherjee Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/reg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h index 9272ca90632b..80ff69f99229 100644 --- a/drivers/net/wireless/ath/ath9k/reg.h +++ b/drivers/net/wireless/ath/ath9k/reg.h @@ -1122,12 +1122,12 @@ enum { #define AR9300_NUM_GPIO 16 #define AR9330_NUM_GPIO 16 #define AR9340_NUM_GPIO 23 -#define AR9462_NUM_GPIO 10 +#define AR9462_NUM_GPIO 14 #define AR9485_NUM_GPIO 12 #define AR9531_NUM_GPIO 18 #define AR9550_NUM_GPIO 24 #define AR9561_NUM_GPIO 23 -#define AR9565_NUM_GPIO 12 +#define AR9565_NUM_GPIO 14 #define AR9580_NUM_GPIO 16 #define AR7010_NUM_GPIO 16 @@ -1139,12 +1139,12 @@ enum { #define AR9300_GPIO_MASK 0x0000F4FF #define AR9330_GPIO_MASK 0x0000F4FF #define AR9340_GPIO_MASK 0x0000000F -#define AR9462_GPIO_MASK 0x000003FF +#define AR9462_GPIO_MASK 0x00003FFF #define AR9485_GPIO_MASK 0x00000FFF #define AR9531_GPIO_MASK 0x0000000F #define AR9550_GPIO_MASK 0x0000000F #define AR9561_GPIO_MASK 0x0000000F -#define AR9565_GPIO_MASK 0x00000FFF +#define AR9565_GPIO_MASK 0x00003FFF #define AR9580_GPIO_MASK 0x0000F4FF #define AR7010_GPIO_MASK 0x0000FFFF From c5cea06be060f38e5400d796e61cfc8c36e52924 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 13 Jun 2016 11:15:14 +0100 Subject: [PATCH 0542/1050] arm64: fix dump_instr when PAN and UAO are in use If the kernel is set to show unhandled signals, and a user task does not handle a SIGILL as a result of an instruction abort, we will attempt to log the offending instruction with dump_instr before killing the task. We use dump_instr to log the encoding of the offending userspace instruction. However, dump_instr is also used to dump instructions from kernel space, and internally always switches to KERNEL_DS before dumping the instruction with get_user. When both PAN and UAO are in use, reading a user instruction via get_user while in KERNEL_DS will result in a permission fault, which leads to an Oops. As we have regs corresponding to the context of the original instruction abort, we can inspect this and only flip to KERNEL_DS if the original abort was taken from the kernel, avoiding this issue. At the same time, remove the redundant (and incorrect) comments regarding the order dump_mem and dump_instr are called in. Cc: Catalin Marinas Cc: James Morse Cc: Robin Murphy Cc: #4.6+ Signed-off-by: Mark Rutland Reported-by: Vladimir Murzin Tested-by: Vladimir Murzin Fixes: 57f4959bad0a154a ("arm64: kernel: Add support for User Access Override") Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f7cf463107df..2a43012616b7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -64,8 +64,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, /* * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. + * to safely read from kernel space. */ fs = get_fs(); set_fs(KERNEL_DS); @@ -111,21 +110,12 @@ static void dump_backtrace_entry(unsigned long where) print_ip_sym(where); } -static void dump_instr(const char *lvl, struct pt_regs *regs) +static void __dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); - mm_segment_t fs; char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { unsigned int val, bad; @@ -139,8 +129,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } printk("%sCode: %s\n", lvl, str); +} - set_fs(fs); +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + if (!user_mode(regs)) { + mm_segment_t fs = get_fs(); + set_fs(KERNEL_DS); + __dump_instr(lvl, regs); + set_fs(fs); + } else { + __dump_instr(lvl, regs); + } } static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) From bbb1681ee3653bdcfc6a4ba31902738118311fd4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 13 Jun 2016 17:57:02 +0100 Subject: [PATCH 0543/1050] arm64: mm: mark fault_info table const Unlike the debug_fault_info table, we never intentionally alter the fault_info table at runtime, and all derived pointers are treated as const currently. Make the table const so that it can be placed in .rodata and protected from unintentional writes, as we do for the syscall tables. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ba3fc12bd272..013e2cbe7924 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -441,7 +441,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } -static struct fault_info { +static const struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); int sig; int code; From eee930163c6a2507aa3b41edf8f7e3b7a2049769 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 25 Mar 2016 11:47:11 +0100 Subject: [PATCH 0544/1050] nfsd: Fix NFSD_MDS_PR_KEY on 32-bit by adding ULL postfix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit: fs/nfsd/blocklayout.c: In function ‘nfsd4_block_get_device_info_scsi’: fs/nfsd/blocklayout.c:337: warning: integer constant is too large for ‘long’ type fs/nfsd/blocklayout.c:344: warning: integer constant is too large for ‘long’ type fs/nfsd/blocklayout.c: In function ‘nfsd4_scsi_fence_client’: fs/nfsd/blocklayout.c:385: warning: integer constant is too large for ‘long’ type Add the missing "ULL" postfix to 64-bit constant NFSD_MDS_PR_KEY to fix this. Fixes: f99d4fbdae6765d0 ("nfsd: add SCSI layout support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/blocklayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index e55b5242614d..31f3df193bdb 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -290,7 +290,7 @@ out_free_buf: return error; } -#define NFSD_MDS_PR_KEY 0x0100000000000000 +#define NFSD_MDS_PR_KEY 0x0100000000000000ULL /* * We use the client ID as a unique key for the reservations. From 5bc28b93a36e3cb3acc2870fb75cb6ffb182fece Mon Sep 17 00:00:00 2001 From: Rhyland Klein Date: Thu, 9 Jun 2016 17:28:39 -0400 Subject: [PATCH 0545/1050] power_supply: power_supply_read_temp only if use_cnt > 0 Change power_supply_read_temp() to use power_supply_get_property() so that it will check the use_cnt and ensure it is > 0. The use_cnt will be incremented at the end of __power_supply_register, so this will block to case where get_property can be called before the supply is fully registered. This fixes the issue show in the stack below: [ 1.452598] power_supply_read_temp+0x78/0x80 [ 1.458680] thermal_zone_get_temp+0x5c/0x11c [ 1.464765] thermal_zone_device_update+0x34/0xb4 [ 1.471195] thermal_zone_device_register+0x87c/0x8cc [ 1.477974] __power_supply_register+0x364/0x424 [ 1.484317] power_supply_register_no_ws+0x10/0x18 [ 1.490833] bq27xxx_battery_setup+0x10c/0x164 [ 1.497003] bq27xxx_battery_i2c_probe+0xd0/0x1b0 [ 1.503435] i2c_device_probe+0x174/0x240 [ 1.509172] driver_probe_device+0x1fc/0x29c [ 1.515167] __driver_attach+0xa4/0xa8 [ 1.520643] bus_for_each_dev+0x58/0x98 [ 1.526204] driver_attach+0x20/0x28 [ 1.531505] bus_add_driver+0x1c8/0x22c [ 1.537067] driver_register+0x68/0x108 [ 1.542630] i2c_register_driver+0x38/0x7c [ 1.548457] bq27xxx_battery_i2c_driver_init+0x18/0x20 [ 1.555321] do_one_initcall+0x38/0x12c [ 1.560886] kernel_init_freeable+0x148/0x1ec [ 1.566972] kernel_init+0x10/0xfc [ 1.572101] ret_from_fork+0x10/0x40 Also make the same change to ps_get_max_charge_cntl_limit() and ps_get_cur_chrage_cntl_limit() to be safe. Lastly, change the return value of power_supply_get_property() to -EAGAIN from -ENODEV if use_cnt <= 0. Fixes: 297d716f6260 ("power_supply: Change ownership from driver to core") Cc: stable@vger.kernel.org Signed-off-by: Rhyland Klein Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- drivers/power/power_supply_core.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 456987c88baa..b13cd074c52a 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -565,11 +565,12 @@ static int power_supply_read_temp(struct thermal_zone_device *tzd, WARN_ON(tzd == NULL); psy = tzd->devdata; - ret = psy->desc->get_property(psy, POWER_SUPPLY_PROP_TEMP, &val); + ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_TEMP, &val); + if (ret) + return ret; /* Convert tenths of degree Celsius to milli degree Celsius. */ - if (!ret) - *temp = val.intval * 100; + *temp = val.intval * 100; return ret; } @@ -612,10 +613,12 @@ static int ps_get_max_charge_cntl_limit(struct thermal_cooling_device *tcd, int ret; psy = tcd->devdata; - ret = psy->desc->get_property(psy, - POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val); - if (!ret) - *state = val.intval; + ret = power_supply_get_property(psy, + POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val); + if (ret) + return ret; + + *state = val.intval; return ret; } @@ -628,10 +631,12 @@ static int ps_get_cur_chrage_cntl_limit(struct thermal_cooling_device *tcd, int ret; psy = tcd->devdata; - ret = psy->desc->get_property(psy, - POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val); - if (!ret) - *state = val.intval; + ret = power_supply_get_property(psy, + POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val); + if (ret) + return ret; + + *state = val.intval; return ret; } From fdecf36fcefa9f48c2f2de21c8176f1a8ce2c960 Mon Sep 17 00:00:00 2001 From: Clemens Gruber Date: Sat, 11 Jun 2016 17:21:26 +0200 Subject: [PATCH 0546/1050] phy: marvell: fix LED configuration via marvell,reg-init Configuring the PHY LED registers for the Marvell 88E1510 and others is not possible, because regardless of the values in marvell,reg-init, it is later overridden in m88e1121_config_aneg with a non-standard default. This patch moves that default configuration to .config_init to allow setting the LED configuration through marvell,reg-init in the device tree, which should override said default if it exists. Signed-off-by: Clemens Gruber Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 280e8795b463..79ccc11facc3 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -407,15 +407,7 @@ static int m88e1121_config_aneg(struct phy_device *phydev) if (err < 0) return err; - oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE); - - phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE); - phy_write(phydev, MII_88E1121_PHY_LED_CTRL, MII_88E1121_PHY_LED_DEF); - phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage); - - err = genphy_config_aneg(phydev); - - return err; + return genphy_config_aneg(phydev); } static int m88e1318_config_aneg(struct phy_device *phydev) @@ -636,6 +628,28 @@ static int m88e1111_config_init(struct phy_device *phydev) return phy_write(phydev, MII_BMCR, BMCR_RESET); } +static int m88e1121_config_init(struct phy_device *phydev) +{ + int err, oldpage; + + oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE); + + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE); + if (err < 0) + return err; + + /* Default PHY LED config: LED[0] .. Link, LED[1] .. Activity */ + err = phy_write(phydev, MII_88E1121_PHY_LED_CTRL, + MII_88E1121_PHY_LED_DEF); + if (err < 0) + return err; + + phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage); + + /* Set marvell,reg-init configuration from device tree */ + return marvell_config_init(phydev); +} + static int m88e1510_config_init(struct phy_device *phydev) { int err; @@ -668,7 +682,7 @@ static int m88e1510_config_init(struct phy_device *phydev) return err; } - return marvell_config_init(phydev); + return m88e1121_config_init(phydev); } static int m88e1118_config_aneg(struct phy_device *phydev) @@ -1196,7 +1210,7 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, - .config_init = &marvell_config_init, + .config_init = &m88e1121_config_init, .config_aneg = &m88e1121_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, @@ -1215,7 +1229,7 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, - .config_init = &marvell_config_init, + .config_init = &m88e1121_config_init, .config_aneg = &m88e1318_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, From dcb94b88c09ce82a80e188d49bcffdc83ba215a6 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 11 Jun 2016 20:32:06 +0200 Subject: [PATCH 0547/1050] ipv6: fix endianness error in icmpv6_err IPv6 ping socket error handler doesn't correctly convert the new 32 bit mtu to host endianness before using. Cc: Lorenzo Colitti Fixes: 6d0bfe22611602f ("net: ipv6: Add IPv6 support to the ping socket.") Signed-off-by: Hannes Frederic Sowa Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4527285fcaa2..a4fa84076969 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -98,7 +98,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) - ping_err(skb, offset, info); + ping_err(skb, offset, ntohl(info)); } static int icmpv6_rcv(struct sk_buff *skb); From 5119bd16815d3f0364390a1369392dcc036790e7 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 11 Jun 2016 20:41:38 +0200 Subject: [PATCH 0548/1050] ipv6: tcp: fix endianness annotation in tcp_v6_send_response Cc: Florent Fourcot Fixes: 1d13a96c74fc ("ipv6: tcp: fix flowlabel value in ACK messages send from TIME_WAIT") Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f36c2d076fce..2255d2bf5f6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -738,7 +738,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, u32 label) + u8 tclass, __be32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -911,7 +911,7 @@ out: static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, - u32 label) + __be32 label) { tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, label); From c148d16369ff0095eca950d17968ba1d56a47b53 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 11 Jun 2016 21:15:37 +0200 Subject: [PATCH 0549/1050] ipv6: fix checksum annotation in udp6_csum_init Cc: Tom Herbert Fixes: 4068579e1e098fa ("net: Implmement RFC 6936 (zero RX csums for UDP/IPv6") Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_checksum.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index b2025bf3da4a..c0cbcb259f5a 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -78,9 +78,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * we accept a checksum of zero here. When we find the socket * for the UDP packet we'll check if that socket allows zero checksum * for IPv6 (set by socket option). + * + * Note, we are only interested in != 0 or == 0, thus the + * force to int. */ - return skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); } EXPORT_SYMBOL(udp6_csum_init); From b46d9f625b07f843c706c2c7d0210a90ccdf143b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 12 Jun 2016 12:02:46 +0200 Subject: [PATCH 0550/1050] ipv4: fix checksum annotation in udp4_csum_init Reported-by: Cong Wang Cc: Cong Wang Cc: Tom Herbert Fixes: 4068579e1e098fa ("net: Implmement RFC 6936 (zero RX csums for UDP/IPv6") Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/udp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0ff31d97d485..ba0d8b8b7690 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1755,8 +1755,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, return err; } - return skb_checksum_init_zero_check(skb, proto, uh->check, - inet_compute_pseudo); + /* Note, we are only interested in != 0 or == 0, thus the + * force to int. + */ + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); } /* From 881d0327db37ad917a367c77aff1afa1ee41e0a9 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Sun, 12 Jun 2016 17:36:37 +0800 Subject: [PATCH 0551/1050] net: alx: Work around the DMA RX overflow issue Commit 26c5f03 uses a new skb allocator to avoid the RFD overflow issue. But from debugging without datasheet, we found the error always happen when the DMA RX address is set to 0x....fc0, which is very likely to be a HW/silicon problem. So one idea is instead of adding a new allocator, why not just hitting the right target by avaiding the error-prone DMA address? This patch will actually * Remove the commit 26c5f03 * Apply rx skb with 64 bytes longer space, and if the allocated skb has a 0x...fc0 address, it will use skb_resever(skb, 64) to advance the address, so that the RX overflow can be avoided. In theory this method should also apply to atl1c driver, which I can't find anyone who can help to test on real devices. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70761 Signed-off-by: Feng Tang Suggested-by: Eric Dumazet Tested-by: Ole Lukoie Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/alx.h | 4 -- drivers/net/ethernet/atheros/alx/main.c | 61 ++++++------------------- 2 files changed, 14 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h index d02c4240b7df..8fc93c5f6abc 100644 --- a/drivers/net/ethernet/atheros/alx/alx.h +++ b/drivers/net/ethernet/atheros/alx/alx.h @@ -96,10 +96,6 @@ struct alx_priv { unsigned int rx_ringsz; unsigned int rxbuf_size; - struct page *rx_page; - unsigned int rx_page_offset; - unsigned int rx_frag_size; - struct napi_struct napi; struct alx_tx_queue txq; struct alx_rx_queue rxq; diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index c98acdc0d14f..e708e360a9e3 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -70,35 +70,6 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry) } } -static struct sk_buff *alx_alloc_skb(struct alx_priv *alx, gfp_t gfp) -{ - struct sk_buff *skb; - struct page *page; - - if (alx->rx_frag_size > PAGE_SIZE) - return __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp); - - page = alx->rx_page; - if (!page) { - alx->rx_page = page = alloc_page(gfp); - if (unlikely(!page)) - return NULL; - alx->rx_page_offset = 0; - } - - skb = build_skb(page_address(page) + alx->rx_page_offset, - alx->rx_frag_size); - if (likely(skb)) { - alx->rx_page_offset += alx->rx_frag_size; - if (alx->rx_page_offset >= PAGE_SIZE) - alx->rx_page = NULL; - else - get_page(page); - } - return skb; -} - - static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) { struct alx_rx_queue *rxq = &alx->rxq; @@ -115,9 +86,22 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) while (!cur_buf->skb && next != rxq->read_idx) { struct alx_rfd *rfd = &rxq->rfd[cur]; - skb = alx_alloc_skb(alx, gfp); + /* + * When DMA RX address is set to something like + * 0x....fc0, it will be very likely to cause DMA + * RFD overflow issue. + * + * To work around it, we apply rx skb with 64 bytes + * longer space, and offset the address whenever + * 0x....fc0 is detected. + */ + skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp); if (!skb) break; + + if (((unsigned long)skb->data & 0xfff) == 0xfc0) + skb_reserve(skb, 64); + dma = dma_map_single(&alx->hw.pdev->dev, skb->data, alx->rxbuf_size, DMA_FROM_DEVICE); @@ -153,7 +137,6 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) alx_write_mem16(&alx->hw, ALX_RFD_PIDX, cur); } - return count; } @@ -622,11 +605,6 @@ static void alx_free_rings(struct alx_priv *alx) kfree(alx->txq.bufs); kfree(alx->rxq.bufs); - if (alx->rx_page) { - put_page(alx->rx_page); - alx->rx_page = NULL; - } - dma_free_coherent(&alx->hw.pdev->dev, alx->descmem.size, alx->descmem.virt, @@ -681,7 +659,6 @@ static int alx_request_irq(struct alx_priv *alx) alx->dev->name, alx); if (!err) goto out; - /* fall back to legacy interrupt */ pci_disable_msi(alx->hw.pdev); } @@ -725,7 +702,6 @@ static int alx_init_sw(struct alx_priv *alx) struct pci_dev *pdev = alx->hw.pdev; struct alx_hw *hw = &alx->hw; int err; - unsigned int head_size; err = alx_identify_hw(alx); if (err) { @@ -741,12 +717,7 @@ static int alx_init_sw(struct alx_priv *alx) hw->smb_timer = 400; hw->mtu = alx->dev->mtu; - alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu); - head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - alx->rx_frag_size = roundup_pow_of_two(head_size); - alx->tx_ringsz = 256; alx->rx_ringsz = 512; hw->imt = 200; @@ -848,7 +819,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) { struct alx_priv *alx = netdev_priv(netdev); int max_frame = ALX_MAX_FRAME_LEN(mtu); - unsigned int head_size; if ((max_frame < ALX_MIN_FRAME_SIZE) || (max_frame > ALX_MAX_FRAME_SIZE)) @@ -860,9 +830,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) netdev->mtu = mtu; alx->hw.mtu = mtu; alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE); - head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - alx->rx_frag_size = roundup_pow_of_two(head_size); netdev_update_features(netdev); if (netif_running(netdev)) alx_reinit(alx); From 969a132723434f3723cc0606373785d19c1d2f05 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Jun 2016 15:13:05 -0500 Subject: [PATCH 0552/1050] usb: musb: sunxi: Fix NULL ptr deref when gadget is registered before musb Stop using the return value of platform_device_register_full() to get to the struct musb in sunxi_musb_work(). If a gadget has been registered (insmod-ed) before the musb driver, then musb_start will get called from the musb_core probe function and sunxi_musb_work() may run before platform_device_register_full() has returned. Instead store a pointer to struct musb in struct sunxi_glue when sunxi_musb_enable gets called. Note that sunxi_musb_enable always gets called before sunxi_musb_work() can run. Signed-off-by: Hans de Goede [b-liu@ti.com: revise subject prefix] Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/sunxi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index e7d46179b485..dc49041498a3 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -80,7 +80,8 @@ static struct musb *sunxi_musb; struct sunxi_glue { struct device *dev; - struct platform_device *musb; + struct musb *musb; + struct platform_device *musb_pdev; struct clk *clk; struct reset_control *rst; struct phy *phy; @@ -102,7 +103,7 @@ static void sunxi_musb_work(struct work_struct *work) return; if (test_and_clear_bit(SUNXI_MUSB_FL_HOSTMODE_PEND, &glue->flags)) { - struct musb *musb = platform_get_drvdata(glue->musb); + struct musb *musb = glue->musb; unsigned long flags; u8 devctl; @@ -337,6 +338,8 @@ static void sunxi_musb_enable(struct musb *musb) { struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent); + glue->musb = musb; + /* musb_core does not call us in a balanced manner */ if (test_and_set_bit(SUNXI_MUSB_FL_ENABLED, &glue->flags)) return; @@ -732,9 +735,9 @@ static int sunxi_musb_probe(struct platform_device *pdev) pinfo.data = &pdata; pinfo.size_data = sizeof(pdata); - glue->musb = platform_device_register_full(&pinfo); - if (IS_ERR(glue->musb)) { - ret = PTR_ERR(glue->musb); + glue->musb_pdev = platform_device_register_full(&pinfo); + if (IS_ERR(glue->musb_pdev)) { + ret = PTR_ERR(glue->musb_pdev); dev_err(&pdev->dev, "Error registering musb dev: %d\n", ret); goto err_unregister_usb_phy; } @@ -751,7 +754,7 @@ static int sunxi_musb_remove(struct platform_device *pdev) struct sunxi_glue *glue = platform_get_drvdata(pdev); struct platform_device *usb_phy = glue->usb_phy; - platform_device_unregister(glue->musb); /* Frees glue ! */ + platform_device_unregister(glue->musb_pdev); /* Frees glue ! */ usb_phy_generic_unregister(usb_phy); return 0; From 1c4bf5ac6a16d9321b51e91acef481b090e5486b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Jun 2016 15:13:06 -0500 Subject: [PATCH 0553/1050] usb: musb: sunxi: Remove bogus "Frees glue" comment The comment is wrong, glue is devm_kzalloc-ed mem attached to the "allwinner,sun4i-a10-musb" compatible platform-dev. Where as glue->musb_pdev is a newly created "musb-hdrc" platform-dev. Signed-off-by: Hans de Goede [b-liu@ti.com: revise subject prefix] Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/sunxi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index dc49041498a3..76500515dd8b 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -754,7 +754,7 @@ static int sunxi_musb_remove(struct platform_device *pdev) struct sunxi_glue *glue = platform_get_drvdata(pdev); struct platform_device *usb_phy = glue->usb_phy; - platform_device_unregister(glue->musb_pdev); /* Frees glue ! */ + platform_device_unregister(glue->musb_pdev); usb_phy_generic_unregister(usb_phy); return 0; From 6c0d54f1897d229748d4f41ef919078db6db2123 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 12 Jun 2016 20:01:25 -0700 Subject: [PATCH 0554/1050] net_sched: fix pfifo_head_drop behavior vs backlog When the qdisc is full, we drop a packet at the head of the queue, queue the current skb and return NET_XMIT_CN Now we track backlog on upper qdiscs, we need to call qdisc_tree_reduce_backlog(), even if the qlen did not change. Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Signed-off-by: Eric Dumazet Cc: WANG Cong Cc: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_fifo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 2177eac0a61e..2e4bd2c0a50c 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -37,14 +37,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) { + unsigned int prev_backlog; + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); + prev_backlog = sch->qstats.backlog; /* queue full, remove one skb to fulfill the limit */ __qdisc_queue_drop_head(sch, &sch->q); qdisc_qstats_drop(sch); qdisc_enqueue_tail(skb, sch); + qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog); return NET_XMIT_CN; } From 476490a945e1f0f6bd58e303058d2d8ca93a974c Mon Sep 17 00:00:00 2001 From: Lyude Date: Tue, 14 Jun 2016 11:04:09 -0400 Subject: [PATCH 0555/1050] drm/i915/ilk: Don't disable SSC source if it's in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to Ville Syrjälä for pointing me towards the cause of this issue. Unfortunately one of the sideaffects of having the refclk for a DPLL set to SSC is that as long as it's set to SSC, the GPU will prevent us from powering down any of the pipes or transcoders using it. A couple of BIOSes enable SSC in both PCH_DREF_CONTROL and in the DPLL configurations. This causes issues on the first modeset, since we don't expect SSC to be left on and as a result, can't successfully power down the pipes or the transcoders using it. Here's an example from this Dell OptiPlex 990: [drm:intel_modeset_init] SSC enabled by BIOS, overriding VBT which says disabled [drm:intel_modeset_init] 2 display pipes available. [drm:intel_update_cdclk] Current CD clock rate: 400000 kHz [drm:intel_update_max_cdclk] Max CD clock rate: 400000 kHz [drm:intel_update_max_cdclk] Max dotclock rate: 360000 kHz vgaarb: device changed decodes: PCI:0000:00:02.0,olddecodes=io+mem,decodes=io+mem:owns=io+mem [drm:intel_crt_reset] crt adpa set to 0xf40000 [drm:intel_dp_init_connector] Adding DP connector on port C [drm:intel_dp_aux_init] registering DPDDC-C bus for card0-DP-1 [drm:ironlake_init_pch_refclk] has_panel 0 has_lvds 0 has_ck505 0 [drm:ironlake_init_pch_refclk] Disabling SSC entirely … later we try committing the first modeset … [drm:intel_dump_pipe_config] [CRTC:26][modeset] config ffff88041b02e800 for pipe A [drm:intel_dump_pipe_config] cpu_transcoder: A … [drm:intel_dump_pipe_config] dpll_hw_state: dpll: 0xc4016001, dpll_md: 0x0, fp0: 0x20e08, fp1: 0x30d07 [drm:intel_dump_pipe_config] planes on this crtc [drm:intel_dump_pipe_config] STANDARD PLANE:23 plane: 0.0 idx: 0 enabled [drm:intel_dump_pipe_config] FB:42, fb = 800x600 format = 0x34325258 [drm:intel_dump_pipe_config] scaler:0 src (0, 0) 800x600 dst (0, 0) 800x600 [drm:intel_dump_pipe_config] CURSOR PLANE:25 plane: 0.1 idx: 1 disabled, scaler_id = 0 [drm:intel_dump_pipe_config] STANDARD PLANE:27 plane: 0.1 idx: 2 disabled, scaler_id = 0 [drm:intel_get_shared_dpll] CRTC:26 allocated PCH DPLL A [drm:intel_get_shared_dpll] using PCH DPLL A for pipe A [drm:ilk_audio_codec_disable] Disable audio codec on port C, pipe A [drm:intel_disable_pipe] disabling pipe A ------------[ cut here ]------------ WARNING: CPU: 1 PID: 130 at drivers/gpu/drm/i915/intel_display.c:1146 intel_disable_pipe+0x297/0x2d0 [i915] pipe_off wait timed out … ---[ end trace 94fc8aa03ae139e8 ]--- [drm:intel_dp_link_down] [drm:ironlake_crtc_disable [i915]] *ERROR* failed to disable transcoder A Later modesets succeed since they reset the DPLL's configuration anyway, but this is enough to get stuck with a big fat warning in dmesg. A better solution would be to add refcounts for the SSC source, but for now leaving the source clock on should suffice. Changes since v4: - Fix calculation of final for systems with LVDS panels (fixes BUG() on CI test suite) Changes since v3: - Move temp variable into loop - Move checks for using_ssc_source to after we've figured out has_ck505 - Add using_ssc_source to debug output Changes since v2: - Fix debug output for when we disable the CPU source Changes since v1: - Leave the SSC source clock on instead of just shutting it off on all of the DPLL configurations. Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Signed-off-by: Lyude Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1465916649-10228-1-git-send-email-cpaul@redhat.com Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 48 ++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 718f56525b96..56a1637c864f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8275,12 +8275,14 @@ static void ironlake_init_pch_refclk(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_encoder *encoder; + int i; u32 val, final; bool has_lvds = false; bool has_cpu_edp = false; bool has_panel = false; bool has_ck505 = false; bool can_ssc = false; + bool using_ssc_source = false; /* We need to take the global config into account */ for_each_intel_encoder(dev, encoder) { @@ -8307,8 +8309,22 @@ static void ironlake_init_pch_refclk(struct drm_device *dev) can_ssc = true; } - DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d\n", - has_panel, has_lvds, has_ck505); + /* Check if any DPLLs are using the SSC source */ + for (i = 0; i < dev_priv->num_shared_dpll; i++) { + u32 temp = I915_READ(PCH_DPLL(i)); + + if (!(temp & DPLL_VCO_ENABLE)) + continue; + + if ((temp & PLL_REF_INPUT_MASK) == + PLLB_REF_INPUT_SPREADSPECTRUMIN) { + using_ssc_source = true; + break; + } + } + + DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d using_ssc_source %d\n", + has_panel, has_lvds, has_ck505, using_ssc_source); /* Ironlake: try to setup display ref clock before DPLL * enabling. This is only under driver's control after @@ -8345,9 +8361,9 @@ static void ironlake_init_pch_refclk(struct drm_device *dev) final |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD; } else final |= DREF_CPU_SOURCE_OUTPUT_DISABLE; - } else { - final |= DREF_SSC_SOURCE_DISABLE; - final |= DREF_CPU_SOURCE_OUTPUT_DISABLE; + } else if (using_ssc_source) { + final |= DREF_SSC_SOURCE_ENABLE; + final |= DREF_SSC1_ENABLE; } if (final == val) @@ -8393,7 +8409,7 @@ static void ironlake_init_pch_refclk(struct drm_device *dev) POSTING_READ(PCH_DREF_CONTROL); udelay(200); } else { - DRM_DEBUG_KMS("Disabling SSC entirely\n"); + DRM_DEBUG_KMS("Disabling CPU source output\n"); val &= ~DREF_CPU_SOURCE_OUTPUT_MASK; @@ -8404,16 +8420,20 @@ static void ironlake_init_pch_refclk(struct drm_device *dev) POSTING_READ(PCH_DREF_CONTROL); udelay(200); - /* Turn off the SSC source */ - val &= ~DREF_SSC_SOURCE_MASK; - val |= DREF_SSC_SOURCE_DISABLE; + if (!using_ssc_source) { + DRM_DEBUG_KMS("Disabling SSC source\n"); - /* Turn off SSC1 */ - val &= ~DREF_SSC1_ENABLE; + /* Turn off the SSC source */ + val &= ~DREF_SSC_SOURCE_MASK; + val |= DREF_SSC_SOURCE_DISABLE; - I915_WRITE(PCH_DREF_CONTROL, val); - POSTING_READ(PCH_DREF_CONTROL); - udelay(200); + /* Turn off SSC1 */ + val &= ~DREF_SSC1_ENABLE; + + I915_WRITE(PCH_DREF_CONTROL, val); + POSTING_READ(PCH_DREF_CONTROL); + udelay(200); + } } BUG_ON(val != final); From d1e37288c9146dccff830e3253e403af8705b51f Mon Sep 17 00:00:00 2001 From: "Su, Xuemin" Date: Mon, 13 Jun 2016 11:02:50 +0800 Subject: [PATCH 0556/1050] udp reuseport: fix packet of same flow hashed to different socket There is a corner case in which udp packets belonging to a same flow are hashed to different socket when hslot->count changes from 10 to 11: 1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash, and always passes 'daddr' to udp_ehashfn(). 2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2, but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to INADDR_ANY instead of some specific addr. That means when hslot->count changes from 10 to 11, the hash calculated by udp_ehashfn() is also changed, and the udp packets belonging to a same flow will be hashed to different socket. This is easily reproduced: 1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000. 2) From the same host send udp packets to 127.0.0.1:40000, record the socket index which receives the packets. 3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096 is 40000 + UDP_HASH_SIZE(4096), this makes the new socket put into the same hslot as the aformentioned 10 sockets, and makes the hslot->count change from 10 to 11. 4) From the same host send udp packets to 127.0.0.1:40000, and the socket index which receives the packets will be different from the one received in step 2. This should not happen as the socket bound to 0.0.0.0:44096 should not change the behavior of the sockets bound to 0.0.0.0:40000. It's the same case for IPv6, and this patch also fixes that. Signed-off-by: Su, Xuemin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 73 +++++++++++--------------------------------------- net/ipv6/udp.c | 71 +++++++++++------------------------------------- 2 files changed, 32 insertions(+), 112 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ba0d8b8b7690..ca5e8ea29538 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); } -static inline int compute_score(struct sock *sk, struct net *net, - __be32 saddr, unsigned short hnum, __be16 sport, - __be32 daddr, __be16 dport, int dif) +static int compute_score(struct sock *sk, struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum, int dif) { int score; struct inet_sock *inet; @@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -/* - * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) - */ -static inline int compute_score2(struct sock *sk, struct net *net, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned int hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - ipv6_only_sock(sk)) - return -1; - - inet = inet_sk(sk); - - if (inet->inet_rcv_saddr != daddr || - inet->inet_num != hnum) - return -1; - - score = (sk->sk_family == PF_INET) ? 2 : 1; - - if (inet->inet_daddr) { - if (inet->inet_daddr != saddr) - return -1; - score += 4; - } - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score += 4; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score += 4; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - static u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) @@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, result = NULL; badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -554,17 +508,22 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. */ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp4_lib_lookup2(net, saddr, sport, - htonl(INADDR_ANY), hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -572,8 +531,8 @@ begin: result = NULL; badness = 0; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, hnum, sport, - daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, + daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f421c9f23c5b..005dc82c2138 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static inline int compute_score(struct sock *sk, struct net *net, - unsigned short hnum, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif) +static int compute_score(struct sock *sk, struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif) { int score; struct inet_sock *inet; @@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, - unsigned short hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - udp_sk(sk)->udp_port_hash != hnum || - sk->sk_family != PF_INET6) - return -1; - - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - - score = 0; - inet = inet_sk(sk); - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, result = NULL; badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. */ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -286,7 +247,7 @@ begin: result = NULL; badness = -1; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { From 7c64cc0531fa0d9720f9e15a0a0d97bcad1d1cd1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Apr 2016 10:42:25 -0700 Subject: [PATCH 0557/1050] arm: Use _rcuidle for smp_cross_call() tracepoints Further testing with false negatives suppressed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()") identified another unprotected use of RCU from the idle loop. Because RCU actively ignores idle-loop code (for energy-efficiency reasons, among other things), using RCU from the idle loop can result in too-short grace periods, in turn resulting in arbitrary misbehavior. The resulting lockdep-RCU splat is as follows: ------------------------------------------------------------------------ =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc5-next-20160426+ #1112 Not tainted ------------------------------- include/trace/events/ipi.h:35 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112 Hardware name: Generic OMAP4 (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xb0/0xe4) [] (dump_stack) from [] (smp_cross_call+0xbc/0x188) [] (smp_cross_call) from [] (generic_exec_single+0x9c/0x15c) [] (generic_exec_single) from [] (smp_call_function_single_async+0 x38/0x9c) [] (smp_call_function_single_async) from [] (cpuidle_coupled_poke_others+0x8c/0xa8) [] (cpuidle_coupled_poke_others) from [] (cpuidle_enter_state_coupled+0x26c/0x390) [] (cpuidle_enter_state_coupled) from [] (cpu_startup_entry+0x198/0x3a0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) [] (start_kernel) from [<8000807c>] (0x8000807c) ------------------------------------------------------------------------ Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck Cc: Russell King Cc: Steven Rostedt Cc: Cc: --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index baee70267f29..7afe48ae5d76 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -486,7 +486,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { - trace_ipi_raise(target, ipi_types[ipinr]); + trace_ipi_raise_rcuidle(target, ipi_types[ipinr]); __smp_cross_call(target, ipinr); } From b00345d1994d588fa2687e1238fcd542f0320cba Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 14 Jun 2016 23:12:59 -0700 Subject: [PATCH 0558/1050] cpufreq: intel_pstate: Adjust _PSS[0] freqeuency if needed The maximum turbo P-State used by the intel_pstate driver may be limited by ACPI _PSS table entry 0. After commit 9522a2ff9cde (cpufreq: intel_pstate: Enforce _PPC limits), the maximum performance on servers will be capped by the _PSS table entry 0 by default. Even though that is formally correct, it may lead to preformance regressions in some cases. Namely, if the _PSS table entry 0 is not the maximum turbo P-State, performance measured after commit 9522a2ff9cde will not match the performance measured before that commit on the same system. For this reason, modify the code to always use the maximum turbo frequency as the one that corresponds to _PSS table entry 0 if turbo is enabled in the BIOS. This way, the performance levels from before commit 9522a2ff9cde will be restored on the affected systems. Fixes: 9522a2ff9cde (cpufreq: intel_pstate: Enforce _PPC limits) Suggested-by: Rafael J. Wysocki Signed-off-by: Srinivas Pandruvada [ rjw : Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ee367e9b7d2e..fe9dc17ea873 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -372,26 +372,9 @@ static bool intel_pstate_get_ppc_enable_status(void) return acpi_ppc; } -/* - * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and - * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and - * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state - * ratio, out of it only high 8 bits are used. For example 0x1700 is setting - * target ratio 0x17. The _PSS control value stores in a format which can be - * directly written to PERF_CTL MSR. But in intel_pstate driver this shift - * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). - * This function converts the _PSS control value to intel pstate driver format - * for comparison and assignment. - */ -static int convert_to_native_pstate_format(struct cpudata *cpu, int index) -{ - return cpu->acpi_perf_data.states[index].control >> 8; -} - static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) { struct cpudata *cpu; - int turbo_pss_ctl; int ret; int i; @@ -441,11 +424,10 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) * max frequency, which will cause a reduced performance as * this driver uses real max turbo frequency as the max * frequency. So correct this frequency in _PSS table to - * correct max turbo frequency based on the turbo ratio. + * correct max turbo frequency based on the turbo state. * Also need to convert to MHz as _PSS freq is in MHz. */ - turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); - if (turbo_pss_ctl > cpu->pstate.max_pstate) + if (!limits->turbo_disabled) cpu->acpi_perf_data.states[0].core_frequency = policy->cpuinfo.max_freq / 1000; cpu->valid_pss_table = true; From da4e792550a856e2f66aa8183d408553f7513a86 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Jun 2016 02:16:13 +0200 Subject: [PATCH 0559/1050] Revert "ACPICA: ACPI 2.0, Hardware: Add access_width/bit_offset support for acpi_hw_write()" Revert commit 66b1ed5aa8dd "ACPICA: ACPI 2.0, Hardware: Add access_width/bit_offset support for acpi_hw_write()" that is reported to break suspend-to-RAM (ACPI S3) on one system. The root cause of the failure is a wrong access width value for one of the involved registers provided by the ACPI tables, but before commit 66b1ed5aa8dd that value was not taken into account at all and things worked. Fixes: 66b1ed5aa8dd "ACPICA: ACPI 2.0, Hardware: Add access_width/bit_offset support for acpi_hw_write()" Reported-by: Andrey Skvortsov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/hwregs.c | 144 ++--------------------------------- 1 file changed, 8 insertions(+), 136 deletions(-) diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c index daceb80022b0..3b7fb99362b6 100644 --- a/drivers/acpi/acpica/hwregs.c +++ b/drivers/acpi/acpica/hwregs.c @@ -306,12 +306,6 @@ acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg) acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg) { u64 address; - u8 access_width; - u32 bit_width; - u8 bit_offset; - u64 value64; - u32 new_value32, old_value32; - u8 index; acpi_status status; ACPI_FUNCTION_NAME(hw_write); @@ -323,145 +317,23 @@ acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg) return (status); } - /* Convert access_width into number of bits based */ - - access_width = acpi_hw_get_access_bit_width(reg, 32); - bit_width = reg->bit_offset + reg->bit_width; - bit_offset = reg->bit_offset; - /* * Two address spaces supported: Memory or IO. PCI_Config is * not supported here because the GAS structure is insufficient */ - index = 0; - while (bit_width) { - /* - * Use offset style bit reads because "Index * AccessWidth" is - * ensured to be less than 32-bits by acpi_hw_validate_register(). - */ - new_value32 = ACPI_GET_BITS(&value, index * access_width, - ACPI_MASK_BITS_ABOVE_32 - (access_width)); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + status = acpi_os_write_memory((acpi_physical_address) + address, (u64)value, + reg->bit_width); + } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ - if (bit_offset >= access_width) { - bit_offset -= access_width; - } else { - /* - * Use offset style bit masks because access_width is ensured - * to be less than 32-bits by acpi_hw_validate_register() and - * bit_offset/bit_width is less than access_width here. - */ - if (bit_offset) { - new_value32 &= ACPI_MASK_BITS_BELOW(bit_offset); - } - if (bit_width < access_width) { - new_value32 &= ACPI_MASK_BITS_ABOVE(bit_width); - } - - if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { - if (bit_offset || bit_width < access_width) { - /* - * Read old values in order not to modify the bits that - * are beyond the register bit_width/bit_offset setting. - */ - status = - acpi_os_read_memory((acpi_physical_address) - address + - index * - ACPI_DIV_8 - (access_width), - &value64, - access_width); - old_value32 = (u32)value64; - - /* - * Use offset style bit masks because access_width is - * ensured to be less than 32-bits by - * acpi_hw_validate_register() and bit_offset/bit_width is - * less than access_width here. - */ - if (bit_offset) { - old_value32 &= - ACPI_MASK_BITS_ABOVE - (bit_offset); - bit_offset = 0; - } - if (bit_width < access_width) { - old_value32 &= - ACPI_MASK_BITS_BELOW - (bit_width); - } - - new_value32 |= old_value32; - } - - value64 = (u64)new_value32; - status = - acpi_os_write_memory((acpi_physical_address) - address + - index * - ACPI_DIV_8 - (access_width), - value64, access_width); - } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ - - if (bit_offset || bit_width < access_width) { - /* - * Read old values in order not to modify the bits that - * are beyond the register bit_width/bit_offset setting. - */ - status = - acpi_hw_read_port((acpi_io_address) - address + - index * - ACPI_DIV_8 - (access_width), - &old_value32, - access_width); - - /* - * Use offset style bit masks because access_width is - * ensured to be less than 32-bits by - * acpi_hw_validate_register() and bit_offset/bit_width is - * less than access_width here. - */ - if (bit_offset) { - old_value32 &= - ACPI_MASK_BITS_ABOVE - (bit_offset); - bit_offset = 0; - } - if (bit_width < access_width) { - old_value32 &= - ACPI_MASK_BITS_BELOW - (bit_width); - } - - new_value32 |= old_value32; - } - - status = acpi_hw_write_port((acpi_io_address) - address + - index * - ACPI_DIV_8 - (access_width), - new_value32, - access_width); - } - } - - /* - * Index * access_width is ensured to be less than 32-bits by - * acpi_hw_validate_register(). - */ - bit_width -= - bit_width > access_width ? access_width : bit_width; - index++; + status = acpi_hw_write_port((acpi_io_address) + address, value, reg->bit_width); } ACPI_DEBUG_PRINT((ACPI_DB_IO, "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n", - value, access_width, ACPI_FORMAT_UINT64(address), + value, reg->bit_width, ACPI_FORMAT_UINT64(address), acpi_ut_get_region_name(reg->space_id))); return (status); From fffc5f59f2c68ca859c3f92b224393ed3adbe1ca Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 2 Jun 2016 19:27:51 +0200 Subject: [PATCH 0560/1050] drm/crtc: fix connector reference counting mismatch in drm_crtc_helper_set_config Since commit 0955c1250e96 ("drm/crtc: take references to connectors used in a modeset. (v2)"), the reference counts of all connectors in the drm_mode_set given to drm_crtc_helper_set_config are incremented, and then the reference counts of all connectors are decremented on success, but in a temporary copy of the connector structure. This leads to the following error after the first modeset on imx-drm: Unable to handle kernel NULL pointer dereference at virtual address 00000004 pgd = ad8c4000 [00000004] *pgd=3d9c5831, *pte=00000000, *ppte=00000000 Internal error: Oops: 817 [#1] PREEMPT SMP ARM Modules linked in: CPU: 1 PID: 190 Comm: kmsfb-manage Not tainted 4.7.0-rc1+ #657 Hardware name: Freescale i.MX6 Quad/DualLit: [<80506098>] lr : [<80252e94>] psr: 200c0013 sp : adca7ca8 ip : adca7b90 fp : adca7cd4 r10: 00000000 r9 : 00000100 r8 : 00000200 r7 : af3c9800 r6 : aded7848 r5 : aded7800 r4 : 00000000 r3 : af3ca058 r2 : 00000200 r1 : af3ca058 r0 : 00000000 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 3d8c404a DAC: 00000051 Process kmsfb-manage (pid: 190, stack limit = 0xadca6210) Stack: (0xadca7ca8 to 0xadca8000) 7ca0: 805190e0 aded7800 aded7820 80501a88 8155a290 af3c9c6c 7cc0: adca7ddc 0000000f adca7cec adca7cd8 80519104 80506044 805190e0 aded7800 7ce0: adca7d04 adca7cf0 80501ac0 805190ec aded7820 aded7814 adca7d24 adca7d08 7d00: 804fdb80 80501a94 aded7800 af3ca010 aded7afc af3c9c60 adca7d94 adca7d28 7d20: 804e3518 804fdb20 00000000 af3c9b1c adca7d50 81506f44 00000000 8093c500 7d40: af3c9c6c ae4f2ca8 ae4f2c18 00000000 00000000 ae637f00 00000000 aded7800 7d60: 00000001 af3c9800 af23c300 ae77fcc0 ae4f2c18 00000001 af3c9800 8155a290 7d80: af1af700 adca6000 adca7db4 adca7d98 804fea6c 804e2de4 adca7e50 adb3d940 7da0: 00000001 af3c9800 adca7e24 adca7db8 8050440c 804fea0c ae77fcc0 00000003 7dc0: adca7e24 adb3d940 af1af700 ae77fcc0 ae77fccc ae4f2c18 8083d44c ae77fcc0 7de0: ae4002 80d03040 adca7e64 adca7e40 adca7e50 80503f08 7e40: 7ebd5630 adca7e50 00000068 c06864a2 7ebd5be8 00000000 00000001 00000018 7e60: 00000026 00000000 00000000 00000000 00000001 000115bc 05010500 05a0059f 7e80: 03200000 03360321 00000337 0000003c 00000000 00000040 30383231 30303878 7ea0: 00000000 00000000 00000000 00000000 00000000 00000000 80173058 80172e30 7ec0: 80d77d32 00004000 adf7d900 00000003 00000000 7ebd5630 af342bb0 adfe3b80 7ee0: 80272f50 00000003 adca6000 00000000 adca7f7c adca7f00 802725ec 804f52cc 7f00: 802809cc 80178450 00000000 00000000 80280880 80145904 adb3d8c0 adf7d990 7f20: ffffffff 00000003 00004000 01614c10 c06864a2 00000003 adca6000 00000000 7f40: adca7f6c adca7f50 80280b04 8028088c 000115bc adfe3b81 7ebd5630 adfe3b80 7f60: c06864a2 00000003 adca6000 00000000 adca7fa4 adca7f80 80272f50 80272548 7f80: 000115bc 00017050 00000001 01614c10 00000036 801089e4 00000000 adca7fa8 7fa0: 80108840 80272f18 00017050 00000001 00000003 c06864a2 7ebd5630 000115bc 7fc0: 00017050 00000001 01614c10 00000036 00000003 00000000 00000026 00000018 7fe0: 00016f38 7ebd562c 0000b5e9 76ef31e6 400c0030 00000003 ff5f37db bfe7dd4d Backtrace: [<80506038>] (drm_connector_cleanup) from [<80519104>] (dw_hdmi_connector_destroy+0x24/0x28) r10:0000000f r9:adca7ddc r8:af3c9c6c r7:8155a290 r6:80501a88 r5:aded7820 r4:aded7800 r3:805190e0 [<805190e0>] (dw_hdmi_connector_destroy) from [<80501ac0>] (drm_connector_free+0x38/0x3c) r4:aded7800 nreference) from [<804e3518>] (drm_crtc_helper_set_config+0x740/0xbf4) r6:af3c9c60 r5:aded7afc r4:af3ca010 r3:aded7800 [<804e2dd8>] (drm_crtc_helper_set_config) from [<804fea6c>] (drm_mode_set_config_internal+0x6c/0xf4) r10:adca6000 r9:af1af700 r8:8155a290 r7:af3c9800 r6:00000001 r5:ae4f2c18 r4:ae77fcc0 [<804fea00>] (drm_mode_set_config_internal) from [<8050440c>] (drm_mode_setcrtc+0x504/0x57c) r7:af3c9800 r6:00000001 r5:adb3d940 r4:adca7e50 [<80503f08>] (drm_mode_setcrtc) from [<804f5404>] (drm_ioctl+0x144/0x4dc) r10:ada2e000 r9:000000a2 r8:af3c9800 r7:8155a290 r6:809320b4 r5:00000051 r4:adca7e50 [<804f52c0>] (drm_ioctl) from [<802725ec>] (do_vfs_ioctl+0xb0/0x9d0) r10:00000000 r9:adca6000 r8:00000003 r7:80272f50 r6:adfe3b80 r5:af342bb0 r4:7ebd5630 [<8027253c>] (do_vfs_ioctl) from [<80272f50>] (SyS_ioctl+0x44/0x6c) r10:00000000 r9:adca6000 r8:00000003 r7:c06864a2 r6:adfe3b80 r5:7ebd5630 r4:adfe3b81 [<80272f0c>] (SyS_ioctl) from [<80108840>] (ret_fast_syscall+0x0/0x1c) r8:801089e4 r7:00000036 r6:01614c10 r5:00000001 r4:00017050 r3:000115bc Code: 0a00000c e5932004 e1a01003 e1a0a004 (e5842004) ---[ end trace 9a7257572ccacb16 ]--- Only the reference count of connectors that weren't previously bound to an encoder should be incremented after a call to drm_crtc_helper_set_config. And only the reference count of connectors that were previously bound to an encoder and are unbound afterwards should ever be decremented. The reference counts of the temporary copies in the save_connectors should not be touched at all. This patch fixes the above error by only incrementing the reference count of those connectors in the set that are initially not bound to any encoder, and also by restoring the reference count of only those connectors in the set in the failure case. "Note that this can only be hit when fbdev emulation is disabled, since then the refcount drops from 1 to 0 and we call the connector destroy functions on the backup copy, which eventually results in tears. With fbdev emulation the refcount only goes down from 2 to 1 ever. And since we unconditionally increment the refcount on the real object, the refcount of that will slowly increase. The backup connector's refcount doesn't matter, since we kfree() that either way in the end of drm_crtc_helper_set_config()." Fixes: 0955c1250e96 ("drm/crtc: take references to connectors used in a modeset. (v2)") Signed-off-by: Philipp Zabel Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index a6e42433ef0e..1c4d6746f90c 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -631,8 +631,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) mode_changed = true; } - /* take a reference on all connectors in set */ + /* take a reference on all unbound connectors in set, reuse the + * already taken reference for bound connectors + */ for (ro = 0; ro < set->num_connectors; ro++) { + if (set->connectors[ro]->encoder) + continue; drm_connector_reference(set->connectors[ro]); } @@ -754,12 +758,6 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) } } - /* after fail drop reference on all connectors in save set */ - count = 0; - drm_for_each_connector(connector, dev) { - drm_connector_unreference(&save_connectors[count++]); - } - kfree(save_connectors); kfree(save_encoders); return 0; @@ -776,8 +774,12 @@ fail: *connector = save_connectors[count++]; } - /* after fail drop reference on all connectors in set */ + /* after fail drop reference on all unbound connectors in set, let + * bound connectors keep their reference + */ for (ro = 0; ro < set->num_connectors; ro++) { + if (set->connectors[ro]->encoder) + continue; drm_connector_unreference(set->connectors[ro]); } From 93f55972bc0fef0e394bbed3d46dc06b9afd3d17 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 2 Jun 2016 19:27:52 +0200 Subject: [PATCH 0561/1050] drm/crtc: only store the necessary data for set_config rollback drm_crtc_helper_set_config only potentially touches connector->encoder and encoder->crtc, so we only have to store those for all connectors and encoders, respectively. Suggested-by: Daniel Vetter Signed-off-by: Philipp Zabel Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 1c4d6746f90c..26feb2f8453f 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -528,11 +528,11 @@ drm_crtc_helper_disable(struct drm_crtc *crtc) int drm_crtc_helper_set_config(struct drm_mode_set *set) { struct drm_device *dev; - struct drm_crtc *new_crtc; - struct drm_encoder *save_encoders, *new_encoder, *encoder; + struct drm_crtc **save_encoder_crtcs, *new_crtc; + struct drm_encoder **save_connector_encoders, *new_encoder, *encoder; bool mode_changed = false; /* if true do a full mode set */ bool fb_changed = false; /* if true and !mode_changed just do a flip */ - struct drm_connector *save_connectors, *connector; + struct drm_connector *connector; int count = 0, ro, fail = 0; const struct drm_crtc_helper_funcs *crtc_funcs; struct drm_mode_set save_set; @@ -574,15 +574,15 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) * Allocate space for the backup of all (non-pointer) encoder and * connector data. */ - save_encoders = kzalloc(dev->mode_config.num_encoder * - sizeof(struct drm_encoder), GFP_KERNEL); - if (!save_encoders) + save_encoder_crtcs = kzalloc(dev->mode_config.num_encoder * + sizeof(struct drm_crtc *), GFP_KERNEL); + if (!save_encoder_crtcs) return -ENOMEM; - save_connectors = kzalloc(dev->mode_config.num_connector * - sizeof(struct drm_connector), GFP_KERNEL); - if (!save_connectors) { - kfree(save_encoders); + save_connector_encoders = kzalloc(dev->mode_config.num_connector * + sizeof(struct drm_encoder *), GFP_KERNEL); + if (!save_connector_encoders) { + kfree(save_encoder_crtcs); return -ENOMEM; } @@ -593,12 +593,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) */ count = 0; drm_for_each_encoder(encoder, dev) { - save_encoders[count++] = *encoder; + save_encoder_crtcs[count++] = encoder->crtc; } count = 0; drm_for_each_connector(connector, dev) { - save_connectors[count++] = *connector; + save_connector_encoders[count++] = connector->encoder; } save_set.crtc = set->crtc; @@ -758,20 +758,20 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) } } - kfree(save_connectors); - kfree(save_encoders); + kfree(save_connector_encoders); + kfree(save_encoder_crtcs); return 0; fail: /* Restore all previous data. */ count = 0; drm_for_each_encoder(encoder, dev) { - *encoder = save_encoders[count++]; + encoder->crtc = save_encoder_crtcs[count++]; } count = 0; drm_for_each_connector(connector, dev) { - *connector = save_connectors[count++]; + connector->encoder = save_connector_encoders[count++]; } /* after fail drop reference on all unbound connectors in set, let @@ -789,8 +789,8 @@ fail: save_set.y, save_set.fb)) DRM_ERROR("failed to restore config after modeset failure\n"); - kfree(save_connectors); - kfree(save_encoders); + kfree(save_connector_encoders); + kfree(save_encoder_crtcs); return ret; } EXPORT_SYMBOL(drm_crtc_helper_set_config); From fd2d2bac6e79b0be91ab86a6075a0c46ffda658a Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 25 May 2016 16:45:43 -0400 Subject: [PATCH 0562/1050] drm/dp/mst: Always clear proposed vcpi table for port. Not clearing mst manager's proposed vcpis table for destroyed connectors when the manager is stopped leaves it pointing to unrefernced memory, this causes pagefault when the manager is restarted when plugging back a branch. Fixes: 91a25e463130 ("drm/dp/mst: deallocate payload on port destruction") Signed-off-by: Andrey Grodzovsky Reviewed-by: Lyude Cc: stable@vger.kernel.org Cc: Mykola Lysenko Cc: Alex Deucher --- drivers/gpu/drm/drm_dp_mst_topology.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index a13edf5de2d6..6537908050d7 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2927,11 +2927,9 @@ static void drm_dp_destroy_connector_work(struct work_struct *work) drm_dp_port_teardown_pdt(port, port->pdt); if (!port->input && port->vcpi.vcpi > 0) { - if (mgr->mst_state) { - drm_dp_mst_reset_vcpi_slots(mgr, port); - drm_dp_update_payload_part1(mgr); - drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); - } + drm_dp_mst_reset_vcpi_slots(mgr, port); + drm_dp_update_payload_part1(mgr); + drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); } kref_put(&port->kref, drm_dp_free_mst_port); From 8beb330044d0d1878c7b92290e91c0b889e92633 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 13 Jun 2016 22:00:07 -0700 Subject: [PATCH 0563/1050] 53c700: fix BUG on untagged commands The untagged command case in the 53c700 driver has been broken since host wide tags were enabled because the replaced scsi_find_tag() function had a special case for the tag value SCSI_NO_TAG to retrieve sdev->current_cmnd. The replacement function scsi_host_find_tag() has no such special case and returns NULL causing untagged commands to trigger a BUG() in the driver. Inspection shows that the 53c700 is the only driver using this SCSI_NO_TAG case, so a local fix in the driver suffices to fix this problem globally. Fixes: 64d513ac31b - "scsi: use host wide tags by default" Cc: stable@vger.kernel.org # 4.4+ Reported-by: Helge Deller Tested-by: Helge Deller Signed-off-by: James Bottomley Reviewed-by: Johannes Thumshirn Reviewed-by: Ewan D. Milne Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/53c700.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index d4c285688ce9..3ddc85e6efd6 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1122,7 +1122,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, } else { struct scsi_cmnd *SCp; - SCp = scsi_host_find_tag(SDp->host, SCSI_NO_TAG); + SCp = SDp->current_cmnd; if(unlikely(SCp == NULL)) { sdev_printk(KERN_ERR, SDp, "no saved request for untagged cmd\n"); @@ -1826,7 +1826,7 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *) slot->tag, slot); } else { slot->tag = SCSI_NO_TAG; - /* must populate current_cmnd for scsi_host_find_tag to work */ + /* save current command for reselection */ SCp->device->current_cmnd = SCp; } /* sanity check: some of the commands generated by the mid-layer From 106da663ff495e0aea3ac15b8317aa410754fcac Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:04 +0200 Subject: [PATCH 0564/1050] ovs/gre,geneve: fix error path when creating an iface After ipgre_newlink()/geneve_configure() call, the netdev is registered. Fixes: 7e059158d57b ("vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices") CC: David Wragg Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/geneve.c | 10 +++++++--- net/ipv4/ip_gre.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cadefe4fdaa2..086c2dae4c3d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1508,6 +1508,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; + LIST_HEAD(list_kill); int err; memset(tb, 0, sizeof(tb)); @@ -1519,8 +1520,10 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, err = geneve_configure(net, dev, &geneve_remote_unspec, 0, 0, 0, 0, htons(dst_port), true, GENEVE_F_UDP_ZERO_CSUM6_RX); - if (err) - goto err; + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. @@ -1532,7 +1535,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, return dev; err: - free_netdev(dev); + geneve_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4d2025f7ec57..08deba679c8c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1121,6 +1121,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; + LIST_HEAD(list_kill); struct ip_tunnel *t; int err; @@ -1136,8 +1137,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, t->collect_md = true; err = ipgre_newlink(net, dev, tb, NULL); - if (err < 0) - goto out; + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. @@ -1148,7 +1151,8 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, return dev; out: - free_netdev(dev); + ip_tunnel_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(gretap_fb_dev_create); From cf5da330bbdd0c06b05c525a3d1d58ccd82c87a6 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:05 +0200 Subject: [PATCH 0565/1050] ovs/vxlan: fix rtnl notifications on iface deletion The function vxlan_dev_create() (only used by ovs) never calls rtnl_configure_link(). The consequence is that dev->rtnl_link_state is never set to RTNL_LINK_INITIALIZED. During the deletion phase, the function rollback_registered_many() sends a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED. Note that the function vxlan_dev_create() is moved after the rtnl stuff so that vxlan_dellink() can be called in this function. Fixes: dcc38c033b32 ("openvswitch: Re-add CONFIG_OPENVSWITCH_VXLAN") CC: Thomas Graf CC: Pravin B Shelar Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 58 ++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f999db2f97b4..b3b9db68f758 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2952,30 +2952,6 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, return 0; } -struct net_device *vxlan_dev_create(struct net *net, const char *name, - u8 name_assign_type, struct vxlan_config *conf) -{ - struct nlattr *tb[IFLA_MAX+1]; - struct net_device *dev; - int err; - - memset(&tb, 0, sizeof(tb)); - - dev = rtnl_create_link(net, name, name_assign_type, - &vxlan_link_ops, tb); - if (IS_ERR(dev)) - return dev; - - err = vxlan_dev_configure(net, dev, conf); - if (err < 0) { - free_netdev(dev); - return ERR_PTR(err); - } - - return dev; -} -EXPORT_SYMBOL_GPL(vxlan_dev_create); - static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { @@ -3268,6 +3244,40 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = { .get_link_net = vxlan_get_link_net, }; +struct net_device *vxlan_dev_create(struct net *net, const char *name, + u8 name_assign_type, + struct vxlan_config *conf) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + int err; + + memset(&tb, 0, sizeof(tb)); + + dev = rtnl_create_link(net, name, name_assign_type, + &vxlan_link_ops, tb); + if (IS_ERR(dev)) + return dev; + + err = vxlan_dev_configure(net, dev, conf); + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } + + err = rtnl_configure_link(dev, NULL); + if (err < 0) { + LIST_HEAD(list_kill); + + vxlan_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return ERR_PTR(err); + } + + return dev; +} +EXPORT_SYMBOL_GPL(vxlan_dev_create); + static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn, struct net_device *dev) { From da6f1da819d4b9c081a477dec74dc468a0b44290 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:06 +0200 Subject: [PATCH 0566/1050] ovs/gre: fix rtnl notifications on iface deletion The function gretap_fb_dev_create() (only used by ovs) never calls rtnl_configure_link(). The consequence is that dev->rtnl_link_state is never set to RTNL_LINK_INITIALIZED. During the deletion phase, the function rollback_registered_many() sends a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED. Fixes: b2acd1dc3949 ("openvswitch: Use regular GRE net_device instead of vport") CC: Thomas Graf CC: Pravin B Shelar Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 08deba679c8c..07c5cf1838d8 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1149,6 +1149,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, if (err) goto out; + err = rtnl_configure_link(dev, NULL); + if (err < 0) + goto out; + return dev; out: ip_tunnel_dellink(dev, &list_kill); From 41009481b690493c169ce85f591b9d32c6fd9422 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:07 +0200 Subject: [PATCH 0567/1050] ovs/geneve: fix rtnl notifications on iface deletion The function geneve_dev_create_fb() (only used by ovs) never calls rtnl_configure_link(). The consequence is that dev->rtnl_link_state is never set to RTNL_LINK_INITIALIZED. During the deletion phase, the function rollback_registered_many() sends a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED. Fixes: e305ac6cf5a1 ("geneve: Add support to collect tunnel metadata.") CC: Pravin B Shelar CC: Jesse Gross CC: Thomas Graf Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/geneve.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 086c2dae4c3d..305a04e45a13 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1532,6 +1532,10 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, if (err) goto err; + err = rtnl_configure_link(dev, NULL); + if (err < 0) + goto err; + return dev; err: From 3ff211270a986f19c7983378ee9c4db2497eaeaf Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 8 Jun 2016 19:04:35 +0800 Subject: [PATCH 0568/1050] drm/amd/powerplay: update powerplay table parsing to handle pptable format change on Polaris boards Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h | 1 + .../drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 23 +++-- .../drm/amd/powerplay/hwmgr/tonga_pptable.h | 16 ++++ .../powerplay/hwmgr/tonga_processpptables.c | 87 +++++++++++++------ 4 files changed, 95 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h index 347fef127ce9..2930a3355948 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h @@ -39,6 +39,7 @@ struct phm_ppt_v1_clock_voltage_dependency_record { uint8_t phases; uint8_t cks_enable; uint8_t cks_voffset; + uint32_t sclk_offset; }; typedef struct phm_ppt_v1_clock_voltage_dependency_record phm_ppt_v1_clock_voltage_dependency_record; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index aa6be033f21b..ad0995c5c563 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -999,7 +999,7 @@ static int polaris10_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), (dep_table->entries[i].vddc - (uint16_t)data->vddc_vddci_delta)); - *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; } if (POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control) @@ -3520,10 +3520,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr, ATOM_Tonga_State *state_entry = (ATOM_Tonga_State *)state; ATOM_Tonga_POWERPLAYTABLE *powerplay_table = (ATOM_Tonga_POWERPLAYTABLE *)pp_table; - ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table = - (ATOM_Tonga_SCLK_Dependency_Table *) + PPTable_Generic_SubTable_Header *sclk_dep_table = + (PPTable_Generic_SubTable_Header *) (((unsigned long)powerplay_table) + le16_to_cpu(powerplay_table->usSclkDependencyTableOffset)); + ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table = (ATOM_Tonga_MCLK_Dependency_Table *) (((unsigned long)powerplay_table) + @@ -3575,7 +3576,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr, /* Performance levels are arranged from low to high. */ performance_level->memory_clock = mclk_dep_table->entries [state_entry->ucMemoryClockIndexLow].ulMclk; - performance_level->engine_clock = sclk_dep_table->entries + if (sclk_dep_table->ucRevId == 0) + performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries + [state_entry->ucEngineClockIndexLow].ulSclk; + else if (sclk_dep_table->ucRevId == 1) + performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries [state_entry->ucEngineClockIndexLow].ulSclk; performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap, state_entry->ucPCIEGenLow); @@ -3586,8 +3591,14 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr, [polaris10_power_state->performance_level_count++]); performance_level->memory_clock = mclk_dep_table->entries [state_entry->ucMemoryClockIndexHigh].ulMclk; - performance_level->engine_clock = sclk_dep_table->entries + + if (sclk_dep_table->ucRevId == 0) + performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries [state_entry->ucEngineClockIndexHigh].ulSclk; + else if (sclk_dep_table->ucRevId == 1) + performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries + [state_entry->ucEngineClockIndexHigh].ulSclk; + performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap, state_entry->ucPCIEGenHigh); performance_level->pcie_lane = get_pcie_lane_support(data->pcie_lane_cap, @@ -3645,7 +3656,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr, switch (state->classification.ui_label) { case PP_StateUILabel_Performance: data->use_pcie_performance_levels = true; - for (i = 0; i < ps->performance_level_count; i++) { if (data->pcie_gen_performance.max < ps->performance_levels[i].pcie_gen) @@ -3661,7 +3671,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr, ps->performance_levels[i].pcie_lane) data->pcie_lane_performance.max = ps->performance_levels[i].pcie_lane; - if (data->pcie_lane_performance.min > ps->performance_levels[i].pcie_lane) data->pcie_lane_performance.min = diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h index 1b44f4e9b8f5..f127198aafc4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h @@ -197,6 +197,22 @@ typedef struct _ATOM_Tonga_SCLK_Dependency_Table { ATOM_Tonga_SCLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ } ATOM_Tonga_SCLK_Dependency_Table; +typedef struct _ATOM_Polaris_SCLK_Dependency_Record { + UCHAR ucVddInd; /* Base voltage */ + USHORT usVddcOffset; /* Offset relative to base voltage */ + ULONG ulSclk; + USHORT usEdcCurrent; + UCHAR ucReliabilityTemperature; + UCHAR ucCKSVOffsetandDisable; /* Bits 0~6: Voltage offset for CKS, Bit 7: Disable/enable for the SCLK level. */ + ULONG ulSclkOffset; +} ATOM_Polaris_SCLK_Dependency_Record; + +typedef struct _ATOM_Polaris_SCLK_Dependency_Table { + UCHAR ucRevId; + UCHAR ucNumEntries; /* Number of entries. */ + ATOM_Polaris_SCLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ +} ATOM_Polaris_SCLK_Dependency_Table; + typedef struct _ATOM_Tonga_PCIE_Record { UCHAR ucPCIEGenSpeed; UCHAR usPCIELaneWidth; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c index 296ec7ef6d45..671fdb4d615a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c @@ -408,41 +408,78 @@ static int get_mclk_voltage_dependency_table( static int get_sclk_voltage_dependency_table( struct pp_hwmgr *hwmgr, phm_ppt_v1_clock_voltage_dependency_table **pp_tonga_sclk_dep_table, - const ATOM_Tonga_SCLK_Dependency_Table * sclk_dep_table + const PPTable_Generic_SubTable_Header *sclk_dep_table ) { uint32_t table_size, i; phm_ppt_v1_clock_voltage_dependency_table *sclk_table; - PP_ASSERT_WITH_CODE((0 != sclk_dep_table->ucNumEntries), - "Invalid PowerPlay Table!", return -1); + if (sclk_dep_table->ucRevId < 1) { + const ATOM_Tonga_SCLK_Dependency_Table *tonga_table = + (ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table; - table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record) - * sclk_dep_table->ucNumEntries; + PP_ASSERT_WITH_CODE((0 != tonga_table->ucNumEntries), + "Invalid PowerPlay Table!", return -1); - sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *) - kzalloc(table_size, GFP_KERNEL); + table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record) + * tonga_table->ucNumEntries; - if (NULL == sclk_table) - return -ENOMEM; + sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *) + kzalloc(table_size, GFP_KERNEL); - memset(sclk_table, 0x00, table_size); + if (NULL == sclk_table) + return -ENOMEM; - sclk_table->count = (uint32_t)sclk_dep_table->ucNumEntries; + memset(sclk_table, 0x00, table_size); - for (i = 0; i < sclk_dep_table->ucNumEntries; i++) { - sclk_table->entries[i].vddInd = - sclk_dep_table->entries[i].ucVddInd; - sclk_table->entries[i].vdd_offset = - sclk_dep_table->entries[i].usVddcOffset; - sclk_table->entries[i].clk = - sclk_dep_table->entries[i].ulSclk; - sclk_table->entries[i].cks_enable = - (((sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0; - sclk_table->entries[i].cks_voffset = - (sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x7F); + sclk_table->count = (uint32_t)tonga_table->ucNumEntries; + + for (i = 0; i < tonga_table->ucNumEntries; i++) { + sclk_table->entries[i].vddInd = + tonga_table->entries[i].ucVddInd; + sclk_table->entries[i].vdd_offset = + tonga_table->entries[i].usVddcOffset; + sclk_table->entries[i].clk = + tonga_table->entries[i].ulSclk; + sclk_table->entries[i].cks_enable = + (((tonga_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0; + sclk_table->entries[i].cks_voffset = + (tonga_table->entries[i].ucCKSVOffsetandDisable & 0x7F); + } + } else { + const ATOM_Polaris_SCLK_Dependency_Table *polaris_table = + (ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table; + + PP_ASSERT_WITH_CODE((0 != polaris_table->ucNumEntries), + "Invalid PowerPlay Table!", return -1); + + table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record) + * polaris_table->ucNumEntries; + + sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *) + kzalloc(table_size, GFP_KERNEL); + + if (NULL == sclk_table) + return -ENOMEM; + + memset(sclk_table, 0x00, table_size); + + sclk_table->count = (uint32_t)polaris_table->ucNumEntries; + + for (i = 0; i < polaris_table->ucNumEntries; i++) { + sclk_table->entries[i].vddInd = + polaris_table->entries[i].ucVddInd; + sclk_table->entries[i].vdd_offset = + polaris_table->entries[i].usVddcOffset; + sclk_table->entries[i].clk = + polaris_table->entries[i].ulSclk; + sclk_table->entries[i].cks_enable = + (((polaris_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0; + sclk_table->entries[i].cks_voffset = + (polaris_table->entries[i].ucCKSVOffsetandDisable & 0x7F); + sclk_table->entries[i].sclk_offset = polaris_table->entries[i].ulSclkOffset; + } } - *pp_tonga_sclk_dep_table = sclk_table; return 0; @@ -708,8 +745,8 @@ static int init_clock_voltage_dependency( const ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table = (const ATOM_Tonga_MCLK_Dependency_Table *)(((unsigned long) powerplay_table) + le16_to_cpu(powerplay_table->usMclkDependencyTableOffset)); - const ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table = - (const ATOM_Tonga_SCLK_Dependency_Table *)(((unsigned long) powerplay_table) + + const PPTable_Generic_SubTable_Header *sclk_dep_table = + (const PPTable_Generic_SubTable_Header *)(((unsigned long) powerplay_table) + le16_to_cpu(powerplay_table->usSclkDependencyTableOffset)); const ATOM_Tonga_Hard_Limit_Table *pHardLimits = (const ATOM_Tonga_Hard_Limit_Table *)(((unsigned long) powerplay_table) + From 871fd8403de10b9ba9c284105475ab52b96be248 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Sun, 12 Jun 2016 11:18:01 +0800 Subject: [PATCH 0569/1050] drm/amd/powerplay: select samu dpm 0 as boot level on polaris. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index ad0995c5c563..1400bc420881 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -4196,12 +4196,9 @@ int polaris10_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate) { struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); uint32_t mm_boot_level_offset, mm_boot_level_value; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); if (!bgate) { - data->smc_state_table.SamuBootLevel = - (uint8_t) (table_info->mm_dep_table->count - 1); + data->smc_state_table.SamuBootLevel = 0; mm_boot_level_offset = data->dpm_table_start + offsetof(SMU74_Discrete_DpmTable, SamuBootLevel); mm_boot_level_offset /= 4; From 539aae6e3af97c7ec1602ff23e805f2852c2611c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 6 Jun 2016 16:11:52 +0900 Subject: [PATCH 0570/1050] drm/nouveau/Revert "drm/nouveau/device/pci: set as non-CPU-coherent on ARM64" This reverts commit 1733a2ad36741b1812cf8b3f3037c28d0af53f50. There is apparently something amiss with the way the TTM code handles DMA buffers, which the above commit was attempting to work around for arm64 systems with non-coherent PCI. Unfortunately, this completely breaks systems *with* coherent PCI (which appear to be the majority). Booting a plain arm64 defconfig + CONFIG_DRM + CONFIG_DRM_NOUVEAU on a machine with a PCI GPU having coherent dma_map_ops (in this case a 7600GT card plugged into an ARM Juno board) results in a fatal crash: [ 2.803438] nouveau 0000:06:00.0: DRM: allocated 1024x768 fb: 0x9000, bo ffffffc976141c00 [ 2.897662] Unable to handle kernel NULL pointer dereference at virtual address 000001ac [ 2.897666] pgd = ffffff8008e00000 [ 2.897675] [000001ac] *pgd=00000009ffffe003, *pud=00000009ffffe003, *pmd=0000000000000000 [ 2.897680] Internal error: Oops: 96000045 [#1] PREEMPT SMP [ 2.897685] Modules linked in: [ 2.897692] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc5+ #543 [ 2.897694] Hardware name: ARM Juno development board (r1) (DT) [ 2.897699] task: ffffffc9768a0000 ti: ffffffc9768a8000 task.ti: ffffffc9768a8000 [ 2.897711] PC is at __memcpy+0x7c/0x180 [ 2.897719] LR is at OUT_RINGp+0x34/0x70 [ 2.897724] pc : [] lr : [] pstate: 80000045 [ 2.897726] sp : ffffffc9768ab360 [ 2.897732] x29: ffffffc9768ab360 x28: 0000000000000001 [ 2.897738] x27: ffffffc97624c000 x26: 0000000000000000 [ 2.897744] x25: 0000000000000080 x24: 0000000000006c00 [ 2.897749] x23: 0000000000000005 x22: ffffffc97624c010 [ 2.897755] x21: 0000000000000004 x20: 0000000000000004 [ 2.897761] x19: ffffffc9763da000 x18: ffffffc976b2491c [ 2.897766] x17: 0000000000000007 x16: 0000000000000006 [ 2.897771] x15: 0000000000000001 x14: 0000000000000001 [ 2.897777] x13: 0000000000e31b70 x12: ffffffc9768a0080 [ 2.897783] x11: 0000000000000000 x10: fffffffffffffb00 [ 2.897788] x9 : 0000000000000000 x8 : 0000000000000000 [ 2.897793] x7 : 0000000000000000 x6 : 00000000000001ac [ 2.897799] x5 : 00000000ffffffff x4 : 0000000000000000 [ 2.897804] x3 : 0000000000000010 x2 : 0000000000000010 [ 2.897810] x1 : ffffffc97624c010 x0 : 00000000000001ac ... [ 2.898494] Call trace: [ 2.898499] Exception stack(0xffffffc9768ab1a0 to 0xffffffc9768ab2c0) [ 2.898506] b1a0: ffffffc9763da000 0000000000000004 ffffffc9768ab360 ffffff80083465fc [ 2.898513] b1c0: ffffffc976801e00 ffffffc9762b8000 ffffffc9768ab1f0 ffffff80080ec158 [ 2.898520] b1e0: ffffffc9768ab230 ffffff8008496d04 ffffffc975ce6d80 ffffffc9768ab36e [ 2.898527] b200: ffffffc9768ab36f ffffffc9768ab29d ffffffc9768ab29e ffffffc9768a0000 [ 2.898533] b220: ffffffc9768ab250 ffffff80080e70c0 ffffffc9768ab270 ffffff8008496e44 [ 2.898540] b240: 00000000000001ac ffffffc97624c010 0000000000000010 0000000000000010 [ 2.898546] b260: 0000000000000000 00000000ffffffff 00000000000001ac 0000000000000000 [ 2.898552] b280: 0000000000000000 0000000000000000 fffffffffffffb00 0000000000000000 [ 2.898558] b2a0: ffffffc9768a0080 0000000000e31b70 0000000000000001 0000000000000001 [ 2.898566] [] __memcpy+0x7c/0x180 [ 2.898574] [] nv04_fbcon_imageblit+0x1d4/0x2e8 [ 2.898582] [] nouveau_fbcon_imageblit+0xd8/0xe0 [ 2.898591] [] soft_cursor+0x154/0x1d8 [ 2.898598] [] bit_cursor+0x4fc/0x538 [ 2.898605] [] fbcon_cursor+0x134/0x1a8 [ 2.898613] [] hide_cursor+0x38/0xa0 [ 2.898620] [] redraw_screen+0x120/0x228 [ 2.898628] [] fbcon_prepare_logo+0x370/0x3f8 [ 2.898635] [] fbcon_init+0x350/0x560 [ 2.898641] [] visual_init+0xac/0x108 [ 2.898648] [] do_bind_con_driver+0x1c4/0x3a8 [ 2.898655] [] do_take_over_console+0x174/0x1e8 [ 2.898662] [] do_fbcon_takeover+0x74/0x100 [ 2.898669] [] fbcon_event_notify+0x8cc/0x920 [ 2.898680] [] notifier_call_chain+0x50/0x90 [ 2.898685] [] __blocking_notifier_call_chain+0x4c/0x90 [ 2.898691] [] blocking_notifier_call_chain+0x14/0x20 [ 2.898696] [] fb_notifier_call_chain+0x1c/0x28 [ 2.898703] [] register_framebuffer+0x1cc/0x2e0 [ 2.898712] [] drm_fb_helper_initial_config+0x288/0x3e8 [ 2.898719] [] nouveau_fbcon_init+0xe0/0x118 [ 2.898727] [] nouveau_drm_load+0x268/0x890 [ 2.898734] [] drm_dev_register+0xbc/0xc8 [ 2.898740] [] drm_get_pci_dev+0xa0/0x180 [ 2.898747] [] nouveau_drm_probe+0x1a0/0x1e0 [ 2.898755] [] pci_device_probe+0x98/0x110 [ 2.898763] [] driver_probe_device+0x204/0x2b0 [ 2.898770] [] __driver_attach+0xac/0xb0 [ 2.898777] [] bus_for_each_dev+0x60/0xa0 [ 2.898783] [] driver_attach+0x20/0x28 [ 2.898789] [] bus_add_driver+0x1d0/0x238 [ 2.898796] [] driver_register+0x60/0xf8 [ 2.898802] [] __pci_register_driver+0x3c/0x48 [ 2.898809] [] drm_pci_init+0xf4/0x120 [ 2.898818] [] nouveau_drm_init+0x21c/0x230 [ 2.898825] [] do_one_initcall+0x8c/0x190 [ 2.898832] [] kernel_init_freeable+0x14c/0x1f0 [ 2.898839] [] kernel_init+0x10/0x100 [ 2.898845] [] ret_from_fork+0x10/0x40 [ 2.898853] Code: a88120c7 a8c12027 a88120c7 a8c12027 (a88120c7) [ 2.898871] ---[ end trace d5713dcad023ee04 ]--- [ 2.898888] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b In a toss-up between the GPU seeing stale data artefacts on some systems vs. catastrophic kernel crashes on other systems, the latter would seem to take precedence, so revert this change until the real underlying problem can be fixed. Signed-off-by: Robin Murphy Acked-by: Alexandre Courbot [acourbot@nvidia.com: port to Nouveau tree, remove bits in lib/] Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index 18fab3973ce5..62ad0300cfa5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1614,7 +1614,7 @@ nvkm_device_pci_func = { .fini = nvkm_device_pci_fini, .resource_addr = nvkm_device_pci_resource_addr, .resource_size = nvkm_device_pci_resource_size, - .cpu_coherent = !IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_ARM64), + .cpu_coherent = !IS_ENABLED(CONFIG_ARM), }; int From 6aa85f1129b32b5cd19ec262e7cfc2ddc08263c3 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 26 May 2016 17:04:52 +1000 Subject: [PATCH 0571/1050] drm/nouveau/iccsense: fix memory leak Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/subdev/iccsense/base.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c index 323c79abe468..41bd5d0f7692 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c @@ -276,6 +276,8 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev) struct pwr_rail_t *r = &stbl.rail[i]; struct nvkm_iccsense_rail *rail; struct nvkm_iccsense_sensor *sensor; + int (*read)(struct nvkm_iccsense *, + struct nvkm_iccsense_rail *); if (!r->mode || r->resistor_mohm == 0) continue; @@ -284,31 +286,31 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev) if (!sensor) continue; - rail = kmalloc(sizeof(*rail), GFP_KERNEL); - if (!rail) - return -ENOMEM; - switch (sensor->type) { case NVBIOS_EXTDEV_INA209: if (r->rail != 0) continue; - rail->read = nvkm_iccsense_ina209_read; + read = nvkm_iccsense_ina209_read; break; case NVBIOS_EXTDEV_INA219: if (r->rail != 0) continue; - rail->read = nvkm_iccsense_ina219_read; + read = nvkm_iccsense_ina219_read; break; case NVBIOS_EXTDEV_INA3221: if (r->rail >= 3) continue; - rail->read = nvkm_iccsense_ina3221_read; + read = nvkm_iccsense_ina3221_read; break; default: continue; } + rail = kmalloc(sizeof(*rail), GFP_KERNEL); + if (!rail) + return -ENOMEM; sensor->rail_mask |= 1 << r->rail; + rail->read = read; rail->sensor = sensor; rail->idx = r->rail; rail->mohm = r->resistor_mohm; From 62e6d1e59c77316768a663d1328390b4cd33801f Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Jun 2016 11:12:05 +0300 Subject: [PATCH 0572/1050] extcon: palmas: Fix boot up state of VBUS when using GPIO detection If USB cable is connected prior to boot, we don't get any interrupts so we must manually check the VBUS state and report it during probe. If we don't do it then USB controller will never know that peripheral cable was connected till the user unplugs and replugs the cable. Fixes: b7aad8e2685b ("extcon: palmas: Add the support for VBUS detection by using GPIO") Cc: stable@vger.kernel.org Signed-off-by: Roger Quadros Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-palmas.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c index 8b3226dca1d9..caff46c0e214 100644 --- a/drivers/extcon/extcon-palmas.c +++ b/drivers/extcon/extcon-palmas.c @@ -360,6 +360,8 @@ static int palmas_usb_probe(struct platform_device *pdev) palmas_enable_irq(palmas_usb); /* perform initial detection */ + if (palmas_usb->enable_gpio_vbus_detection) + palmas_vbus_irq_handler(palmas_usb->gpio_vbus_irq, palmas_usb); palmas_gpio_id_detect(&palmas_usb->wq_detectid.work); device_set_wakeup_capable(&pdev->dev, true); return 0; From f7a6c1492a2cb596952260a7d5bb0d61ca815173 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 7 Jun 2016 11:32:21 -0500 Subject: [PATCH 0573/1050] arm: pmu: Fix non-devicetree probing There is a problem in the non-devicetree PMU probing where some probe functions may get the number of supported events through smp_call_function_any() using the arm_pmu supported_cpus mask. But at the time the probe function is called, the supported_cpus mask is empty so the call fails. This patch makes sure the mask is set before calling the init function rather than after. Signed-off-by: Mark Salter Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 1b8304e1efaa..140436a046c0 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -1010,8 +1010,8 @@ int arm_pmu_device_probe(struct platform_device *pdev, if (!ret) ret = init_fn(pmu); } else { - ret = probe_current_pmu(pmu, probe_table); cpumask_setall(&pmu->supported_cpus); + ret = probe_current_pmu(pmu, probe_table); } if (ret) { From 38b850a73034f075c4088e7511b36ebbef9dce00 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jun 2016 15:27:04 +0100 Subject: [PATCH 0574/1050] arm64: spinlock: order spin_{is_locked,unlock_wait} against local locks spin_is_locked has grown two very different use-cases: (1) [The sane case] API functions may require a certain lock to be held by the caller and can therefore use spin_is_locked as part of an assert statement in order to verify that the lock is indeed held. For example, usage of assert_spin_locked. (2) [The insane case] There are two locks, where a CPU takes one of the locks and then checks whether or not the other one is held before accessing some shared state. For example, the "optimized locking" in ipc/sem.c. In the latter case, the sequence looks like: spin_lock(&sem->lock); if (!spin_is_locked(&sma->sem_perm.lock)) /* Access shared state */ and requires that the spin_is_locked check is ordered after taking the sem->lock. Unfortunately, since our spinlocks are implemented using a LDAXR/STXR sequence, the read of &sma->sem_perm.lock can be speculated before the STXR and consequently return a stale value. Whilst this hasn't been seen to cause issues in practice, PowerPC fixed the same issue in 51d7d5205d33 ("powerpc: Add smp_mb() to arch_spin_is_locked()") and, although we did something similar for spin_unlock_wait in d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") that doesn't actually take care of ordering against local acquisition of a different lock. This patch adds an smp_mb() to the start of our arch_spin_is_locked and arch_spin_unlock_wait routines to ensure that the lock value is always loaded after any other locks have been taken by the current CPU. Reported-by: Peter Zijlstra Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index fc9682bfe002..aac64d55cb22 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -31,6 +31,12 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) unsigned int tmp; arch_spinlock_t lockval; + /* + * Ensure prior spin_lock operations to other locks have completed + * on this CPU before we test whether "lock" is locked. + */ + smp_mb(); + asm volatile( " sevl\n" "1: wfe\n" @@ -148,6 +154,7 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { + smp_mb(); /* See arch_spin_unlock_wait */ return !arch_spin_value_unlocked(READ_ONCE(*lock)); } From 3a5facd09da848193f5bcb0dea098a298bc1a29d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Jun 2016 15:10:57 +0100 Subject: [PATCH 0575/1050] arm64: spinlock: fix spin_unlock_wait for LSE atomics Commit d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") fixed spin_unlock_wait for LL/SC-based atomics under the premise that the LSE atomics (in particular, the LDADDA instruction) are indivisible. Unfortunately, these instructions are only indivisible when used with the -AL (full ordering) suffix and, consequently, the same issue can theoretically be observed with LSE atomics, where a later (in program order) load can be speculated before the write portion of the atomic operation. This patch fixes the issue by performing a CAS of the lock once we've established that it's unlocked, in much the same way as the LL/SC code. Fixes: d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index aac64d55cb22..d5c894253e73 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -43,13 +43,17 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) "2: ldaxr %w0, %2\n" " eor %w1, %w0, %w0, ror #16\n" " cbnz %w1, 1b\n" + /* Serialise against any concurrent lockers */ ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" -" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */ - /* LSE atomics */ " nop\n" -" nop\n") +" nop\n", + /* LSE atomics */ +" mov %w1, %w0\n" +" cas %w0, %w0, %2\n" +" eor %w1, %w1, %w0\n") +" cbnz %w1, 2b\n" : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) : : "memory"); From 13c34fe518624e27589827aa49f68f5f38c95f11 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 13 Jun 2016 18:28:42 +0200 Subject: [PATCH 0576/1050] drm/etnaviv: initialize iommu domain page size Since d16e0faab91 (iommu: Allow selecting page sizes per domain) the iommu core demands the page size to be set per domain, otherwise any mapping attempts will be dropped. Make sure to set a valid page size for the etnaviv iommu. Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c index 522cfd447892..16353ee81651 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c @@ -225,6 +225,7 @@ struct iommu_domain *etnaviv_iommu_domain_alloc(struct etnaviv_gpu *gpu) etnaviv_domain->domain.type = __IOMMU_DOMAIN_PAGING; etnaviv_domain->domain.ops = &etnaviv_iommu_ops.ops; + etnaviv_domain->domain.pgsize_bitmap = SZ_4K; etnaviv_domain->domain.geometry.aperture_start = GPU_MEM_START; etnaviv_domain->domain.geometry.aperture_end = GPU_MEM_START + PT_ENTRIES * SZ_4K - 1; From ae8a7910fb0568531033bd6ebe65590f7a4fa6e2 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 1 Jun 2016 16:46:10 +0100 Subject: [PATCH 0577/1050] iommu/rockchip: Fix zap cache during device attach rk_iommu_command() takes a struct rk_iommu and iterates over the slave MMUs, so this is doubly wrong in that we're passing in the wrong pointer and talking to MMUs that we shouldn't be. Fixes: cd6438c5f844 ("iommu/rockchip: Reconstruct to support multi slaves") Cc: stable@vger.kernel.org Signed-off-by: John Keeping Tested-by: Heiko Stuebner Reviewed-by: Heiko Stuebner Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index c7d6156ff536..25b4627cb57f 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -815,7 +815,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, dte_addr = virt_to_phys(rk_domain->dt); for (i = 0; i < iommu->num_mmu; i++) { rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr); - rk_iommu_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); + rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); } From 775711497202fe376368c25b0c7296ed8803e0ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 10 Jun 2016 17:25:19 +0200 Subject: [PATCH 0578/1050] netfilter: conntrack: destroy kmemcache on module removal I forgot to move the kmem_cache_destroy into the exit path. Fixes: 0c5366b3a8c7 ("netfilter: conntrack: use single slab cache) Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index db2312eeb2a4..f204274a9b6b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1544,6 +1544,8 @@ void nf_conntrack_cleanup_end(void) nf_conntrack_tstamp_fini(); nf_conntrack_acct_fini(); nf_conntrack_expect_fini(); + + kmem_cache_destroy(nf_conntrack_cachep); } /* From a46844021f6182cca7b575295ba33a4734b1b9d9 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 11 Jun 2016 12:20:26 +0800 Subject: [PATCH 0579/1050] netfilter: nf_tables: fix wrong check of NFT_SET_MAP in nf_tables_bind_set We should check "i" is used as a dictionary or not, "binding" is already checked before. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7b7aa871a174..492f6f8efdda 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2946,7 +2946,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, * jumps are already validated for that chain. */ list_for_each_entry(i, &set->bindings, list) { - if (binding->flags & NFT_SET_MAP && + if (i->flags & NFT_SET_MAP && i->chain == binding->chain) goto bind; } From 8588ac097b49ce8802f11541d9cd6f6667badb34 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 11 Jun 2016 12:20:27 +0800 Subject: [PATCH 0580/1050] netfilter: nf_tables: reject loops from set element jump to chain Liping Zhang says: "Users may add such a wrong nft rules successfully, which will cause an endless jump loop: # nft add rule filter test tcp dport vmap {1: jump test} This is because before we commit, the element in the current anonymous set is inactive, so osp->walk will skip this element and miss the validate check." To resolve this problem, this patch passes the generation mask to the walk function through the iter container structure depending on the code path: 1) If we're dumping the elements, then we have to check if the element is active in the current generation. Thus, we check for the current bit in the genmask. 2) If we're checking for loops, then we have to check if the element is active in the next generation, as we're in the middle of a transaction. Thus, we check for the next bit in the genmask. Based on original patch from Liping Zhang. Reported-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso Tested-by: Liping Zhang --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 15 +++++++++------ net/netfilter/nft_hash.c | 3 +-- net/netfilter/nft_rbtree.c | 3 +-- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 092235458691..f7c291ff4074 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -167,6 +167,7 @@ struct nft_set_elem { struct nft_set; struct nft_set_iter { + u8 genmask; unsigned int count; unsigned int skip; int err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 492f6f8efdda..0fd69988f00b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2951,6 +2951,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, goto bind; } + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; @@ -3192,12 +3193,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; - args.cb = cb; - args.skb = skb; - args.iter.skip = cb->args[0]; - args.iter.count = 0; - args.iter.err = 0; - args.iter.fn = nf_tables_dump_setelem; + args.cb = cb; + args.skb = skb; + args.iter.genmask = nft_genmask_cur(ctx.net); + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; + args.iter.fn = nf_tables_dump_setelem; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -4284,6 +4286,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, binding->chain != chain) continue; + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 6fa016564f90..f39c53a159eb 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -189,7 +189,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); @@ -218,7 +217,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, goto cont; if (nft_set_elem_expired(&he->ext)) goto cont; - if (!nft_set_elem_active(&he->ext, genmask)) + if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; elem.priv = he; diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index f762094af7c1..7201d57b5a93 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -211,7 +211,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { @@ -219,7 +218,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (!nft_set_elem_active(&rbe->ext, genmask)) + if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; elem.priv = rbe; From a02f424863610a0a7abd80c468839e59cfa4d0d8 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 11 Jun 2016 12:20:28 +0800 Subject: [PATCH 0581/1050] netfilter: nf_tables: fix wrong destroy anonymous sets if binding fails When we add a nft rule like follows: # nft add rule filter test tcp dport vmap {1: jump test} -ELOOP error will be returned, and the anonymous set will be destroyed. But after that, nf_tables_abort will also try to remove the element and destroy the set, which was already destroyed and freed. If we add a nft wrong rule, nft_tables_abort will do the cleanup work rightly, so nf_tables_set_destroy call here is redundant and wrong, remove it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0fd69988f00b..2c881871db38 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2958,13 +2958,8 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.fn = nf_tables_bind_check_setelem; set->ops->walk(ctx, set, &iter); - if (iter.err < 0) { - /* Destroy anonymous sets if binding fails */ - if (set->flags & NFT_SET_ANONYMOUS) - nf_tables_set_destroy(ctx, set); - + if (iter.err < 0) return iter.err; - } } bind: binding->chain = ctx->chain; From 8fff1722f705ce5023a0d6d77a31a9d013be2a34 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 14 Jun 2016 20:13:04 +0800 Subject: [PATCH 0582/1050] netfilter: nf_tables: fix a wrong check to skip the inactive rules nft_genmask_cur has already done left-shift operator on the gencursor, so there's no need to do left-shift operator on it again. Fixes: ea4bd995b0f2 ("netfilter: nf_tables: add transaction helper functions") Cc: Patrick McHardy Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e9f8dffcc244..fb8b5892b5ff 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -143,7 +143,7 @@ next_rule: list_for_each_entry_continue_rcu(rule, &chain->rules, list) { /* This rule is not active, skip. */ - if (unlikely(rule->genmask & (1 << gencursor))) + if (unlikely(rule->genmask & gencursor)) continue; rulenum++; From c56bdcac153e60d96a619a59c7981f2a78cba598 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jun 2016 18:40:07 +0100 Subject: [PATCH 0583/1050] arm64: spinlock: Ensure forward-progress in spin_unlock_wait Rather than wait until we observe the lock being free (which might never happen), we can also return from spin_unlock_wait if we observe that the lock is now held by somebody else, which implies that it was unlocked but we just missed seeing it in that state. Furthermore, in such a scenario there is no longer a need to write back the value that we loaded, since we know that there has been a lock hand-off, which is sufficient to publish any stores prior to the unlock_wait because the ARm architecture ensures that a Store-Release instruction is multi-copy atomic when observed by a Load-Acquire instruction. The litmus test is something like: AArch64 { 0:X1=x; 0:X3=y; 1:X1=y; 2:X1=y; 2:X3=x; } P0 | P1 | P2 ; MOV W0,#1 | MOV W0,#1 | LDAR W0,[X1] ; STR W0,[X1] | STLR W0,[X1] | LDR W2,[X3] ; DMB SY | | ; LDR W2,[X3] | | ; exists (0:X2=0 /\ 2:X0=1 /\ 2:X2=0) where P0 is doing spin_unlock_wait, P1 is doing spin_unlock and P2 is doing spin_lock. Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index d5c894253e73..e875a5a551d7 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -30,20 +30,39 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { unsigned int tmp; arch_spinlock_t lockval; + u32 owner; /* * Ensure prior spin_lock operations to other locks have completed * on this CPU before we test whether "lock" is locked. */ smp_mb(); + owner = READ_ONCE(lock->owner) << 16; asm volatile( " sevl\n" "1: wfe\n" "2: ldaxr %w0, %2\n" + /* Is the lock free? */ " eor %w1, %w0, %w0, ror #16\n" -" cbnz %w1, 1b\n" - /* Serialise against any concurrent lockers */ +" cbz %w1, 3f\n" + /* Lock taken -- has there been a subsequent unlock->lock transition? */ +" eor %w1, %w3, %w0, lsl #16\n" +" cbz %w1, 1b\n" + /* + * The owner has been updated, so there was an unlock->lock + * transition that we missed. That means we can rely on the + * store-release of the unlock operation paired with the + * load-acquire of the lock operation to publish any of our + * previous stores to the new lock owner and therefore don't + * need to bother with the writeback below. + */ +" b 4f\n" +"3:\n" + /* + * Serialise against any concurrent lockers by writing back the + * unlocked lock value + */ ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" @@ -53,9 +72,11 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) " mov %w1, %w0\n" " cas %w0, %w0, %2\n" " eor %w1, %w1, %w0\n") + /* Somebody else wrote to the lock, GOTO 10 and reload the value */ " cbnz %w1, 2b\n" +"4:" : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) - : + : "r" (owner) : "memory"); } From 695e9df010e40f407f4830dc11d53dce957710ba Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Jun 2016 12:21:40 -0500 Subject: [PATCH 0584/1050] mnt: Account for MS_RDONLY in fs_fully_visible In rare cases it is possible for s_flags & MS_RDONLY to be set but MNT_READONLY to be clear. This starting combination can cause fs_fully_visible to fail to ensure that the new mount is readonly. Therefore force MNT_LOCK_READONLY in the new mount if MS_RDONLY is set on the source filesystem of the mount. In general both MS_RDONLY and MNT_READONLY are set at the same for mounts so I don't expect any programs to care. Nor do I expect MS_RDONLY to be set on proc or sysfs in the initial user namespace, which further decreases the likelyhood of problems. Which means this change should only affect system configurations by paranoid sysadmins who should welcome the additional protection as it keeps people from wriggling out of their policies. Cc: stable@vger.kernel.org Fixes: 8c6cf9cc829f ("mnt: Modify fs_fully_visible to deal with locked ro nodev and atime") Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index a7ec92c051f5..783004af5707 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3247,6 +3247,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC) mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC); + /* Don't miss readonly hidden in the superblock flags */ + if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY) + mnt_flags |= MNT_LOCK_READONLY; + /* Verify the mount flags are equal to or more permissive * than the proposed new mount. */ From df4565f9ebdc4d6dc50edc6e8fed08004e328332 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 24 May 2016 14:05:05 +0200 Subject: [PATCH 0585/1050] kernel/kcov: unproxify debugfs file's fops Since commit 49d200deaa68 ("debugfs: prevent access to removed files' private data"), a debugfs file's file_operations methods get proxied through lifetime aware wrappers. However, only a certain subset of the file_operations members is supported by debugfs and ->mmap isn't among them -- it appears to be NULL from the VFS layer's perspective. This behaviour breaks the /sys/kernel/debug/kcov file introduced concurrently with commit 5c9a8750a640 ("kernel: add kcov code coverage"). Since that file never gets removed, there is no file removal race and thus, a lifetime checking proxy isn't needed. Avoid the proxying for /sys/kernel/debug/kcov by creating it via debugfs_create_file_unsafe() rather than debugfs_create_file(). Fixes: 49d200deaa68 ("debugfs: prevent access to removed files' private data") Fixes: 5c9a8750a640 ("kernel: add kcov code coverage") Reported-by: Sasha Levin Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- kernel/kcov.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index a02f2dddd1d7..8d44b3fea9d0 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -264,7 +264,12 @@ static const struct file_operations kcov_fops = { static int __init kcov_init(void) { - if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) { + /* + * The kcov debugfs file won't ever get removed and thus, + * there is no need to protect it against removal races. The + * use of debugfs_create_file_unsafe() is actually safe here. + */ + if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) { pr_err("failed to create kcov in debugfs\n"); return -ENOMEM; } From b10e3e90485e32e4cea9e35d2295ee7bffaeff73 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 24 May 2016 13:08:53 +0200 Subject: [PATCH 0586/1050] debugfs: full_proxy_open(): free proxy on ->open() failure Debugfs' full_proxy_open(), the ->open() installed at all inodes created through debugfs_create_file(), - grabs a reference to the original struct file_operations instance passed to debugfs_create_file(), - dynamically allocates a proxy struct file_operations instance wrapping the original - and installs this at the file's ->f_op. Afterwards, it calls the original ->open() and passes its return value back to the VFS layer. Now, if that return value indicates failure, the VFS layer won't ever call ->release() and thus, neither the reference to the original file_operations nor the memory for the proxy file_operations will get released, i.e. both are leaked. Upon failure of the original fops' ->open(), undo the proxy installation. That is: - Set the struct file ->f_op to what it had been when full_proxy_open() was entered. - Drop the reference to the original file_operations. - Free the memory holding the proxy file_operations. Fixes: 49d200deaa68 ("debugfs: prevent access to removed files' private data") Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 9c1c9a01b7e5..d1ec80331414 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -262,8 +262,10 @@ static int full_proxy_open(struct inode *inode, struct file *filp) if (real_fops->open) { r = real_fops->open(inode, filp); - - if (filp->f_op != proxy_fops) { + if (r) { + replace_fops(filp, d_inode(dentry)->i_fop); + goto free_proxy; + } else if (filp->f_op != proxy_fops) { /* No protection against file removal anymore. */ WARN(1, "debugfs file owner replaced proxy fops: %pd", dentry); From 75f0b68b75dabb3ff551440163fd67b3fc62901a Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 24 May 2016 13:08:54 +0200 Subject: [PATCH 0587/1050] debugfs: open_proxy_open(): avoid double fops release Debugfs' open_proxy_open(), the ->open() installed at all inodes created through debugfs_create_file_unsafe(), - grabs a reference to the original file_operations instance passed to debugfs_create_file_unsafe() via fops_get(), - installs it at the file's ->f_op by means of replace_fops() - and calls fops_put() on it. Since the semantics of replace_fops() are such that the reference's ownership is transferred, the subsequent fops_put() will result in a double release when the file is eventually closed. Currently, this is not an issue since fops_put() basically does a module_put() on the file_operations' ->owner only and there don't exist any modules calling debugfs_create_file_unsafe() yet. This is expected to change in the future though, c.f. commit c64688081490 ("debugfs: add support for self-protecting attribute file fops"). Remove the call to fops_put() from open_proxy_open(). Fixes: 9fd4dcece43a ("debugfs: prevent access to possibly dead file_operations at file open") Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index d1ec80331414..592059f88e04 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -127,7 +127,6 @@ static int open_proxy_open(struct inode *inode, struct file *filp) r = real_fops->open(inode, filp); out: - fops_put(real_fops); debugfs_use_file_finish(srcu_idx); return r; } From d0e13f5bbe4be7c8f27736fc40503dcec04b7de0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Jun 2016 14:18:59 +0200 Subject: [PATCH 0588/1050] ovl: fix uid/gid when creating over whiteout Fix a regression when creating a file over a whiteout. The new file/directory needs to use the current fsuid/fsgid, not the ones from the mounter's credentials. The refcounting is a bit tricky: prepare_creds() sets an original refcount, override_creds() gets one more, which revert_cred() drops. So 1) we need to expicitly put the mounter's credentials when overriding with the updated one 2) we need to put the original ref to the updated creds (and this can safely be done before revert_creds(), since we'll still have the ref from override_creds()). Reported-by: Stephen Smalley Fixes: 3fe6e52f0626 ("ovl: override creds with the ones from the superblock mounter") Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 22f0253a3567..c2a6b0894022 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -405,12 +405,21 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, err = ovl_create_upper(dentry, inode, &stat, link, hardlink); } else { const struct cred *old_cred; + struct cred *override_cred; old_cred = ovl_override_creds(dentry->d_sb); - err = ovl_create_over_whiteout(dentry, inode, &stat, link, - hardlink); + err = -ENOMEM; + override_cred = prepare_creds(); + if (override_cred) { + override_cred->fsuid = old_cred->fsuid; + override_cred->fsgid = old_cred->fsgid; + put_cred(override_creds(override_cred)); + put_cred(override_cred); + err = ovl_create_over_whiteout(dentry, inode, &stat, + link, hardlink); + } revert_creds(old_cred); } From 1208fd569c07ab84aa5d024abd863267c2953b4a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 20 May 2016 17:07:17 -0400 Subject: [PATCH 0589/1050] SUNRPC: fix xprt leak on xps allocation failure Callers of rpc_create_xprt expect it to put the xprt on success and failure. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Acked-by: Trond Myklebust --- net/sunrpc/clnt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 06b4df9faaa1..173c5dd2d751 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -453,9 +453,10 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt_switch *xps; xps = xprt_switch_alloc(xprt, GFP_KERNEL); - if (xps == NULL) + if (xps == NULL) { + xprt_put(xprt); return ERR_PTR(-ENOMEM); - + } clnt = rpc_new_client(args, xps, xprt, NULL); if (IS_ERR(clnt)) return clnt; From d50039ea5ee63c589b0434baa5ecf6e5075bb6f9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 16 May 2016 17:03:42 -0400 Subject: [PATCH 0590/1050] nfsd4/rpc: move backchannel create logic into rpc code Also simplify the logic a bit. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Acked-by: Trond Myklebust --- fs/nfsd/nfs4callback.c | 18 +----------------- include/linux/sunrpc/clnt.h | 2 -- net/sunrpc/clnt.c | 12 ++++++++++-- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7389cb1d7409..04c68d900324 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc } } -static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) -{ - struct rpc_xprt *xprt; - - if (args->protocol != XPRT_TRANSPORT_BC_TCP) - return rpc_create(args); - - xprt = args->bc_xprt->xpt_bc_xprt; - if (xprt) { - xprt_get(xprt); - return rpc_create_xprt(args, xprt); - } - - return rpc_create(args); -} - static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { int maxtime = max_cb_time(clp->net); @@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ - client = create_backchannel_client(&args); + client = rpc_create(&args); if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 19c659d1c0f8..b6810c92b8bb 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -137,8 +137,6 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) struct rpc_clnt *rpc_create(struct rpc_create_args *args); -struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, - struct rpc_xprt *xprt); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, const struct rpc_program *, u32); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 173c5dd2d751..b33721d41adc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -446,7 +446,7 @@ out_no_rpciod: return ERR_PTR(err); } -struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, +static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt *xprt) { struct rpc_clnt *clnt = NULL; @@ -484,7 +484,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, return clnt; } -EXPORT_SYMBOL_GPL(rpc_create_xprt); /** * rpc_create - create an RPC client and transport with one call @@ -510,6 +509,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) }; char servername[48]; + if (args->bc_xprt) { + WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + xprt = args->bc_xprt->xpt_bc_xprt; + if (xprt) { + xprt_get(xprt); + return rpc_create_xprt(args, xprt); + } + } + if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT) From 39a9beab5acb83176e8b9a4f0778749a09341f1f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 May 2016 12:38:21 -0400 Subject: [PATCH 0591/1050] rpc: share one xps between all backchannels The spec allows backchannels for multiple clients to share the same tcp connection. When that happens, we need to use the same xprt for all of them. Similarly, we need the same xps. This fixes list corruption introduced by the multipath code. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Acked-by: Trond Myklebust --- include/linux/sunrpc/svc_xprt.h | 1 + include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 18 ++++++++++++++---- net/sunrpc/svc_xprt.c | 2 ++ net/sunrpc/xprtsock.c | 1 + 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index b7dabc4baafd..79ba50856707 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -84,6 +84,7 @@ struct svc_xprt { struct net *xpt_net; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ + struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ }; static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 5aa3834619a8..5e3e1b63dbb3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -297,6 +297,7 @@ struct xprt_create { size_t addrlen; const char *servername; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ + struct rpc_xprt_switch *bc_xps; unsigned int flags; }; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b33721d41adc..2808d550d273 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -452,10 +452,20 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_clnt *clnt = NULL; struct rpc_xprt_switch *xps; - xps = xprt_switch_alloc(xprt, GFP_KERNEL); - if (xps == NULL) { - xprt_put(xprt); - return ERR_PTR(-ENOMEM); + if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) { + WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + xps = args->bc_xprt->xpt_bc_xps; + xprt_switch_get(xps); + } else { + xps = xprt_switch_alloc(xprt, GFP_KERNEL); + if (xps == NULL) { + xprt_put(xprt); + return ERR_PTR(-ENOMEM); + } + if (xprt->bc_xprt) { + xprt_switch_get(xps); + xprt->bc_xprt->xpt_bc_xps = xps; + } } clnt = rpc_new_client(args, xps, xprt, NULL); if (IS_ERR(clnt)) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f5572e31d518..4f01f63102ee 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -136,6 +136,8 @@ static void svc_xprt_free(struct kref *kref) /* See comment on corresponding get in xs_setup_bc_tcp(): */ if (xprt->xpt_bc_xprt) xprt_put(xprt->xpt_bc_xprt); + if (xprt->xpt_bc_xps) + xprt_switch_put(xprt->xpt_bc_xps); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2d3e0c42361e..7e2b2fa189c3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -3057,6 +3057,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) return xprt; args->bc_xprt->xpt_bc_xprt = NULL; + args->bc_xprt->xpt_bc_xps = NULL; xprt_put(xprt); ret = ERR_PTR(-EINVAL); out_err: From f52a4c74efbb61195490535e9d65d964c4ebfee3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 14 Jun 2016 00:26:49 +0000 Subject: [PATCH 0592/1050] ata: fix return value check in ahci_seattle_get_port_info() In case of error, the function devm_kzalloc() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Signed-off-by: Wei Yongjun Acked-by: Brijesh Singh Signed-off-by: Tejun Heo --- drivers/ata/ahci_seattle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ahci_seattle.c b/drivers/ata/ahci_seattle.c index 6e702ab57220..1d31c0c0fc20 100644 --- a/drivers/ata/ahci_seattle.c +++ b/drivers/ata/ahci_seattle.c @@ -137,7 +137,7 @@ static const struct ata_port_info *ahci_seattle_get_port_info( u32 val; plat_data = devm_kzalloc(dev, sizeof(*plat_data), GFP_KERNEL); - if (IS_ERR(plat_data)) + if (!plat_data) return &ahci_port_info; plat_data->sgpio_ctrl = devm_ioremap_resource(dev, From 0c5ddb51e8f7be7170600f95a4ea92e5a32afad8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 07:50:25 -0700 Subject: [PATCH 0593/1050] net/mlx4_en: initialize cmd.context_lock spinlock earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maciej Żenczykowski reported lockdep warning a spinlock was not registered before being held in mlx4_cmd_wake_completions() cmd.context_lock initialization is not at the right place. 1) mlx4_cmd_use_events() can be called multiple times. Calling spin_lock_init() on a live spinlock can lead to hangs. 2) mlx4_cmd_wake_completions() can be called while lock has not been initialized. Lockdep complains, and current logic is not race prone. It seems better to move the initialization earlier in mlx4_load_one() Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Cc: Eugenia Emantayev Cc: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 1 - drivers/net/ethernet/mellanox/mlx4/main.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e94ca1c3fc7c..f04a423ff79d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2597,7 +2597,6 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) priv->cmd.free_head = 0; sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds); - spin_lock_init(&priv->cmd.context_lock); for (priv->cmd.token_mask = 1; priv->cmd.token_mask < priv->cmd.max_cmds; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 12c77a70abdb..372ebfa880f5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3222,6 +3222,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); + spin_lock_init(&priv->cmd.context_lock); INIT_LIST_HEAD(&priv->bf_list); mutex_init(&priv->bf_mutex); From 3d7c8257d999bdf8fa77ffd9be775c7b58cc7b69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 11:33:32 -0700 Subject: [PATCH 0594/1050] net_sched: prio: insure proper transactional behavior Now prio_init() can return -ENOMEM, it also has to make sure any allocated qdiscs are freed, since the caller (qdisc_create()) wont call ->destroy() handler for us. More generally, we want a transactional behavior for "tc qdisc change ...", so prio_tune() should not make modifications if any error is returned. It means that we must validate parameters and allocate missing qdisc(s) before taking root qdisc lock exactly once, to not leave the prio qdisc in an intermediate state. Fixes: cbdf45116478 ("net_sched: prio: properly report out of memory errors") Signed-off-by: Eric Dumazet Reported-by: Cong Wang Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_prio.c | 59 ++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 071718bccdab..a356450b747b 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -172,8 +172,9 @@ prio_destroy(struct Qdisc *sch) static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *queues[TCQ_PRIO_BANDS]; + int oldbands = q->bands, i; struct tc_prio_qopt *qopt; - int i; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; @@ -187,54 +188,42 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } + /* Before commit, make sure we can allocate all new qdiscs */ + for (i = oldbands; i < qopt->bands; i++) { + queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); + if (!queues[i]) { + while (i > oldbands) + qdisc_destroy(queues[--i]); + return -ENOMEM; + } + } + sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < TCQ_PRIO_BANDS; i++) { + for (i = q->bands; i < oldbands; i++) { struct Qdisc *child = q->queues[i]; - q->queues[i] = &noop_qdisc; - if (child != &noop_qdisc) { - qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); - } + + qdisc_tree_reduce_backlog(child, child->q.qlen, + child->qstats.backlog); + qdisc_destroy(child); } + + for (i = oldbands; i < q->bands; i++) + q->queues[i] = queues[i]; + sch_tree_unlock(sch); - - for (i = 0; i < q->bands; i++) { - struct Qdisc *child; - - if (q->queues[i] != &noop_qdisc) - continue; - - child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - TC_H_MAKE(sch->handle, i + 1)); - if (!child) - return -ENOMEM; - sch_tree_lock(sch); - q->queues[i] = child; - sch_tree_unlock(sch); - } return 0; } static int prio_init(struct Qdisc *sch, struct nlattr *opt) { - struct prio_sched_data *q = qdisc_priv(sch); - int i; - - for (i = 0; i < TCQ_PRIO_BANDS; i++) - q->queues[i] = &noop_qdisc; - - if (opt == NULL) { + if (!opt) return -EINVAL; - } else { - int err; - if ((err = prio_tune(sch, opt)) != 0) - return err; - } - return 0; + return prio_tune(sch, opt); } static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) From d15eccea69b96a5116169688dcc9baf6d1ce2751 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 13 Jun 2016 13:44:14 -0700 Subject: [PATCH 0595/1050] act_ipt: fix a bind refcnt leak And avoid calling tcf_hash_check() twice. Fixes: a57f19d30b2d ("net sched: ipt action fix late binding") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 9f002ada7074..d4bd19ee5822 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -121,10 +121,13 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) + if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } - if (!tcf_hash_check(tn, index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind, false); if (ret) From ebecaa6662b0a9c3590bd644a4cec6f9d96818b7 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 13 Jun 2016 18:08:42 -0400 Subject: [PATCH 0596/1050] net sched actions: bug fix dumping actions directly didnt produce NLMSG_DONE This refers to commands to direct action access as follows: sudo tc actions add action drop index 12 sudo tc actions add action pipe index 10 And then dumping them like so: sudo tc actions ls action gact iproute2 worked because it depended on absence of TCA_ACT_TAB TLV as end of message. This fix has been tested with iproute2 and is backward compatible. Signed-off-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 336774a535c3..c7a0b0d481c0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1118,7 +1118,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_end(skb, nest); ret = skb->len; } else - nla_nest_cancel(skb, nest); + nlmsg_trim(skb, b); nlh->nlmsg_len = skb_tail_pointer(skb) - b; if (NETLINK_CB(cb->skb).portid && ret) From 0ee13627f963f9c5c9544ed19d82854836d5e676 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Jun 2016 06:16:27 +0200 Subject: [PATCH 0597/1050] htb: call qdisc_root with rcu read lock held saw a debug splat: net/include/net/sch_generic.h:287 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by kworker/2:1/710: #0: ("events"){.+.+.+}, at: [] #1: ((&q->work)){+.+...}, at: [] process_one_work+0x14d/0x690 Workqueue: events htb_work_func Call Trace: [] dump_stack+0x85/0xc2 [] lockdep_rcu_suspicious+0xe7/0x120 [] htb_work_func+0x67/0x70 Signed-off-by: Florian Westphal Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d4b4218af6b1..62f9d8100c6e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1007,7 +1007,9 @@ static void htb_work_func(struct work_struct *work) struct htb_sched *q = container_of(work, struct htb_sched, work); struct Qdisc *sch = q->watchdog.qdisc; + rcu_read_lock(); __netif_schedule(qdisc_root(sch)); + rcu_read_unlock(); } static int htb_init(struct Qdisc *sch, struct nlattr *opt) From b196c22af5c3ff784c472c80f6fb4e5fad67b2ac Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Jun 2016 15:25:14 +0200 Subject: [PATCH 0598/1050] macsec: add rcu_barrier() on module exit Without this, the various uses of call_rcu could cause a kernel panic. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 47ee2c840b55..e80736f6acd7 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3361,6 +3361,7 @@ static void __exit macsec_exit(void) genl_unregister_family(&macsec_fam); rtnl_link_unregister(&macsec_link_ops); unregister_netdevice_notifier(&macsec_notifier); + rcu_barrier(); } module_init(macsec_init); From 5d9649b3a524df9ccd15ac25ad6591089260fdbd Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Jun 2016 15:25:15 +0200 Subject: [PATCH 0599/1050] macsec: allocate sg and iv on the heap For the crypto callbacks to work properly, we cannot have sg and iv on the stack. Use kmalloc instead, with a single allocation for aead_request + scatterlist + iv. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/macsec.c | 46 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index e80736f6acd7..189ea3e8e8a0 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -605,12 +605,41 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err) dev_put(dev); } +static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, + unsigned char **iv, + struct scatterlist **sg) +{ + size_t size, iv_offset, sg_offset; + struct aead_request *req; + void *tmp; + + size = sizeof(struct aead_request) + crypto_aead_reqsize(tfm); + iv_offset = size; + size += GCM_AES_IV_LEN; + + size = ALIGN(size, __alignof__(struct scatterlist)); + sg_offset = size; + size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1); + + tmp = kmalloc(size, GFP_ATOMIC); + if (!tmp) + return NULL; + + *iv = (unsigned char *)(tmp + iv_offset); + *sg = (struct scatterlist *)(tmp + sg_offset); + req = tmp; + + aead_request_set_tfm(req, tfm); + + return req; +} + static struct sk_buff *macsec_encrypt(struct sk_buff *skb, struct net_device *dev) { int ret; - struct scatterlist sg[MAX_SKB_FRAGS + 1]; - unsigned char iv[GCM_AES_IV_LEN]; + struct scatterlist *sg; + unsigned char *iv; struct ethhdr *eth; struct macsec_eth_header *hh; size_t unprotected_len; @@ -668,8 +697,6 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, macsec_fill_sectag(hh, secy, pn); macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN); - macsec_fill_iv(iv, secy->sci, pn); - skb_put(skb, secy->icv_len); if (skb->len - ETH_HLEN > macsec_priv(dev)->real_dev->mtu) { @@ -684,13 +711,15 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, return ERR_PTR(-EINVAL); } - req = aead_request_alloc(tx_sa->key.tfm, GFP_ATOMIC); + req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg); if (!req) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOMEM); } + macsec_fill_iv(iv, secy->sci, pn); + sg_init_table(sg, MAX_SKB_FRAGS + 1); skb_to_sgvec(skb, sg, 0, skb->len); @@ -861,7 +890,6 @@ static void macsec_decrypt_done(struct crypto_async_request *base, int err) out: macsec_rxsa_put(rx_sa); dev_put(dev); - return; } static struct sk_buff *macsec_decrypt(struct sk_buff *skb, @@ -871,8 +899,8 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, struct macsec_secy *secy) { int ret; - struct scatterlist sg[MAX_SKB_FRAGS + 1]; - unsigned char iv[GCM_AES_IV_LEN]; + struct scatterlist *sg; + unsigned char *iv; struct aead_request *req; struct macsec_eth_header *hdr; u16 icv_len = secy->icv_len; @@ -882,7 +910,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, if (!skb) return ERR_PTR(-ENOMEM); - req = aead_request_alloc(rx_sa->key.tfm, GFP_ATOMIC); + req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg); if (!req) { kfree_skb(skb); return ERR_PTR(-ENOMEM); From 6052f7fbce857e327218a9d8a040e210ea7cc718 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Jun 2016 15:25:16 +0200 Subject: [PATCH 0600/1050] macsec: fix SA initialization The ASYNC flag prevents initialization on some physical machines. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 189ea3e8e8a0..0e7eff7f1cd2 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1262,7 +1262,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len) struct crypto_aead *tfm; int ret; - tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_aead("gcm(aes)", 0, 0); if (!tfm || IS_ERR(tfm)) return NULL; From 7d669f50847481c52faf0656aea7b4be63113210 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 15 Jun 2016 17:23:45 -0500 Subject: [PATCH 0601/1050] kvm: svm: Fix implicit declaration for __default_cpu_present_to_apicid() The commit 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC") introduces a build error due to implicit function declaration when #ifdef CONFIG_X86_32 and #ifndef CONFIG_X86_LOCAL_APIC (as reported by Kbuild test robot i386-randconfig-x0-06121009). So, this patch introduces kvm_cpu_get_apicid() wrapper around __default_cpu_present_to_apicid() with additional handling if CONFIG_X86_LOCAL_APIC is not defined. Reported-by: kbuild test robot Fixes: commit 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC") Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 11 +++++++++++ arch/x86/kvm/svm.c | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e0fbe7e70dc1..69e62862b622 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -1368,4 +1369,14 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} +static inline int kvm_cpu_get_apicid(int mps_cpu) +{ +#ifdef CONFIG_X86_LOCAL_APIC + return __default_cpu_present_to_apicid(mps_cpu); +#else + WARN_ON_ONCE(1); + return BAD_APICID; +#endif +} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1163e8173e5a..8a3c571e4a86 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1324,7 +1324,7 @@ free_avic: static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) { u64 entry; - int h_physical_id = __default_cpu_present_to_apicid(vcpu->cpu); + int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); if (!kvm_vcpu_apicv_active(vcpu)) @@ -1349,7 +1349,7 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; /* ID = 0xff (broadcast), ID > 0xff (reserved) */ - int h_physical_id = __default_cpu_present_to_apicid(cpu); + int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); if (!kvm_vcpu_apicv_active(vcpu)) @@ -4236,7 +4236,7 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) if (avic_vcpu_is_running(vcpu)) wrmsrl(SVM_AVIC_DOORBELL, - __default_cpu_present_to_apicid(vcpu->cpu)); + kvm_cpu_get_apicid(vcpu->cpu)); else kvm_vcpu_wake_up(vcpu); } From 5b8abf1f33ccd9f1cbc4248ade3cd507d9319c48 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 15 Jun 2016 17:24:36 -0500 Subject: [PATCH 0602/1050] kvm: svm: Do not support AVIC if not CONFIG_X86_LOCAL_APIC Add logic to disable AVIC #ifndef CONFIG_X86_LOCAL_APIC. Suggested-by: Paolo Bonzini Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8a3c571e4a86..16ef31b87452 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -238,7 +238,9 @@ module_param(nested, int, S_IRUGO); /* enable / disable AVIC */ static int avic; +#ifdef CONFIG_X86_LOCAL_APIC module_param(avic, int, S_IRUGO); +#endif static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); @@ -981,11 +983,14 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); - if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC))) - avic = false; - - if (avic) - pr_info("AVIC enabled\n"); + if (avic) { + if (!npt_enabled || + !boot_cpu_has(X86_FEATURE_AVIC) || + !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) + avic = false; + else + pr_info("AVIC enabled\n"); + } return 0; From 5dc8a864be0820677e7fce85d2832d4387c7bb88 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Jun 2016 15:17:40 -0300 Subject: [PATCH 0603/1050] Update my main e-mails at the Kernel tree For the third time in three years, I'm changing my e-mail at Samsung. That's bad, as it may stop communications with me for a while. So, this time, I'll also add the mchehab@kernel.org e-mail, as it remains stable since ever. Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- .mailmap | 1 + CREDITS | 1 + MAINTAINERS | 51 ++++++++++++++++++++----------- drivers/media/v4l2-core/v4l2-mc.c | 2 +- include/media/v4l2-mc.h | 2 +- 5 files changed, 38 insertions(+), 19 deletions(-) diff --git a/.mailmap b/.mailmap index 08b80428f583..4a293bea648d 100644 --- a/.mailmap +++ b/.mailmap @@ -89,6 +89,7 @@ Leonid I Ananiev Linas Vepstas Mark Brown Matthieu CASTET +Mauro Carvalho Chehab Mayuresh Janorkar Michael Buesch Michel Dänzer diff --git a/CREDITS b/CREDITS index 0f0bf22afe0c..2a3fbcd229e6 100644 --- a/CREDITS +++ b/CREDITS @@ -649,6 +649,7 @@ D: Configure, Menuconfig, xconfig N: Mauro Carvalho Chehab E: m.chehab@samsung.org +E: mchehab@osg.samsung.com E: mchehab@infradead.org D: Media subsystem (V4L/DVB) drivers and core D: EDAC drivers and EDAC 3.0 core rework diff --git a/MAINTAINERS b/MAINTAINERS index 16700e4fcc4a..29e22bf887a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2242,7 +2242,8 @@ F: include/net/ax25.h F: net/ax25/ AZ6007 DVB DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git @@ -2709,7 +2710,8 @@ F: Documentation/filesystems/btrfs.txt F: fs/btrfs/ BTTV VIDEO4LINUX DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git @@ -3344,7 +3346,8 @@ S: Maintained F: drivers/media/dvb-frontends/cx24120* CX88 VIDEO4LINUX DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git @@ -4291,7 +4294,8 @@ F: fs/ecryptfs/ EDAC-CORE M: Doug Thompson M: Borislav Petkov -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac.git linux_next @@ -4336,7 +4340,8 @@ S: Maintained F: drivers/edac/e7xxx_edac.c EDAC-GHES -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/ghes_edac.c @@ -4360,19 +4365,22 @@ S: Maintained F: drivers/edac/i5000_edac.c EDAC-I5400 -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/i5400_edac.c EDAC-I7300 -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/i7300_edac.c EDAC-I7CORE -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/i7core_edac.c @@ -4409,7 +4417,8 @@ S: Maintained F: drivers/edac/r82600_edac.c EDAC-SBRIDGE -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/sb_edac.c @@ -4468,7 +4477,8 @@ S: Maintained F: drivers/net/ethernet/ibm/ehea/ EM28XX VIDEO4LINUX DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git @@ -7358,7 +7368,8 @@ S: Supported F: drivers/media/pci/netup_unidvb/* MEDIA INPUT INFRASTRUCTURE (V4L/DVB) -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab P: LinuxTV.org Project L: linux-media@vger.kernel.org W: https://linuxtv.org @@ -9852,7 +9863,8 @@ S: Odd Fixes F: drivers/media/i2c/saa6588* SAA7134 VIDEO4LINUX DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git @@ -10371,7 +10383,8 @@ S: Maintained F: drivers/media/radio/si4713/radio-usb-si4713.c SIANO DVB DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git @@ -11137,7 +11150,8 @@ S: Maintained F: drivers/media/i2c/tda9840* TEA5761 TUNER DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git @@ -11145,7 +11159,8 @@ S: Odd fixes F: drivers/media/tuners/tea5761.* TEA5767 TUNER DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git @@ -11532,7 +11547,8 @@ F: include/linux/shmem_fs.h F: mm/shmem.c TM6000 VIDEO4LINUX DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git @@ -12518,7 +12534,8 @@ S: Maintained F: arch/x86/entry/vdso/ XC2028/3028 TUNER DRIVER -M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab +M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git diff --git a/drivers/media/v4l2-core/v4l2-mc.c b/drivers/media/v4l2-core/v4l2-mc.c index ca94bded3386..8bef4331bd51 100644 --- a/drivers/media/v4l2-core/v4l2-mc.c +++ b/drivers/media/v4l2-core/v4l2-mc.c @@ -1,7 +1,7 @@ /* * Media Controller ancillary functions * - * Copyright (c) 2016 Mauro Carvalho Chehab + * Copyright (c) 2016 Mauro Carvalho Chehab * Copyright (C) 2016 Shuah Khan * Copyright (C) 2006-2010 Nokia Corporation * Copyright (c) 2016 Intel Corporation. diff --git a/include/media/v4l2-mc.h b/include/media/v4l2-mc.h index 98a938aabdfb..7a8d6037a4bb 100644 --- a/include/media/v4l2-mc.h +++ b/include/media/v4l2-mc.h @@ -1,7 +1,7 @@ /* * v4l2-mc.h - Media Controller V4L2 types and prototypes * - * Copyright (C) 2016 Mauro Carvalho Chehab + * Copyright (C) 2016 Mauro Carvalho Chehab * Copyright (C) 2006-2010 Nokia Corporation * Copyright (c) 2016 Intel Corporation. * From 90effdcd2b8a39927aace655c72088a62911e8e1 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 14 Jun 2016 16:30:27 -0600 Subject: [PATCH 0604/1050] Update email addresses in MAINTAINERS and .mailmap Updating email addresses in MAINTAINERS and .mailmap files. Cc: stable@vger.kernel.org Signed-off-by: Shuah Khan Signed-off-by: Linus Torvalds --- .mailmap | 1 + MAINTAINERS | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 4a293bea648d..779a9caadc7f 100644 --- a/.mailmap +++ b/.mailmap @@ -123,6 +123,7 @@ Santosh Shilimkar Sascha Hauer S.Çağlar Onur Shiraz Hashim +Shuah Khan Simon Kelley Stéphane Witzmann Stephen Hemminger diff --git a/MAINTAINERS b/MAINTAINERS index 29e22bf887a9..7c326f155001 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6497,6 +6497,7 @@ F: include/uapi/linux/sunrpc/ KERNEL SELFTEST FRAMEWORK M: Shuah Khan +M: Shuah Khan L: linux-kselftest@vger.kernel.org T: git git://git.kernel.org/pub/scm/shuah/linux-kselftest S: Maintained @@ -11902,7 +11903,8 @@ F: drivers/usb/common/usb-otg-fsm.c USB OVER IP DRIVER M: Valentina Manea -M: Shuah Khan +M: Shuah Khan +M: Shuah Khan L: linux-usb@vger.kernel.org S: Maintained F: Documentation/usb/usbip_protocol.txt From feb9dad5209280085d5b0c094fa67e7a8d75c81a Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Tue, 14 Jun 2016 23:28:04 -0400 Subject: [PATCH 0605/1050] nfsd: Always lock state exclusively. It used to be the case that state had an rwlock that was locked for write by downgrades, but for read for upgrades (opens). Well, the problem is if there are two competing opens for the same state, they step on each other toes potentially leading to leaking file descriptors from the state structure, since access mode is a bitmap only set once. Signed-off-by: Oleg Drokin Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 40 ++++++++++++++++++++-------------------- fs/nfsd/state.h | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f5f82e145018..c927d36d8ac6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3502,7 +3502,7 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - init_rwsem(&stp->st_rwsem); + mutex_init(&stp->st_mutex); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); @@ -4335,10 +4335,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ - down_read(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto out; } } else { @@ -4348,19 +4348,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (swapstp) { nfs4_put_stid(&stp->st_stid); stp = swapstp; - down_read(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto out; } goto upgrade_out; } - down_read(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); release_open_stateid(stp); goto out; } @@ -4372,7 +4372,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } upgrade_out: nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4977,12 +4977,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; - down_write(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); if (status != nfs_ok) - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); return status; } @@ -5030,7 +5030,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -5062,12 +5062,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) { - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto put_stateid; } oo->oo_flags |= NFS4_OO_CONFIRMED; nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -5143,7 +5143,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5196,7 +5196,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfsd4_close_open_stateid(stp); @@ -5422,7 +5422,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - init_rwsem(&stp->st_rwsem); + mutex_init(&stp->st_mutex); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5591,7 +5591,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; - up_write(&open_stp->st_rwsem); + mutex_unlock(&open_stp->st_mutex); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5600,7 +5600,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); if (status == nfs_ok) - down_write(&lock_stp->st_rwsem); + mutex_lock(&lock_stp->st_mutex); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5704,7 +5704,7 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; - up_write(&lock_stp->st_rwsem); + mutex_unlock(&lock_stp->st_mutex); /* * If this is a new, never-before-used stateid, and we are @@ -5874,7 +5874,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput: fput(filp); put_stateid: - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 986e51e5ceac..64053eadeb81 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -535,7 +535,7 @@ struct nfs4_ol_stateid { unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid *st_openstp; - struct rw_semaphore st_rwsem; + struct mutex st_mutex; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) From 5cc1fb2a093e254b656c64ff24b0b76bed1d34d9 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Tue, 14 Jun 2016 23:28:05 -0400 Subject: [PATCH 0606/1050] nfsd: Extend the mutex holding region around in nfsd4_process_open2() To avoid racing entry into nfs4_get_vfs_file(). Make init_open_stateid() return with locked stateid to be unlocked by the caller. Signed-off-by: Oleg Drokin Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c927d36d8ac6..94854a06c9ad 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3487,6 +3487,10 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfs4_openowner *oo = open->op_openowner; struct nfs4_ol_stateid *retstp = NULL; + /* We are moving these outside of the spinlocks to avoid the warnings */ + mutex_init(&stp->st_mutex); + mutex_lock(&stp->st_mutex); + spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); @@ -3502,13 +3506,17 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - mutex_init(&stp->st_mutex); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); + if (retstp) { + mutex_lock(&retstp->st_mutex); + /* Not that we need to, just for neatness */ + mutex_unlock(&stp->st_mutex); + } return retstp; } @@ -4344,11 +4352,14 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } else { stp = open->op_stp; open->op_stp = NULL; + /* + * init_open_stateid() either returns a locked stateid + * it found, or initializes and locks the new one we passed in + */ swapstp = init_open_stateid(stp, fp, open); if (swapstp) { nfs4_put_stid(&stp->st_stid); stp = swapstp; - mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { @@ -4357,7 +4368,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } goto upgrade_out; } - mutex_lock(&stp->st_mutex); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { mutex_unlock(&stp->st_mutex); From 8c7245abda877d4689b3371db8ae2a4400d7d9ce Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Tue, 14 Jun 2016 23:28:06 -0400 Subject: [PATCH 0607/1050] nfsd: Make init_open_stateid() a bit more whole Move the state selection logic inside from the caller, always making it return correct stp to use. Signed-off-by: J . Bruce Fields Signed-off-by: Oleg Drokin Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 94854a06c9ad..70d0b9b33031 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3480,13 +3480,14 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, } static struct nfs4_ol_stateid * -init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, - struct nfsd4_open *open) +init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; struct nfs4_ol_stateid *retstp = NULL; + struct nfs4_ol_stateid *stp; + stp = open->op_stp; /* We are moving these outside of the spinlocks to avoid the warnings */ mutex_init(&stp->st_mutex); mutex_lock(&stp->st_mutex); @@ -3497,6 +3498,8 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, retstp = nfsd4_find_existing_open(fp, open); if (retstp) goto out_unlock; + + open->op_stp = NULL; atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); @@ -3514,10 +3517,11 @@ out_unlock: spin_unlock(&oo->oo_owner.so_client->cl_lock); if (retstp) { mutex_lock(&retstp->st_mutex); - /* Not that we need to, just for neatness */ + /* To keep mutex tracking happy */ mutex_unlock(&stp->st_mutex); + stp = retstp; } - return retstp; + return stp; } /* @@ -4313,7 +4317,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; - struct nfs4_ol_stateid *swapstp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -4350,16 +4353,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } } else { - stp = open->op_stp; - open->op_stp = NULL; - /* - * init_open_stateid() either returns a locked stateid - * it found, or initializes and locks the new one we passed in - */ - swapstp = init_open_stateid(stp, fp, open); - if (swapstp) { - nfs4_put_stid(&stp->st_stid); - stp = swapstp; + /* stp is returned locked. */ + stp = init_open_stateid(fp, open); + /* See if we lost the race to some other thread */ + if (stp->st_access_bmap != 0) { status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { From 7e1b1fc4dabd6ec8e28baa0708866e13fa93c9b3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 10 Jun 2016 10:54:32 +0200 Subject: [PATCH 0608/1050] base: make module_create_drivers_dir race-free Modules which register drivers via standard path (driver_register) in parallel can cause a warning: WARNING: CPU: 2 PID: 3492 at ../fs/sysfs/dir.c:31 sysfs_warn_dup+0x62/0x80 sysfs: cannot create duplicate filename '/module/saa7146/drivers' Modules linked in: hexium_gemini(+) mxb(+) ... ... Call Trace: ... [] sysfs_warn_dup+0x62/0x80 [] sysfs_create_dir_ns+0x77/0x90 [] kobject_add_internal+0xb4/0x340 [] kobject_add+0x68/0xb0 [] kobject_create_and_add+0x31/0x70 [] module_add_driver+0xc3/0xd0 [] bus_add_driver+0x154/0x280 [] driver_register+0x60/0xe0 [] __pci_register_driver+0x60/0x70 [] saa7146_register_extension+0x64/0x90 [saa7146] [] hexium_init_module+0x11/0x1000 [hexium_gemini] ... As can be (mostly) seen, driver_register causes this call sequence: -> bus_add_driver -> module_add_driver -> module_create_drivers_dir The last one creates "drivers" directory in /sys/module/<...>. When this is done in parallel, the directory is attempted to be created twice at the same time. This can be easily reproduced by loading mxb and hexium_gemini in parallel: while :; do modprobe mxb & modprobe hexium_gemini wait rmmod mxb hexium_gemini saa7146_vv saa7146 done saa7146 calls pci_register_driver for both mxb and hexium_gemini, which means /sys/module/saa7146/drivers is to be created for both of them. Fix this by a new mutex in module_create_drivers_dir which makes the test-and-create "drivers" dir atomic. I inverted the condition and removed 'return' to avoid multiple unlocks or a goto. Signed-off-by: Jiri Slaby Fixes: fe480a2675ed (Modules: only add drivers/ direcory if needed) Cc: v2.6.21+ Signed-off-by: Greg Kroah-Hartman --- drivers/base/module.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/base/module.c b/drivers/base/module.c index db930d3ee312..2a215780eda2 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -24,10 +24,12 @@ static char *make_driver_name(struct device_driver *drv) static void module_create_drivers_dir(struct module_kobject *mk) { - if (!mk || mk->drivers_dir) - return; + static DEFINE_MUTEX(drivers_dir_mutex); - mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj); + mutex_lock(&drivers_dir_mutex); + if (mk && !mk->drivers_dir) + mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj); + mutex_unlock(&drivers_dir_mutex); } void module_add_driver(struct module *mod, struct device_driver *drv) From 66d95b6705a6347f7b2645e042874ec0bb03b726 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 15 Jun 2016 14:10:57 +0800 Subject: [PATCH 0609/1050] tipc: fix suspicious RCU usage When run tipcTS&tipcTC test suite, the following complaint appears: [ 56.926168] =============================== [ 56.926169] [ INFO: suspicious RCU usage. ] [ 56.926171] 4.7.0-rc1+ #160 Not tainted [ 56.926173] ------------------------------- [ 56.926174] net/tipc/bearer.c:408 suspicious rcu_dereference_protected() usage! [ 56.926175] [ 56.926175] other info that might help us debug this: [ 56.926175] [ 56.926177] [ 56.926177] rcu_scheduler_active = 1, debug_locks = 1 [ 56.926179] 3 locks held by swapper/4/0: [ 56.926180] #0: (((&req->timer))){+.-...}, at: [] call_timer_fn+0x5/0x340 [ 56.926203] #1: (&(&req->lock)->rlock){+.-...}, at: [] disc_timeout+0x1b/0xd0 [tipc] [ 56.926212] #2: (rcu_read_lock){......}, at: [] tipc_bearer_xmit_skb+0xb0/0x2e0 [tipc] [ 56.926218] [ 56.926218] stack backtrace: [ 56.926221] CPU: 4 PID: 0 Comm: swapper/4 Not tainted 4.7.0-rc1+ #160 [ 56.926222] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 56.926224] 0000000000000000 ffff880016803d28 ffffffff813c4423 ffff8800154252c0 [ 56.926227] 0000000000000001 ffff880016803d58 ffffffff810b7512 ffff8800124d8120 [ 56.926230] ffff880013f8a160 ffff8800132b5ccc ffff8800124d8120 ffff880016803d88 [ 56.926234] Call Trace: [ 56.926235] [] dump_stack+0x67/0x94 [ 56.926250] [] lockdep_rcu_suspicious+0xe2/0x120 [ 56.926256] [] tipc_l2_send_msg+0x131/0x1c0 [tipc] [ 56.926261] [] tipc_bearer_xmit_skb+0x14c/0x2e0 [tipc] [ 56.926266] [] ? tipc_bearer_xmit_skb+0xb0/0x2e0 [tipc] [ 56.926273] [] ? tipc_disc_init_msg+0x1f0/0x1f0 [tipc] [ 56.926278] [] ? tipc_disc_init_msg+0x1f0/0x1f0 [tipc] [ 56.926283] [] disc_timeout+0x56/0xd0 [tipc] [ 56.926288] [] call_timer_fn+0xb8/0x340 [ 56.926291] [] ? call_timer_fn+0x5/0x340 [ 56.926296] [] ? tipc_disc_init_msg+0x1f0/0x1f0 [tipc] [ 56.926300] [] run_timer_softirq+0x23a/0x390 [ 56.926306] [] ? clockevents_program_event+0x7f/0x130 [ 56.926316] [] __do_softirq+0xc3/0x4a2 [ 56.926323] [] irq_exit+0x8a/0xb0 [ 56.926327] [] smp_apic_timer_interrupt+0x46/0x60 [ 56.926331] [] apic_timer_interrupt+0x89/0x90 [ 56.926333] [] ? default_idle+0x2a/0x1a0 [ 56.926340] [] ? default_idle+0x28/0x1a0 [ 56.926342] [] arch_cpu_idle+0xf/0x20 [ 56.926345] [] default_idle_call+0x2f/0x50 [ 56.926347] [] cpu_startup_entry+0x215/0x3e0 [ 56.926353] [] start_secondary+0xf9/0x100 The warning appears as rtnl_dereference() is wrongly used in tipc_l2_send_msg() under RCU read lock protection. Instead the proper usage should be that rcu_dereference_rtnl() is called here. Fixes: 5b7066c3dd24 ("tipc: stricter filtering of packets in bearer layer") Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6f11c62bc8f9..bf8f05c3eb82 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -405,7 +405,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, return 0; /* Send RESET message even if bearer is detached from device */ - tipc_ptr = rtnl_dereference(dev->tipc_ptr); + tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) goto drop; From c91522f860bb9dd4178c8280bbebd4f4321b7199 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 15 Jun 2016 14:11:31 +0800 Subject: [PATCH 0610/1050] tipc: eliminate uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/tipc/link.c: In function ‘tipc_link_timeout’: net/tipc/link.c:744:28: warning: ‘mtyp’ may be used uninitialized in this function [-Wuninitialized] Fixes: 42b18f605fea ("tipc: refactor function tipc_link_timeout()") Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 7059c94f33c5..67b6ab9f4c8d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -704,7 +704,8 @@ static void link_profile_stats(struct tipc_link *l) */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { - int mtyp, rc = 0; + int mtyp = 0; + int rc = 0; bool state = false; bool probe = false; bool setup = false; From 4a1836701fd59476ee1331379e00a9ab51ba4f61 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 15 Jun 2016 17:45:51 +0200 Subject: [PATCH 0611/1050] net: skfb: remove obsolete -I cflag The skfp driver has been moved to drivers/net/fddi/skfp a long time ago, but we still attempt to include headers from the old location, which causes a warning when building with W=1: cc1: error: /git/arm-soc/drivers/net/skfp: No such file or directory [-Werror=missing-include-dirs] cc1: error: drivers/net/skfp: No such file or directory [-Werror=missing-include-dirs] Clearly this include directive is not needed any more, so we can just remove it now. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/fddi/skfp/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fddi/skfp/Makefile b/drivers/net/fddi/skfp/Makefile index b0be0234abf6..a957a1c7e5ba 100644 --- a/drivers/net/fddi/skfp/Makefile +++ b/drivers/net/fddi/skfp/Makefile @@ -17,4 +17,4 @@ skfp-objs := skfddi.o hwmtm.o fplustm.o smt.o cfm.o \ # projects. To keep the source common for all those drivers (and # thus simplify fixes to it), please do not clean it up! -ccflags-y := -Idrivers/net/skfp -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes +ccflags-y := -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes From daddef76c3deaaa7922f9d7b18edbf0a061215c3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 15 Jun 2016 11:14:53 +0200 Subject: [PATCH 0612/1050] net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG The implementation of net_dbg_ratelimited in the CONFIG_DYNAMIC_DEBUG case was added with 2c94b5373 ("net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case"). The implementation strategy was to take the usual definition of the dynamic_pr_debug macro, but alter it by adding a call to "net_ratelimit()" in the if statement. This is, in fact, the correct approach. However, while doing this, the author of the commit forgot to surround fmt by pr_fmt, resulting in unprefixed log messages appearing in the console. So, this commit adds back the pr_fmt(fmt) invocation, making net_dbg_ratelimited properly consistent across DEBUG, no DEBUG, and DYNAMIC_DEBUG cases, and bringing parity with the behavior of dynamic_pr_debug as well. Fixes: 2c94b5373 ("net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case") Signed-off-by: Jason A. Donenfeld Cc: Tim Bingham Signed-off-by: David S. Miller --- include/linux/net.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/net.h b/include/linux/net.h index 9aa49a05fe38..25aa03b51c4e 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -251,7 +251,8 @@ do { \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ net_ratelimit()) \ - __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \ + __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \ + ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ From e582615ad33dbd39623084a02e95567b116e1eea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Jun 2016 06:24:00 -0700 Subject: [PATCH 0613/1050] gre: fix error handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) gre_parse_header() can be called from gre_err() At this point transport header points to ICMP header, not the inner header. 2) We can not really change transport header as ipgre_err() will later assume transport header still points to ICMP header (using icmp_hdr()) 3) pskb_may_pull() logic in gre_parse_header() really works if we are interested at zone pointed by skb->data 4) As Jiri explained in commit b7f8fe251e46 ("gre: do not pull header in ICMP error processing") we should not pull headers in error handler. So this fix : A) changes gre_parse_header() to use skb->data instead of skb_transport_header() B) Adds a nhs parameter to gre_parse_header() so that we can skip the not pulled IP header from error path. This offset is 0 for normal receive path. C) remove obsolete IPV6 includes Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Maciej Żenczykowski Cc: Jiri Benc Signed-off-by: David S. Miller --- include/net/gre.h | 2 +- net/ipv4/gre_demux.c | 10 +++++----- net/ipv4/ip_gre.c | 12 ++++-------- net/ipv6/ip6_gre.c | 2 +- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/include/net/gre.h b/include/net/gre.h index 5dce30a6abe3..7a54a31d1d4c 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -26,7 +26,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version); struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type); int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto); + bool *csum_err, __be16 proto, int nhs); static inline int gre_calc_hlen(__be16 o_flags) { diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 4c39f4fd332a..de1d119a4497 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -62,26 +62,26 @@ EXPORT_SYMBOL_GPL(gre_del_protocol); /* Fills in tpi and returns header length to be pulled. */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto) + bool *csum_err, __be16 proto, int nhs) { const struct gre_base_hdr *greh; __be32 *options; int hdr_len; - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr)))) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; tpi->flags = gre_flags_to_tnl_flags(greh->flags); hdr_len = gre_calc_hlen(tpi->flags); - if (!pskb_may_pull(skb, hdr_len)) + if (!pskb_may_pull(skb, nhs + hdr_len)) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); tpi->proto = greh->protocol; options = (__be32 *)(greh + 1); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 07c5cf1838d8..1d000af7f561 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -49,12 +49,6 @@ #include #include -#if IS_ENABLED(CONFIG_IPV6) -#include -#include -#include -#endif - /* Problems & solutions -------------------- @@ -217,12 +211,14 @@ static void gre_err(struct sk_buff *skb, u32 info) * by themselves??? */ + const struct iphdr *iph = (struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) { + if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), + iph->ihl * 4) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -338,7 +334,7 @@ static int gre_rcv(struct sk_buff *skb) } #endif - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); if (hdr_len < 0) goto drop; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index fdc9de276ab1..776d145113e1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb) bool csum_err = false; int hdr_len; - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0); if (hdr_len < 0) goto drop; From 19de99f70b87fcc3338da52a89c439b088cbff71 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Jun 2016 18:25:38 -0700 Subject: [PATCH 0614/1050] bpf: fix matching of data/data_end in verifier The ctx structure passed into bpf programs is different depending on bpf program type. The verifier incorrectly marked ctx->data and ctx->data_end access based on ctx offset only. That caused loads in tracing programs int bpf_prog(struct pt_regs *ctx) { .. ctx->ax .. } to be incorrectly marked as PTR_TO_PACKET which later caused verifier to reject the program that was actually valid in tracing context. Fix this by doing program type specific matching of ctx offsets. Fixes: 969bf05eb3ce ("bpf: direct packet access") Reported-by: Sasha Goldshtein Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 28 ++++++++++++++++++++++++++- kernel/bpf/verifier.c | 41 +++++++--------------------------------- kernel/trace/bpf_trace.c | 6 ++++-- net/core/filter.c | 16 ++++++++++++++-- 4 files changed, 52 insertions(+), 39 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8ee27b8afe81..8269cafc6eb1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -111,6 +111,31 @@ enum bpf_access_type { BPF_WRITE = 2 }; +/* types of values stored in eBPF registers */ +enum bpf_reg_type { + NOT_INIT = 0, /* nothing was written into register */ + UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ + PTR_TO_CTX, /* reg points to bpf_context */ + CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ + PTR_TO_MAP_VALUE, /* reg points to map element value */ + PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ + FRAME_PTR, /* reg == frame_pointer */ + PTR_TO_STACK, /* reg == frame_pointer + imm */ + CONST_IMM, /* constant integer value */ + + /* PTR_TO_PACKET represents: + * skb->data + * skb->data + imm + * skb->data + (u16) var + * skb->data + (u16) var + imm + * if (range > 0) then [ptr, ptr + range - off) is safe to access + * if (id > 0) means that some 'var' was added + * if (off > 0) menas that 'imm' was added + */ + PTR_TO_PACKET, + PTR_TO_PACKET_END, /* skb->data + headlen */ +}; + struct bpf_prog; struct bpf_verifier_ops { @@ -120,7 +145,8 @@ struct bpf_verifier_ops { /* return true if 'size' wide access at offset 'off' within bpf_context * with 'type' (read or write) is allowed */ - bool (*is_valid_access)(int off, int size, enum bpf_access_type type); + bool (*is_valid_access)(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type); u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 668e07903c8f..eec9f90ba030 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -126,31 +126,6 @@ * are set to NOT_INIT to indicate that they are no longer readable. */ -/* types of values stored in eBPF registers */ -enum bpf_reg_type { - NOT_INIT = 0, /* nothing was written into register */ - UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ - PTR_TO_CTX, /* reg points to bpf_context */ - CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ - PTR_TO_MAP_VALUE, /* reg points to map element value */ - PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ - FRAME_PTR, /* reg == frame_pointer */ - PTR_TO_STACK, /* reg == frame_pointer + imm */ - CONST_IMM, /* constant integer value */ - - /* PTR_TO_PACKET represents: - * skb->data - * skb->data + imm - * skb->data + (u16) var - * skb->data + (u16) var + imm - * if (range > 0) then [ptr, ptr + range - off) is safe to access - * if (id > 0) means that some 'var' was added - * if (off > 0) menas that 'imm' was added - */ - PTR_TO_PACKET, - PTR_TO_PACKET_END, /* skb->data + headlen */ -}; - struct reg_state { enum bpf_reg_type type; union { @@ -695,10 +670,10 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, /* check access to 'struct bpf_context' fields */ static int check_ctx_access(struct verifier_env *env, int off, int size, - enum bpf_access_type t) + enum bpf_access_type t, enum bpf_reg_type *reg_type) { if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t)) { + env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) env->prog->aux->max_ctx_offset = off + size; @@ -798,21 +773,19 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, mark_reg_unknown_value(state->regs, value_regno); } else if (reg->type == PTR_TO_CTX) { + enum bpf_reg_type reg_type = UNKNOWN_VALUE; + if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into ctx\n", value_regno); return -EACCES; } - err = check_ctx_access(env, off, size, t); + err = check_ctx_access(env, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { mark_reg_unknown_value(state->regs, value_regno); - if (off == offsetof(struct __sk_buff, data) && - env->allow_ptr_leaks) + if (env->allow_ptr_leaks) /* note that reg.[id|off|range] == 0 */ - state->regs[value_regno].type = PTR_TO_PACKET; - else if (off == offsetof(struct __sk_buff, data_end) && - env->allow_ptr_leaks) - state->regs[value_regno].type = PTR_TO_PACKET_END; + state->regs[value_regno].type = reg_type; } } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 720b7bb01d43..e7af6cb9d5cf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -349,7 +349,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func } /* bpf+kprobe programs can access fields of 'struct pt_regs' */ -static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type) +static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type) { /* check bounds */ if (off < 0 || off >= sizeof(struct pt_regs)) @@ -427,7 +428,8 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) } } -static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type) +static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type) { if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) return false; diff --git a/net/core/filter.c b/net/core/filter.c index 68adb5f52110..c4b330c85c02 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2085,7 +2085,8 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) } static bool sk_filter_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { switch (off) { case offsetof(struct __sk_buff, tc_classid): @@ -2108,7 +2109,8 @@ static bool sk_filter_is_valid_access(int off, int size, } static bool tc_cls_act_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { if (type == BPF_WRITE) { switch (off) { @@ -2123,6 +2125,16 @@ static bool tc_cls_act_is_valid_access(int off, int size, return false; } } + + switch (off) { + case offsetof(struct __sk_buff, data): + *reg_type = PTR_TO_PACKET; + break; + case offsetof(struct __sk_buff, data_end): + *reg_type = PTR_TO_PACKET_END; + break; + } + return __is_valid_access(off, size, type); } From ad572d174787daa59e24b8b5c83028c09cdb5ddb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Jun 2016 18:25:39 -0700 Subject: [PATCH 0615/1050] bpf, trace: check event type in bpf_perf_event_read similar to bpf_perf_event_output() the bpf_perf_event_read() helper needs to check the type of the perf_event before reading the counter. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Reported-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e7af6cb9d5cf..26f603da7e26 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -209,6 +209,10 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) event->pmu->count) return -EINVAL; + if (unlikely(event->attr.type != PERF_TYPE_HARDWARE && + event->attr.type != PERF_TYPE_RAW)) + return -EINVAL; + /* * we don't know if the function is run successfully by the * return value. It can be judged in other places, such as From 4e384ac19b5be6ad084b215d298b82b3a91ad24b Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 16 Jun 2016 10:55:17 +0800 Subject: [PATCH 0616/1050] r8152: disable MAC clock speed down Disable MAC clock speed down. It may casue the first control transfer to contain the wrong data, when the power state change from U1 to U0. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 3f9f6ed3eec4..89dc752f92bd 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3382,15 +3382,11 @@ static void r8153_init(struct r8152 *tp) r8153_power_cut_en(tp, false); r8153_u1u2en(tp, true); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, ALDPS_SPDWN_RATIO); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, EEE_SPDWN_RATIO); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, - PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN | - U1U2_SPDWN_EN | L1_SPDWN_EN); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, - PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN | - TP100_SPDWN_EN | TP500_SPDWN_EN | TP1000_SPDWN_EN | - EEE_SPDWN_EN); + /* MAC clock speed down */ + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); r8153_enable_eee(tp); r8153_aldps_en(tp, true); From 93fe9b1838404fcc3bea320b704bfa461d8e57a6 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 16 Jun 2016 10:55:18 +0800 Subject: [PATCH 0617/1050] r8152: reset the bmu Reset the BMU to clear the rx/tx fifo. This avoids that the unexpected data remains in the hw. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 89dc752f92bd..f5bc351e09ef 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -116,6 +116,7 @@ #define USB_TX_DMA 0xd434 #define USB_TOLERANCE 0xd490 #define USB_LPM_CTRL 0xd41a +#define USB_BMU_RESET 0xd4b0 #define USB_UPS_CTRL 0xd800 #define USB_MISC_0 0xd81a #define USB_POWER_CUT 0xd80a @@ -338,6 +339,10 @@ #define TEST_MODE_DISABLE 0x00000001 #define TX_SIZE_ADJUST1 0x00000100 +/* USB_BMU_RESET */ +#define BMU_RESET_EP_IN 0x01 +#define BMU_RESET_EP_OUT 0x02 + /* USB_UPS_CTRL */ #define POWER_CUT 0x0100 @@ -2456,6 +2461,17 @@ static void r8153_teredo_off(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TEREDO_TIMER, 0); } +static void rtl_reset_bmu(struct r8152 *tp) +{ + u32 ocp_data; + + ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_BMU_RESET); + ocp_data &= ~(BMU_RESET_EP_IN | BMU_RESET_EP_OUT); + ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data); + ocp_data |= BMU_RESET_EP_IN | BMU_RESET_EP_OUT; + ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data); +} + static void r8152_aldps_en(struct r8152 *tp, bool enable) { if (enable) { @@ -2681,6 +2697,7 @@ static void r8153_first_init(struct r8152 *tp) r8153_hw_phy_cfg(tp); rtl8152_nic_reset(tp); + rtl_reset_bmu(tp); ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); ocp_data &= ~NOW_IS_OOB; @@ -2742,6 +2759,7 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); rtl_disable(tp); + rtl_reset_bmu(tp); for (i = 0; i < 1000; i++) { ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); @@ -2803,6 +2821,7 @@ static void rtl8153_disable(struct r8152 *tp) { r8153_aldps_en(tp, false); rtl_disable(tp); + rtl_reset_bmu(tp); r8153_aldps_en(tp, true); usb_enable_lpm(tp->udev); } From a59e6d815226b70780427648e359da9bbee07171 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 16 Jun 2016 10:55:19 +0800 Subject: [PATCH 0618/1050] r8152: correct the rx early size The rx early size should be (agg_buf_sz - packet size) / 8 Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f5bc351e09ef..4e257b8d8f3e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -31,7 +31,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "3" +#define NET_VERSION "4" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -2174,7 +2174,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp) static void r8153_set_rx_early_size(struct r8152 *tp) { u32 mtu = tp->netdev->mtu; - u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 4; + u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8; ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data); } From ec48a1d981fe90ecb5bcfaaf1ae2c69d842cbbbc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 14 Jun 2016 11:17:12 -0600 Subject: [PATCH 0619/1050] coresight: Fix NULL pointer dereference in _coresight_build_path _coresight_build_path assumes that all the connections of a csdev has the child_dev initialised. This may not be true if the particular component is not supported by the kernel config(e.g TPIU) but is present in the DT. In which case, building a path can cause a crash like this : Unable to handle kernel NULL pointer dereference at virtual address 00000010 pgd = ffffffc9750dd000 [00000010] *pgd=00000009f5e90003, *pud=00000009f5e90003, *pmd=0000000000000000 Internal error: Oops: 96000006 [#1] PREEMPT SMP Modules linked in: CPU: 4 PID: 1348 Comm: bash Not tainted 4.6.0-next-20160517 #1646 Hardware name: ARM Juno development board (r0) (DT) task: ffffffc97517a280 ti: ffffffc9762c4000 task.ti: ffffffc9762c4000 PC is at _coresight_build_path+0x18/0xe4 LR is at _coresight_build_path+0xc0/0xe4 pc : [] lr : [] pstate: 20000145 sp : ffffffc9762c7ba0 [] _coresight_build_path+0x18/0xe4 [] _coresight_build_path+0xc0/0xe4 [] _coresight_build_path+0xc0/0xe4 [] _coresight_build_path+0xc0/0xe4 [] _coresight_build_path+0xc0/0xe4 [] _coresight_build_path+0xc0/0xe4 [] coresight_build_path+0x40/0x68 [] coresight_enable+0x74/0x1bc [] enable_source_store+0x3c/0x6c [] dev_attr_store+0x18/0x28 [] sysfs_kf_write+0x40/0x50 [] kernfs_fop_write+0x140/0x1cc [] __vfs_write+0x28/0x110 [] vfs_write+0xa0/0x174 [] SyS_write+0x44/0xa0 [] el0_svc_naked+0x24/0x28 Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 5443d03a1eec..0fdaaf4a8994 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -385,7 +385,6 @@ static int _coresight_build_path(struct coresight_device *csdev, int i; bool found = false; struct coresight_node *node; - struct coresight_connection *conn; /* An activated sink has been found. Enqueue the element */ if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || @@ -394,8 +393,9 @@ static int _coresight_build_path(struct coresight_device *csdev, /* Not a sink - recursively explore each port found on this element */ for (i = 0; i < csdev->nr_outport; i++) { - conn = &csdev->conns[i]; - if (_coresight_build_path(conn->child_dev, path) == 0) { + struct coresight_device *child_dev = csdev->conns[i].child_dev; + + if (child_dev && _coresight_build_path(child_dev, path) == 0) { found = true; break; } From 8e215298a15d5b93c6fa22895c406da538769bca Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 14 Jun 2016 11:17:13 -0600 Subject: [PATCH 0620/1050] coresight: Fix tmc_read_unprepare_etr At the end of the trace capture, we free the allocated memory, resetting the drvdata->buf to NULL, to indicate that trace data was collected and the next trace session should allocate the memory in tmc_enable_etr_sink_sysfs. The tmc_enable_etr_sink_sysfs, we only allocate memory if drvdata->vaddr is not NULL (which is not performed at the end of previous session). This can cause, drvdata->vaddr getting assigned NULL and later we do memset() which causes a crash as below : Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = ffffffc9747f0000 [00000000] *pgd=00000009f402e003, *pud=00000009f402e003, *pmd=0000000000000000 Internal error: Oops: 96000046 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 1592 Comm: bash Not tainted 4.7.0-rc1+ #1712 Hardware name: ARM Juno development board (r0) (DT) task: ffffffc078fe0080 ti: ffffffc974178000 task.ti: ffffffc974178000 PC is at __memset+0x1ac/0x200 LR is at tmc_enable_etr_sink+0xf8/0x304 pc : [] lr : [] pstate: 400001c5 sp : ffffffc97417bc00 x29: ffffffc97417bc00 x28: ffffffc974178000 Call trace: Exception stack(0xffffffc97417ba40 to 0xffffffc97417bb60) ba40: 0000000000000001 ffffffc974a5d098 ffffffc97417bc00 ffffff80083a002c ba60: ffffffc974a5d118 0000000000000000 0000000000000000 0000000000000000 ba80: 0000000000000001 0000000000000000 ffffff800859bdec 0000000000000040 baa0: ffffff8008b45b58 00000000000001c0 ffffffc97417baf0 ffffff80080eddb4 bac0: 0000000000000003 ffffffc078fe0080 ffffffc078fe0960 ffffffc078fe0940 bae0: 0000000000000000 0000000000000000 00000000007fffc0 0000000000000004 bb00: 0000000000000000 0000000000000040 000000000000003f 0000000000000000 bb20: 0000000000000000 0000000000000000 0000000000000000 0000000000000001 bb40: ffffffc078fe0960 0000000000000018 ffffffffffffffff 0008669628000000 [] __memset+0x1ac/0x200 [] coresight_enable_path+0xa8/0x1dc [] coresight_enable+0x88/0x1b8 [] enable_source_store+0x3c/0x6c [] dev_attr_store+0x18/0x28 [] sysfs_kf_write+0x54/0x64 [] kernfs_fop_write+0x148/0x1d8 [] __vfs_write+0x28/0x110 [] vfs_write+0xa0/0x198 [] SyS_write+0x44/0xa0 [] el0_svc_naked+0x24/0x28 Code: 91010108 54ffff4a 8b040108 cb050042 (d50b7428) This patch fixes the issue by clearing the drvdata->vaddr while we free the allocated buffer at the end of a session, so that we allocate the memory again. Cc: mathieu.poirier@linaro.org Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 847d1b5f2c13..3369d7a80a51 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -315,7 +315,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) */ vaddr = drvdata->vaddr; paddr = drvdata->paddr; - drvdata->buf = NULL; + drvdata->buf = drvdata->vaddr = NULL; } drvdata->reading = false; From f3b8172fe15fbed0d0d33d99780e122213e00684 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 14 Jun 2016 11:17:14 -0600 Subject: [PATCH 0621/1050] coresight: Fix erroneous memset in tmc_read_unprepare_etr At the end of a trace collection, we try to clear the entire buffer and enable the ETR back if it was already enabled. But, we would have adjusted the drvdata->buf to point to the beginning of the trace data in the trace buffer @drvdata->vaddr. So, the following code which clears the buffer is dangerous and can cause crashes, like below : memset(drvdata->buf, 0, drvdata->size); Unable to handle kernel paging request at virtual address ffffff800a145000 pgd = ffffffc974726000 *pgd=00000009f3e91003, *pud=00000009f3e91003, *pmd=0000000000000000 PREEMPT SMP Modules linked in: CPU: 4 PID: 1692 Comm: dd Not tainted 4.7.0-rc2+ #1721 Hardware name: ARM Juno development board (r0) (DT) task: ffffffc9734a0080 ti: ffffffc974460000 task.ti: ffffffc974460000 PC is at __memset+0x1ac/0x200 LR is at tmc_read_unprepare_etr+0x144/0x1bc pc : [] lr : [] pstate: 200001c5 ... [] __memset+0x1ac/0x200 [] tmc_release+0x90/0x94 [] __fput+0xa8/0x1ec [] ____fput+0xc/0x14 [] task_work_run+0xb0/0xe4 [] do_notify_resume+0x64/0x6c [] work_pending+0x10/0x14 Code: 91010108 54ffff4a 8b040108 cb050042 (d50b7428) Since we clear the buffer anyway in the following call to tmc_etr_enable_hw(), remove the erroneous memset(). Fixes: commit de5461970b3e9e1 ("coresight: tmc: allocating memory when needed") Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 3369d7a80a51..688be9e060fc 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -300,13 +300,10 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) if (local_read(&drvdata->mode) == CS_MODE_SYSFS) { /* * The trace run will continue with the same allocated trace - * buffer. As such zero-out the buffer so that we don't end - * up with stale data. - * - * Since the tracer is still enabled drvdata::buf - * can't be NULL. + * buffer. The trace buffer is cleared in tmc_etr_enable_hw(), + * so we don't have to explicitly clear it. Also, since the + * tracer is still enabled drvdata::buf can't be NULL. */ - memset(drvdata->buf, 0, drvdata->size); tmc_etr_enable_hw(drvdata); } else { /* From 5014e904681ddbdf663bb20f134eb053ddccb181 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 6 May 2016 15:35:50 +0100 Subject: [PATCH 0622/1050] coresight: Handle build path error Enabling a component via sysfs (echo 1 > enable_source), would trigger building a path from the enabled sources to the sink. If there is an error in the process (e.g, sink not enabled or the device (CPU corresponding to ETM) is not online), we never report failure, except for leaving a message in the dmesg. Do proper error checking for the build path and return the error. Before: $ echo 0 > /sys/devices/system/cpu/cpu2/online $ echo 1 > /sys/devices/cs_etm/cpu2/enable_source $ echo $? 0 After: $ echo 0 > /sys/devices/system/cpu/cpu2/online $ echo 1 > /sys/devices/cs_etm/cpu2/enable_source -bash: echo: write error: No such device or address Signed-off-by: Suzuki K Poulose Acked-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 0fdaaf4a8994..d08d1ab9bba5 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -425,6 +425,7 @@ out: struct list_head *coresight_build_path(struct coresight_device *csdev) { struct list_head *path; + int rc; path = kzalloc(sizeof(struct list_head), GFP_KERNEL); if (!path) @@ -432,9 +433,10 @@ struct list_head *coresight_build_path(struct coresight_device *csdev) INIT_LIST_HEAD(path); - if (_coresight_build_path(csdev, path)) { + rc = _coresight_build_path(csdev, path); + if (rc) { kfree(path); - path = NULL; + return ERR_PTR(rc); } return path; @@ -507,8 +509,9 @@ int coresight_enable(struct coresight_device *csdev) goto out; path = coresight_build_path(csdev); - if (!path) { + if (IS_ERR(path)) { pr_err("building path(s) failed\n"); + ret = PTR_ERR(path); goto out; } From caf1ff26e1aa178133df68ac3d40815fed2187d9 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 15 Jun 2016 18:00:33 +0800 Subject: [PATCH 0623/1050] kvm: Fix irq route entries exceeding KVM_MAX_IRQ_ROUTES These days, we experienced one guest crash with 8 cores and 3 disks, with qemu error logs as bellow: qemu-system-x86_64: /build/qemu-2.0.0/kvm-all.c:984: kvm_irqchip_commit_routes: Assertion `ret == 0' failed. And then we found one patch(bdf026317d) in qemu tree, which said could fix this bug. Execute the following script will reproduce the BUG quickly: irq_affinity.sh ======================================================================== vda_irq_num=25 vdb_irq_num=27 while [ 1 ] do for irq in {1,2,4,8,10,20,40,80} do echo $irq > /proc/irq/$vda_irq_num/smp_affinity echo $irq > /proc/irq/$vdb_irq_num/smp_affinity dd if=/dev/vda of=/dev/zero bs=4K count=100 iflag=direct dd if=/dev/vdb of=/dev/zero bs=4K count=100 iflag=direct done done ======================================================================== The following qemu log is added in the qemu code and is displayed when this bug reproduced: kvm_irqchip_commit_routes: max gsi: 1008, nr_allocated_irq_routes: 1024, irq_routes->nr: 1024, gsi_count: 1024. That's to say when irq_routes->nr == 1024, there are 1024 routing entries, but in the kernel code when routes->nr >= 1024, will just return -EINVAL; The nr is the number of the routing entries which is in of [1 ~ KVM_MAX_IRQ_ROUTES], not the index in [0 ~ KVM_MAX_IRQ_ROUTES - 1]. This patch fix the BUG above. Cc: stable@vger.kernel.org Signed-off-by: Xiubo Li Signed-off-by: Wei Tang Signed-off-by: Zhang Zhuoyu Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 02e98f3131bd..48bd520fc702 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2941,7 +2941,7 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&routing, argp, sizeof(routing))) goto out; r = -EINVAL; - if (routing.nr >= KVM_MAX_IRQ_ROUTES) + if (routing.nr > KVM_MAX_IRQ_ROUTES) goto out; if (routing.flags) goto out; From a0052191624e9bf8a8f9dc41b92ab5f252566c3c Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Mon, 13 Jun 2016 09:56:56 +0800 Subject: [PATCH 0624/1050] kvm: vmx: check apicv is active before using VT-d posted interrupt VT-d posted interrupt is relying on the CPU side's posted interrupt. Need to check whether VCPU's APICv is active before enabing VT-d posted interrupt. Fixes: d62caabb41f33d96333f9ef15e09cd26e1c12760 Cc: stable@vger.kernel.org Signed-off-by: Yang Zhang Signed-off-by: Shengge Ding Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fb93010beaa4..003618e324ce 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2072,7 +2072,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) unsigned int dest; if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return; do { @@ -2180,7 +2181,8 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return; /* Set SN when the vCPU is preempted */ @@ -10714,7 +10716,8 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return 0; vcpu->pre_pcpu = vcpu->cpu; @@ -10780,7 +10783,8 @@ static void vmx_post_block(struct kvm_vcpu *vcpu) unsigned long flags; if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return; do { @@ -10833,7 +10837,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, int idx, ret = -EINVAL; if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(kvm->vcpus[0])) return 0; idx = srcu_read_lock(&kvm->irq_srcu); From 8f50b8e57442d28e41bb736c173d8a2490549a82 Mon Sep 17 00:00:00 2001 From: "Ocquidant, Sebastien" Date: Wed, 15 Jun 2016 13:47:35 +0200 Subject: [PATCH 0625/1050] memory: omap-gpmc: Fix omap gpmc EXTRADELAY timing In the omap gpmc driver it can be noticed that GPMC_CONFIG4_OEEXTRADELAY is overwritten by the WEEXTRADELAY value from the device tree and GPMC_CONFIG4_WEEXTRADELAY is not updated by the value from the device tree. As a consequence, the memory accesses cannot be configured properly when the extra delay are needed for OE and WE. Fix the update of GPMC_CONFIG4_WEEXTRADELAY with the value from the device tree file and prevents GPMC_CONFIG4_OEXTRADELAY being overwritten by the WEXTRADELAY value from the device tree. Cc: stable@vger.kernel.org Signed-off-by: Ocquidant, Sebastien Signed-off-by: Roger Quadros --- drivers/memory/omap-gpmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index af4884ba6b7c..15508df24e5d 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -398,7 +398,7 @@ static void gpmc_cs_bool_timings(int cs, const struct gpmc_bool_timings *p) gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4, GPMC_CONFIG4_OEEXTRADELAY, p->oe_extra_delay); gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4, - GPMC_CONFIG4_OEEXTRADELAY, p->we_extra_delay); + GPMC_CONFIG4_WEEXTRADELAY, p->we_extra_delay); gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG6, GPMC_CONFIG6_CYCLE2CYCLESAMECSEN, p->cycle2cyclesamecsen); From 9254e70c4ef1fee2e5c43feded4433d19cbb6177 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 9 Jun 2016 12:28:13 +0200 Subject: [PATCH 0626/1050] s390/cpum_cf: use perf software context for hardware counters On s390, there are two different hardware PMUs for counting and sampling. Previously, both PMUs have shared the perf_hw_context which is not correct and, recently, results in this warning: ------------[ cut here ]------------ WARNING: CPU: 5 PID: 1 at kernel/events/core.c:8485 perf_pmu_register+0x420/0x428 Modules linked in: CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc1+ #2 task: 00000009c5240000 ti: 00000009c5234000 task.ti: 00000009c5234000 Krnl PSW : 0704c00180000000 0000000000220c50 (perf_pmu_register+0x420/0x428) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Krnl GPRS: ffffffffffffffff 0000000000b15ac6 0000000000000000 00000009cb440000 000000000022087a 0000000000000000 0000000000b78fa0 0000000000000000 0000000000a9aa90 0000000000000084 0000000000000005 000000000088a97a 0000000000000004 0000000000749dd0 000000000022087a 00000009c5237cc0 Krnl Code: 0000000000220c44: a7f4ff54 brc 15,220aec 0000000000220c48: 92011000 mvi 0(%r1),1 #0000000000220c4c: a7f40001 brc 15,220c4e >0000000000220c50: a7f4ff12 brc 15,220a74 0000000000220c54: 0707 bcr 0,%r7 0000000000220c56: 0707 bcr 0,%r7 0000000000220c58: ebdff0800024 stmg %r13,%r15,128(%r15) 0000000000220c5e: a7f13fe0 tmll %r15,16352 Call Trace: ([<000000000022087a>] perf_pmu_register+0x4a/0x428) ([<0000000000b2c25c>] init_cpum_sampling_pmu+0x14c/0x1f8) ([<0000000000100248>] do_one_initcall+0x48/0x140) ([<0000000000b25d26>] kernel_init_freeable+0x1e6/0x2a0) ([<000000000072bda4>] kernel_init+0x24/0x138) ([<000000000073495e>] kernel_thread_starter+0x6/0xc) ([<0000000000734958>] kernel_thread_starter+0x0/0xc) Last Breaking-Event-Address: [<0000000000220c4c>] perf_pmu_register+0x41c/0x428 ---[ end trace 0c6ef9f5b771ad97 ]--- Using the perf_sw_context is an option because the cpum_cf PMU does not use interrupts. To make this more clear, initialize the capabilities in the PMU structure. Signed-off-by: Hendrik Brueckner Suggested-by: Peter Zijlstra Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 59215c518f37..7ec63b1d920d 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -649,6 +649,8 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) /* Performance monitoring unit for s390x */ static struct pmu cpumf_pmu = { + .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, .pmu_enable = cpumf_pmu_enable, .pmu_disable = cpumf_pmu_disable, .event_init = cpumf_pmu_event_init, @@ -708,12 +710,6 @@ static int __init cpumf_pmu_init(void) goto out; } - /* The CPU measurement counter facility does not have overflow - * interrupts to do sampling. Sampling must be provided by - * external means, for example, by timers. - */ - cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { From 79ee2e8f730411a30b271d5f9cdeae189fa66174 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 16 Jun 2016 19:03:11 +0530 Subject: [PATCH 0627/1050] PM / OPP: Add 'UNKNOWN' status for shared_opp in struct opp_table dev_pm_opp_get_sharing_cpus() returns 0 even in the case when the OPP core doesn't know whether or not the table is shared. It works on the majority of platforms, where the OPP table is never created before invoking the function and then -ENODEV is returned by it. But in the case of one platform (Jetson TK1) at least, the situation is a bit different. The OPP table has been created (somehow) before dev_pm_opp_get_sharing_cpus() is called and it returns 0. Its caller treats that as 'the CPUs don't share OPPs' and that leads to degraded performance. Fix this by converting 'shared_opp' in struct opp_table to an enum and making dev_pm_opp_get_sharing_cpus() return -EINVAL in case when the value of that field is "access unknown", so that the caller can handle it accordingly (cpufreq-dt considers that as 'all CPUs share the table', for example). Fixes: 6f707daa3833 "PM / OPP: Add dev_pm_opp_get_sharing_cpus()" Reported-and-tested-by: Alexandre Courbot Signed-off-by: Viresh Kumar [ rjw : Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/cpu.c | 12 +++++++++--- drivers/base/power/opp/of.c | 10 ++++++++-- drivers/base/power/opp/opp.h | 8 +++++++- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 83d6e7ba1a34..8c3434bdb26d 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -211,7 +211,7 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, } /* Mark opp-table as multiple CPUs are sharing it now */ - opp_table->shared_opp = true; + opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED; } unlock: mutex_unlock(&opp_table_lock); @@ -227,7 +227,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus); * * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev. * - * Returns -ENODEV if OPP table isn't already present. + * Returns -ENODEV if OPP table isn't already present and -EINVAL if the OPP + * table's status is access-unknown. * * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks @@ -249,9 +250,14 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) goto unlock; } + if (opp_table->shared_opp == OPP_TABLE_ACCESS_UNKNOWN) { + ret = -EINVAL; + goto unlock; + } + cpumask_clear(cpumask); - if (opp_table->shared_opp) { + if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) { list_for_each_entry(opp_dev, &opp_table->dev_list, node) cpumask_set_cpu(opp_dev->dev->id, cpumask); } else { diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c index 94d2010558e3..1dfd3dd92624 100644 --- a/drivers/base/power/opp/of.c +++ b/drivers/base/power/opp/of.c @@ -34,7 +34,10 @@ static struct opp_table *_managed_opp(const struct device_node *np) * But the OPPs will be considered as shared only if the * OPP table contains a "opp-shared" property. */ - return opp_table->shared_opp ? opp_table : NULL; + if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) + return opp_table; + + return NULL; } } @@ -353,7 +356,10 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) } opp_table->np = opp_np; - opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared"); + if (of_property_read_bool(opp_np, "opp-shared")) + opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED; + else + opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE; mutex_unlock(&opp_table_lock); diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 20f3be22e060..fabd5ca1a083 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -119,6 +119,12 @@ struct opp_device { #endif }; +enum opp_table_access { + OPP_TABLE_ACCESS_UNKNOWN = 0, + OPP_TABLE_ACCESS_EXCLUSIVE = 1, + OPP_TABLE_ACCESS_SHARED = 2, +}; + /** * struct opp_table - Device opp structure * @node: table node - contains the devices with OPPs that @@ -166,7 +172,7 @@ struct opp_table { /* For backward compatibility with v1 bindings */ unsigned int voltage_tolerance_v1; - bool shared_opp; + enum opp_table_access shared_opp; struct dev_pm_opp *suspend_opp; unsigned int *supported_hw; From 362761299eea7dfc3a4870551de36e08758b9254 Mon Sep 17 00:00:00 2001 From: Marcin Niestroj Date: Tue, 14 Jun 2016 15:29:24 +0200 Subject: [PATCH 0628/1050] power_supply: tps65217-charger: Fix NULL deref during property export This bug leads to: [ 1.906411] Unable to handle kernel NULL pointer dereference at virtual address 0000000c [ 1.914878] pgd = c0004000 [ 1.917786] [0000000c] *pgd=00000000 [ 1.921536] Internal error: Oops: 5 [#1] SMP ARM [ 1.926357] Modules linked in: [ 1.929556] CPU: 0 PID: 14 Comm: kworker/0:1 Not tainted 4.4.5 #18 [ 1.936006] Hardware name: Generic AM33XX (Flattened Device Tree) [ 1.942383] Workqueue: events power_supply_changed_work [ 1.947842] task: de2c41c0 ti: de2c8000 task.ti: de2c8000 [ 1.953483] PC is at tps65217_ac_get_property+0x14/0x28 [ 1.958937] LR is at tps65217_ac_get_property+0x10/0x28 Driver was trying to use drv_data in property get handler. However drv_data was not set, so it caused NULL pointer dereference. This patch properly sets drv_data during probe by power_supply_config parameter, so the property get handler works as desired. Signed-off-by: Marcin Niestroj Fixes: 3636859b280c ("power_supply: Add support for tps65217-charger") Signed-off-by: Sebastian Reichel --- drivers/power/tps65217_charger.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c index d9f56730c735..73dfae41def8 100644 --- a/drivers/power/tps65217_charger.c +++ b/drivers/power/tps65217_charger.c @@ -197,6 +197,7 @@ static int tps65217_charger_probe(struct platform_device *pdev) { struct tps65217 *tps = dev_get_drvdata(pdev->dev.parent); struct tps65217_charger *charger; + struct power_supply_config cfg = {}; int ret; dev_dbg(&pdev->dev, "%s\n", __func__); @@ -208,9 +209,12 @@ static int tps65217_charger_probe(struct platform_device *pdev) charger->tps = tps; charger->dev = &pdev->dev; + cfg.of_node = pdev->dev.of_node; + cfg.drv_data = charger; + charger->ac = devm_power_supply_register(&pdev->dev, &tps65217_charger_desc, - NULL); + &cfg); if (IS_ERR(charger->ac)) { dev_err(&pdev->dev, "failed: power supply register\n"); return PTR_ERR(charger->ac); From c70410cb91de70707a507ee7beef7021a5a89f0d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 9 Jun 2016 14:38:50 -0400 Subject: [PATCH 0629/1050] rtl8xxxu: fix typo on variable name, compare against correct variable path_b_ok is being assigned but immediately after path_a_ok is being compared to the value 0x03. This appears to be a typo on the variable name, compare path_b_ok instead. Signed-off-by: Colin Ian King Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index fe19ace0d6a0..b04cf30f3959 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1149,7 +1149,7 @@ static void rtl8192eu_phy_iqcalibrate(struct rtl8xxxu_priv *priv, for (i = 0; i < retry; i++) { path_b_ok = rtl8192eu_rx_iqk_path_b(priv); - if (path_a_ok == 0x03) { + if (path_b_ok == 0x03) { val32 = rtl8xxxu_read32(priv, REG_RX_POWER_BEFORE_IQK_B_2); result[t][6] = (val32 >> 16) & 0x3ff; From 0d15ef677839dab8313fbb86c007c3175b638d03 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 16 Jun 2016 16:51:52 +0100 Subject: [PATCH 0630/1050] arm64: kgdb: Match pstate size with gdbserver protocol Current versions of gdb do not interoperate cleanly with kgdb on arm64 systems because gdb and kgdb do not use the same register description. This patch modifies kgdb to work with recent releases of gdb (>= 7.8.1). Compatibility with gdb (after the patch is applied) is as follows: gdb-7.6 and earlier Ok gdb-7.7 series Works if user provides custom target description gdb-7.8(.0) Works if user provides custom target description gdb-7.8.1 and later Ok When commit 44679a4f142b ("arm64: KGDB: Add step debugging support") was introduced it was paired with a gdb patch that made an incompatible change to the gdbserver protocol. This patch was eventually merged into the gdb sources: https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commit;h=a4d9ba85ec5597a6a556afe26b712e878374b9dd The change to the protocol was mostly made to simplify big-endian support inside the kernel gdb stub. Unfortunately the gdb project released gdb-7.7.x and gdb-7.8.0 before the protocol incompatibility was identified and reversed: https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commit;h=bdc144174bcb11e808b4e73089b850cf9620a7ee This leaves us in a position where kgdb still uses the no-longer-used protocol; gdb-7.8.1, which restored the original behaviour, was released on 2014-10-29. I don't believe it is possible to detect/correct the protocol incompatiblity which means the kernel must take a view about which version of the gdb remote protocol is "correct". This patch takes the view that the original/current version of the protocol is correct and that version found in gdb-7.7.x and gdb-7.8.0 is anomalous. Signed-off-by: Daniel Thompson Signed-off-by: Will Deacon --- arch/arm64/include/asm/kgdb.h | 45 ++++++++++++++++++++++++++++------- arch/arm64/kernel/kgdb.c | 14 ++++++++++- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h index f69f69c8120c..da84645525b9 100644 --- a/arch/arm64/include/asm/kgdb.h +++ b/arch/arm64/include/asm/kgdb.h @@ -38,25 +38,54 @@ extern int kgdb_fault_expected; #endif /* !__ASSEMBLY__ */ /* - * gdb is expecting the following registers layout. + * gdb remote procotol (well most versions of it) expects the following + * register layout. * * General purpose regs: * r0-r30: 64 bit * sp,pc : 64 bit - * pstate : 64 bit - * Total: 34 + * pstate : 32 bit + * Total: 33 + 1 * FPU regs: * f0-f31: 128 bit - * Total: 32 - * Extra regs * fpsr & fpcr: 32 bit - * Total: 2 + * Total: 32 + 2 * + * To expand a little on the "most versions of it"... when the gdb remote + * protocol for AArch64 was developed it depended on a statement in the + * Architecture Reference Manual that claimed "SPSR_ELx is a 32-bit register". + * and, as a result, allocated only 32-bits for the PSTATE in the remote + * protocol. In fact this statement is still present in ARM DDI 0487A.i. + * + * Unfortunately "is a 32-bit register" has a very special meaning for + * system registers. It means that "the upper bits, bits[63:32], are + * RES0.". RES0 is heavily used in the ARM architecture documents as a + * way to leave space for future architecture changes. So to translate a + * little for people who don't spend their spare time reading ARM architecture + * manuals, what "is a 32-bit register" actually means in this context is + * "is a 64-bit register but one with no meaning allocated to any of the + * upper 32-bits... *yet*". + * + * Perhaps then we should not be surprised that this has led to some + * confusion. Specifically a patch, influenced by the above translation, + * that extended PSTATE to 64-bit was accepted into gdb-7.7 but the patch + * was reverted in gdb-7.8.1 and all later releases, when this was + * discovered to be an undocumented protocol change. + * + * So... it is *not* wrong for us to only allocate 32-bits to PSTATE + * here even though the kernel itself allocates 64-bits for the same + * state. That is because this bit of code tells the kernel how the gdb + * remote protocol (well most versions of it) describes the register state. + * + * Note that if you are using one of the versions of gdb that supports + * the gdb-7.7 version of the protocol you cannot use kgdb directly + * without providing a custom register description (gdb can load new + * protocol descriptions at runtime). */ -#define _GP_REGS 34 +#define _GP_REGS 33 #define _FP_REGS 32 -#define _EXTRA_REGS 2 +#define _EXTRA_REGS 3 /* * general purpose registers size in bytes. * pstate is only 4 bytes. subtract 4 bytes diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index b67531a13136..b5f063e5eff7 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -58,7 +58,17 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "x30", 8, offsetof(struct pt_regs, regs[30])}, { "sp", 8, offsetof(struct pt_regs, sp)}, { "pc", 8, offsetof(struct pt_regs, pc)}, - { "pstate", 8, offsetof(struct pt_regs, pstate)}, + /* + * struct pt_regs thinks PSTATE is 64-bits wide but gdb remote + * protocol disagrees. Therefore we must extract only the lower + * 32-bits. Look for the big comment in asm/kgdb.h for more + * detail. + */ + { "pstate", 4, offsetof(struct pt_regs, pstate) +#ifdef CONFIG_CPU_BIG_ENDIAN + + 4 +#endif + }, { "v0", 16, -1 }, { "v1", 16, -1 }, { "v2", 16, -1 }, @@ -128,6 +138,8 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) memset((char *)gdb_regs, 0, NUMREGBYTES); thread_regs = task_pt_regs(task); memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); + /* Special case for PSTATE (check comments in asm/kgdb.h for details) */ + dbg_get_reg(33, gdb_regs + GP_REG_BYTES, thread_regs); } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) From 17471c7ba5115ed724d624577b21c166d2194272 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 15 Jun 2016 22:31:10 +0200 Subject: [PATCH 0631/1050] net: sfc: avoid -Wtype-limits warning When building with -Wextra, we get a harmless warning from the EFX_EXTRACT_OWORD32 macro: ethernet/sfc/farch.c: In function 'efx_farch_test_registers': ethernet/sfc/farch.c:119:30: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] ethernet/sfc/farch.c:124:144: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] ethernet/sfc/farch.c:124:392: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] ethernet/sfc/farch.c:124:731: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] The macro and the caller are both correct, but we can avoid the warning by changing the index variable to a signed type. Signed-off-by: Arnd Bergmann Acked-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/farch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 133e9e35be9e..4c83739d158f 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -104,7 +104,8 @@ int efx_farch_test_registers(struct efx_nic *efx, const struct efx_farch_register_test *regs, size_t n_regs) { - unsigned address = 0, i, j; + unsigned address = 0; + int i, j; efx_oword_t mask, imask, original, reg, buf; for (i = 0; i < n_regs; ++i) { From a547224dceed4645139f7eff72ff51adb9938bcb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 15 Jun 2016 14:42:11 -0700 Subject: [PATCH 0632/1050] mlx4e: Do not attempt to offload VXLAN ports that are unrecognized The mlx4e driver does not support more than one port for VXLAN offload. As such expecting the hardware to offload other ports is invalid since it appears the parsing logic is used to perform Tx checksum and segmentation offloads. Use the vxlan_port number to determine in which cases we can apply the offload and in which cases we can not. Signed-off-by: Alexander Duyck Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 19ceced6736c..a2c25365a8a3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2447,9 +2447,14 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, * strip that feature if this is an IPv6 encapsulated frame. */ if (skb->encapsulation && - (skb->ip_summed == CHECKSUM_PARTIAL) && - (ip_hdr(skb)->version != 4)) - features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + (skb->ip_summed == CHECKSUM_PARTIAL)) { + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (!priv->vxlan_port || + (ip_hdr(skb)->version != 4) || + (udp_hdr(skb)->dest != priv->vxlan_port)) + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } return features; } From 8fa3b8d689a54d6d04ff7803c724fb7aca6ce98e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 May 2016 15:42:13 -0400 Subject: [PATCH 0633/1050] cgroup: set css->id to -1 during init If percpu_ref initialization fails during css_create(), the free path can end up trying to free css->id of zero. As ID 0 is unused, it doesn't cause a critical breakage but it does trigger a warning message. Fix it by setting css->id to -1 from init_and_link_css(). Signed-off-by: Tejun Heo Cc: Wenwei Tao Fixes: 01e586598b22 ("cgroup: release css->id after css_free") Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Tejun Heo --- kernel/cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 789b84f973c9..688eb0cd1851 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5063,6 +5063,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, memset(css, 0, sizeof(*css)); css->cgroup = cgrp; css->ss = ss; + css->id = -1; INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); css->serial_nr = css_serial_nr_next++; From d5d8760b78d0cfafe292f965f599988138b06a70 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 16 Jun 2016 17:06:19 +0900 Subject: [PATCH 0634/1050] sit: correct IP protocol used in ipip6_err Since 32b8a8e59c9c ("sit: add IPv4 over IPv4 support") ipip6_err() may be called for packets whose IP protocol is IPPROTO_IPIP as well as those whose IP protocol is IPPROTO_IPV6. In the case of IPPROTO_IPIP packets the correct protocol value is not passed to ipv4_update_pmtu() or ipv4_redirect(). This patch resolves this problem by using the IP protocol of the packet rather than a hard-coded value. This appears to be consistent with the usage of the protocol of a packet by icmp_socket_deliver() the caller of ipip6_err(). I was able to exercise the redirect case by using a setup where an ICMP redirect was received for the destination of the encapsulated packet. However, it appears that although incorrect the protocol field is not used in this case and thus no problem manifests. On inspection it does not appear that a problem will manifest in the fragmentation needed/update pmtu case either. In short I believe this is a cosmetic fix. None the less, the use of IPPROTO_IPV6 seems wrong and confusing. Reviewed-by: Dinan Gunawardena Signed-off-by: Simon Horman Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/sit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 0a5a255277e5..0619ac70836d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -560,13 +560,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, iph->protocol, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_IPV6, 0); + iph->protocol, 0); err = 0; goto out; } From 38327424b40bcebe2de92d07312c89360ac9229a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Jun 2016 15:48:57 +0100 Subject: [PATCH 0635/1050] KEYS: potential uninitialized variable If __key_link_begin() failed then "edit" would be uninitialized. I've added a check to fix that. This allows a random user to crash the kernel, though it's quite difficult to achieve. There are three ways it can be done as the user would have to cause an error to occur in __key_link(): (1) Cause the kernel to run out of memory. In practice, this is difficult to achieve without ENOMEM cropping up elsewhere and aborting the attempt. (2) Revoke the destination keyring between the keyring ID being looked up and it being tested for revocation. In practice, this is difficult to time correctly because the KEYCTL_REJECT function can only be used from the request-key upcall process. Further, users can only make use of what's in /sbin/request-key.conf, though this does including a rejection debugging test - which means that the destination keyring has to be the caller's session keyring in practice. (3) Have just enough key quota available to create a key, a new session keyring for the upcall and a link in the session keyring, but not then sufficient quota to create a link in the nominated destination keyring so that it fails with EDQUOT. The bug can be triggered using option (3) above using something like the following: echo 80 >/proc/sys/kernel/keys/root_maxbytes keyctl request2 user debug:fred negate @t The above sets the quota to something much lower (80) to make the bug easier to trigger, but this is dependent on the system. Note also that the name of the keyring created contains a random number that may be between 1 and 10 characters in size, so may throw the test off by changing the amount of quota used. Assuming the failure occurs, something like the following will be seen: kfree_debugcheck: out of range ptr 6b6b6b6b6b6b6b68h ------------[ cut here ]------------ kernel BUG at ../mm/slab.c:2821! ... RIP: 0010:[] kfree_debugcheck+0x20/0x25 RSP: 0018:ffff8804014a7de8 EFLAGS: 00010092 RAX: 0000000000000034 RBX: 6b6b6b6b6b6b6b68 RCX: 0000000000000000 RDX: 0000000000040001 RSI: 00000000000000f6 RDI: 0000000000000300 RBP: ffff8804014a7df0 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8804014a7e68 R11: 0000000000000054 R12: 0000000000000202 R13: ffffffff81318a66 R14: 0000000000000000 R15: 0000000000000001 ... Call Trace: kfree+0xde/0x1bc assoc_array_cancel_edit+0x1f/0x36 __key_link_end+0x55/0x63 key_reject_and_link+0x124/0x155 keyctl_reject_key+0xb6/0xe0 keyctl_negate_key+0x10/0x12 SyS_keyctl+0x9f/0xe7 do_syscall_64+0x63/0x13a entry_SYSCALL64_slow_path+0x25/0x25 Fixes: f70e2e06196a ('KEYS: Do preallocation for __key_link()') Signed-off-by: Dan Carpenter Signed-off-by: David Howells cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- security/keys/key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/key.c b/security/keys/key.c index bd5a272f28a6..346fbf201c22 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -597,7 +597,7 @@ int key_reject_and_link(struct key *key, mutex_unlock(&key_construction_mutex); - if (keyring) + if (keyring && link_ret == 0) __key_link_end(keyring, &key->index_key, edit); /* wake up anyone waiting for a key to be constructed */ From ce449ba77a2271dce9771fb1f3eb37e4be6a8b81 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 16 Jun 2016 14:42:50 +0100 Subject: [PATCH 0636/1050] nfp: use correct index to mask link state irq We were using an incorrect define to get the irq vector number. NFP_NET_CFG_LSC is a control BAR offset, LSC interrupt vector index is called NFP_NET_IRQ_LSC_IDX. For machines with less than 30 CPUs this meant that we were disabling/enabling IRQ 0. For bigger hosts we were just playing with the 31st RX/TX interrupt. Fixes: 0ba40af963f0 ("nfp: move link state interrupt request/free calls") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index fa47c14c743a..ba26bb356b8d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2015,7 +2015,7 @@ static void nfp_net_open_stack(struct nfp_net *nn) netif_tx_wake_all_queues(nn->netdev); - enable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector); + enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); nfp_net_read_link_status(nn); } @@ -2044,7 +2044,7 @@ static int nfp_net_netdev_open(struct net_device *netdev) NFP_NET_IRQ_LSC_IDX, nn->lsc_handler); if (err) goto err_free_exn; - disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector); + disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings), GFP_KERNEL); @@ -2133,7 +2133,7 @@ static void nfp_net_close_stack(struct nfp_net *nn) { unsigned int r; - disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector); + disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); netif_carrier_off(nn->netdev); nn->link_up = false; From a4c34ff1c029e90e7d5f8dd8d29b0a93b31c3cb2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 17 Jun 2016 11:29:48 +0200 Subject: [PATCH 0637/1050] iommu/vt-d: Enable QI on all IOMMUs before setting root entry This seems to be required on some X58 chipsets on systems with more than one IOMMU. QI does not work until it is enabled on all IOMMUs in the system. Reported-by: Dheeraj CVR Tested-by: Dheeraj CVR Fixes: 5f0a7f7614a9 ('iommu/vt-d: Make root entry visible for hardware right after allocation') Cc: stable@vger.kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a644d0cec2d8..10700945994e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3222,11 +3222,6 @@ static int __init init_dmars(void) } } - iommu_flush_write_buffer(iommu); - iommu_set_root_entry(iommu); - iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; #ifdef CONFIG_INTEL_IOMMU_SVM @@ -3235,6 +3230,18 @@ static int __init init_dmars(void) #endif } + /* + * Now that qi is enabled on all iommus, set the root entry and flush + * caches. This is required on some Intel X58 chipsets, otherwise the + * flush_context function will loop forever and the boot hangs. + */ + for_each_active_iommu(iommu, drhd) { + iommu_flush_write_buffer(iommu); + iommu_set_root_entry(iommu); + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + } + if (iommu_pass_through) iommu_identity_mapping |= IDENTMAP_ALL; From ef5bdccf6d4363fd934035e0b1ca8445975e1d89 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Thu, 16 Jun 2016 21:56:30 +0200 Subject: [PATCH 0638/1050] ARM: OMAP1: fix ams-delta FIQ handler to work with sparse IRQ After OMAP1 IRQ definitions have been changed by commit 685e2d08c54b ("ARM: OMAP1: Change interrupt numbering for sparse IRQ") introduced in v4.2, ams-delta FIQ handler which depends on them no longer works as expected. Fix it. Created and tested on Amstrad Delta against Linux-4.7-rc3 Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/ams-delta-fiq-handler.S | 6 +++--- arch/arm/mach-omap1/ams-delta-fiq.c | 5 +++-- arch/arm/mach-omap1/include/mach/ams-delta-fiq.h | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S index 5d7fb596bf4a..bf608441b357 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S +++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S @@ -43,8 +43,8 @@ #define OTHERS_MASK (MODEM_IRQ_MASK | HOOK_SWITCH_MASK) /* IRQ handler register bitmasks */ -#define DEFERRED_FIQ_MASK (0x1 << (INT_DEFERRED_FIQ % IH2_BASE)) -#define GPIO_BANK1_MASK (0x1 << INT_GPIO_BANK1) +#define DEFERRED_FIQ_MASK OMAP_IRQ_BIT(INT_DEFERRED_FIQ) +#define GPIO_BANK1_MASK OMAP_IRQ_BIT(INT_GPIO_BANK1) /* Driver buffer byte offsets */ #define BUF_MASK (FIQ_MASK * 4) @@ -110,7 +110,7 @@ ENTRY(qwerty_fiqin_start) mov r8, #2 @ reset FIQ agreement str r8, [r12, #IRQ_CONTROL_REG_OFFSET] - cmp r10, #INT_GPIO_BANK1 @ is it GPIO bank interrupt? + cmp r10, #(INT_GPIO_BANK1 - NR_IRQS_LEGACY) @ is it GPIO interrupt? beq gpio @ yes - process it mov r8, #1 diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index d1f12095f315..ec760ae2f917 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -109,7 +109,8 @@ void __init ams_delta_init_fiq(void) * Since no set_type() method is provided by OMAP irq chip, * switch to edge triggered interrupt type manually. */ - offset = IRQ_ILR0_REG_OFFSET + INT_DEFERRED_FIQ * 0x4; + offset = IRQ_ILR0_REG_OFFSET + + ((INT_DEFERRED_FIQ - NR_IRQS_LEGACY) & 0x1f) * 0x4; val = omap_readl(DEFERRED_FIQ_IH_BASE + offset) & ~(1 << 1); omap_writel(val, DEFERRED_FIQ_IH_BASE + offset); @@ -149,7 +150,7 @@ void __init ams_delta_init_fiq(void) /* * Redirect GPIO interrupts to FIQ */ - offset = IRQ_ILR0_REG_OFFSET + INT_GPIO_BANK1 * 0x4; + offset = IRQ_ILR0_REG_OFFSET + (INT_GPIO_BANK1 - NR_IRQS_LEGACY) * 0x4; val = omap_readl(OMAP_IH1_BASE + offset) | 1; omap_writel(val, OMAP_IH1_BASE + offset); } diff --git a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h index adb5e7649659..6dfc3e1210a3 100644 --- a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h +++ b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h @@ -14,6 +14,8 @@ #ifndef __AMS_DELTA_FIQ_H #define __AMS_DELTA_FIQ_H +#include + /* * Interrupt number used for passing control from FIQ to IRQ. * IRQ12, described as reserved, has been selected. From 970f9091d25df14e9540ec7ff48a2f709e284cd1 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 16 Jun 2016 15:25:18 +0300 Subject: [PATCH 0639/1050] ARM: OMAP2+: timer: add probe for clocksources A few platforms are currently missing clocksource_probe() completely in their time_init functionality. On OMAP3430 for example, this is causing cpuidle to be pretty much dead, as the counter32k is not going to be registered and instead a gptimer is used as a clocksource. This will tick in periodic mode, preventing any deeper idle states. While here, also drop one unnecessary check for populated DT before existing clocksource_probe() call. Signed-off-by: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/timer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 5b385bb8aff9..cb9497a20fb3 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -496,8 +496,7 @@ void __init omap_init_time(void) __omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon", 2, "timer_sys_ck", NULL, false); - if (of_have_populated_dt()) - clocksource_probe(); + clocksource_probe(); } #if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM43XX) @@ -505,6 +504,8 @@ void __init omap3_secure_sync32k_timer_init(void) { __omap_sync32k_timer_init(12, "secure_32k_fck", "ti,timer-secure", 2, "timer_sys_ck", NULL, false); + + clocksource_probe(); } #endif /* CONFIG_ARCH_OMAP3 */ @@ -513,6 +514,8 @@ void __init omap3_gptimer_timer_init(void) { __omap_sync32k_timer_init(2, "timer_sys_ck", NULL, 1, "timer_sys_ck", "ti,timer-alwon", true); + + clocksource_probe(); } #endif From e568006b9d828403397668864d9797dc4bfefd28 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 17 Jun 2016 11:32:00 +0530 Subject: [PATCH 0640/1050] powerpc/mm/hash: Don't add memory coherence if cache inhibited is set H_ENTER hcall handling in qemu had assumptions that a cache inhibited hpte entry won't have memory conference set. Also older kernel mentioned that some version of pHyp required this (the code removed by the below commit says: /* Make pHyp happy */ if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU)) hpte_r &= ~HPTE_R_M; But with older kernel we had some inconsistent memory conherence mapping. We always enabled memory conherence in the page fault path and removed memory conherence is _PAGE_NO_CACHE was set when we mapped the page via htab_bolt_mapping. The commit mentioned below tried to consolidate that by always enabling memory conherence. But as mentioned above that breaks Qemu H_ENTER handling. This patch update this such that we enable memory conherence only if cache inhibited is not set and bring fault handling, lpar and bolt mapping in sync. Fixes: commit 30bda41aba4e("powerpc/mm: Drop WIMG in favour of new constant") Reported-by: Darrick J. Wong Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b2740c67e172..5b22ba0b58bc 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -201,9 +201,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) /* * We can't allow hardware to update hpte bits. Hence always * set 'R' bit and set 'C' if it is a write fault - * Memory coherence is always enabled */ - rflags |= HPTE_R_R | HPTE_R_M; + rflags |= HPTE_R_R; if (pteflags & _PAGE_DIRTY) rflags |= HPTE_R_C; @@ -213,10 +212,15 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) rflags |= HPTE_R_I; - if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT) + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); - if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) - rflags |= (HPTE_R_I | HPTE_R_W); + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); + else + /* + * Add memory coherence if cache inhibited is not set + */ + rflags |= HPTE_R_M; return rflags; } From b23d9c5b9c83c05e013aa52460f12a8365062cf4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 17 Jun 2016 11:40:36 +0530 Subject: [PATCH 0641/1050] powerpc/mm/radix: Update Radix tree size as per ISA 3.0 ISA 3.0 updated it to be encoded as Radix tree size = 2^(RTS + 31). We have it encoded as 2^(RTS + 28). Add a helper with the correct encoding and use it instead of opencoding. Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines") Reviewed-by: Balbir Singh Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/radix.h | 15 +++++++++++++++ arch/powerpc/mm/mmu_context_book3s64.c | 2 +- arch/powerpc/mm/pgtable-radix.c | 9 +++------ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 937d4e247ac3..df294224e280 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -228,5 +228,20 @@ extern void radix__vmemmap_remove_mapping(unsigned long start, extern int radix__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int psz); + +static inline unsigned long radix__get_tree_size(void) +{ + unsigned long rts_field; + /* + * we support 52 bits, hence 52-31 = 21, 0b10101 + * RTS encoding details + * bits 0 - 3 of rts -> bits 6 - 8 unsigned long + * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long + */ + rts_field = (0x5UL << 5); /* 6 - 8 bits */ + rts_field |= (0x2UL << 61); + + return rts_field; +} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 227b2a6c4544..196222227e82 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index) /* * set the process table entry, */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); return 0; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index c939e6e57a9e..e58707deef5c 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -160,9 +160,8 @@ redo: process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. - * we support 52 bits, hence 52-28 = 24, 11000 */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE); /* * Fill in the partition table. We are suppose to use effective address @@ -176,10 +175,8 @@ redo: static void __init radix_init_partition_table(void) { unsigned long rts_field; - /* - * we support 52 bits, hence 52-28 = 24, 11000 - */ - rts_field = 3ull << PPC_BITLSHIFT(2); + + rts_field = radix__get_tree_size(); BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); From a3aa256b7258b3d19f8b44557cc64525a993b941 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 17 Jun 2016 13:05:11 +1000 Subject: [PATCH 0642/1050] powerpc/eeh: Fix invalid cached PE primary bus The PE primary bus cannot be got from its child devices when having full hotplug in error recovery. The PE primary bus is cached, which is done in commit <05ba75f84864> ("powerpc/eeh: Fix stale cached primary bus"). In eeh_reset_device(), the flag (EEH_PE_PRI_BUS) is cleared before the PCI hot remove. eeh_pe_bus_get() then returns NULL as the PE primary bus in pnv_eeh_reset() and it crashes the kernel eventually. This fixes the issue by clearing the flag (EEH_PE_PRI_BUS) before the PCI hot add. With it, the PowerNV EEH reset backend (pnv_eeh_reset()) can get valid PE primary bus through eeh_pe_bus_get(). Fixes: 67086e32b564 ("powerpc/eeh: powerpc/eeh: Support error recovery for VF PE") Reported-by: Pridhiviraj Paidipeddi Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2714a3b81d24..b5f73cb5eeb6 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -642,7 +642,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pci_hp_remove_devices(bus); pci_unlock_rescan_remove(); @@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) + if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); - else + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); + } } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); From d279f7a7e95af6bb4b5eaea3527d1f85a28c5cf6 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 15 Jun 2016 11:45:28 +0530 Subject: [PATCH 0643/1050] ARM: dts: am437x-sk-evm: Reduce i2c0 bus speed for tps65218 Based on the latest timing specifications for the TPS65218 from the data sheet, http://www.ti.com/lit/ds/symlink/tps65218.pdf, document SLDS206 from November 2014, we must change the i2c bus speed to better fit within the minimum high SCL time required for proper i2c transfer. When running at 400khz, measurements show that SCL spends 0.8125 uS/1.666 uS high/low which violates the requirement for minimum high period of SCL provided in datasheet Table 7.6 which is 1 uS. Switching to 100khz gives us 5 uS/5 uS high/low which both fall above the minimum given values for 100 khz, 4.0 uS/4.7 uS high/low. Without this patch occasionally a voltage set operation from the kernel will appear to have worked but the actual voltage reflected on the PMIC will not have updated, causing problems especially with cpufreq that may update to a higher OPP without actually raising the voltage on DCDC2, leading to a hang. Signed-off-by: Dave Gerlach Signed-off-by: Nishanth Menon Signed-off-by: Franklin S Cooper Jr Signed-off-by: Aparna Balasubramanian Signed-off-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-sk-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index d82dd6e3f9b1..5687d6b4da60 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -418,7 +418,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins>; - clock-frequency = <400000>; + clock-frequency = <100000>; tps@24 { compatible = "ti,tps65218"; From 8a092e682f20f193f2070dba2ea1904e95814126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mika=20B=C3=A5tsman?= Date: Fri, 17 Jun 2016 13:31:37 +0300 Subject: [PATCH 0644/1050] regulator: anatop: allow regulator to be in bypass mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bypass support was added in commit d38018f2019c ("regulator: anatop: Add bypass support to digital LDOs"). A check for valid voltage selectors was added in commit da0607c8df5c ("regulator: anatop: Fail on invalid voltage selector") but it also discards all regulators that are in bypass mode. Add check for the bypass setting. Errors below were seen on a Variscite mx6 board. anatop_regulator 20c8000.anatop:regulator-vddcore@140: Failed to read a valid default voltage selector. anatop_regulator: probe of 20c8000.anatop:regulator-vddcore@140 failed with error -22 anatop_regulator 20c8000.anatop:regulator-vddsoc@140: Failed to read a valid default voltage selector. anatop_regulator: probe of 20c8000.anatop:regulator-vddsoc@140 failed with error -22 Fixes: da0607c8df5c ("regulator: anatop: Fail on invalid voltage selector") Signed-off-by: Mika Båtsman Signed-off-by: Mark Brown --- drivers/regulator/anatop-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c index 63cd5e68c864..3a6d0290c54c 100644 --- a/drivers/regulator/anatop-regulator.c +++ b/drivers/regulator/anatop-regulator.c @@ -296,7 +296,7 @@ static int anatop_regulator_probe(struct platform_device *pdev) if (!sreg->sel && !strcmp(sreg->name, "vddpu")) sreg->sel = 22; - if (!sreg->sel) { + if (!sreg->bypass && !sreg->sel) { dev_err(&pdev->dev, "Failed to read a valid default voltage selector.\n"); return -EINVAL; } From 8f45927c3cae4db85887700e5415286f766cbaf9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Jun 2016 12:54:18 +0200 Subject: [PATCH 0645/1050] netfilter: xt_SYNPROXY: add missing header to Kbuild Matt Whitlock says: Without this line, the file xt_SYNPROXY.h does not get installed in /usr/include/linux/netfilter/, and thus user-space programs cannot make use of it. Reported-by: Matt Whitlock Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 1d973d2ba417..cd26d7a0fd07 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -33,6 +33,7 @@ header-y += xt_NFLOG.h header-y += xt_NFQUEUE.h header-y += xt_RATEEST.h header-y += xt_SECMARK.h +header-y += xt_SYNPROXY.h header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h From 1463847e93fe693e89c52b03ab4ede6800d717c1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Jun 2016 12:54:18 +0200 Subject: [PATCH 0646/1050] netfilter: xt_SYNPROXY: include missing ./usr/include/linux/netfilter/xt_SYNPROXY.h:11: found __[us]{8,16,32,64} type without #include Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_SYNPROXY.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/netfilter/xt_SYNPROXY.h b/include/uapi/linux/netfilter/xt_SYNPROXY.h index 2d59fbaa93c6..ca67e61d2a61 100644 --- a/include/uapi/linux/netfilter/xt_SYNPROXY.h +++ b/include/uapi/linux/netfilter/xt_SYNPROXY.h @@ -1,6 +1,8 @@ #ifndef _XT_SYNPROXY_H #define _XT_SYNPROXY_H +#include + #define XT_SYNPROXY_OPT_MSS 0x01 #define XT_SYNPROXY_OPT_WSCALE 0x02 #define XT_SYNPROXY_OPT_SACK_PERM 0x04 From 8198868f0a283eb23e264951632ce61ec2f82228 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 17 Jun 2016 13:35:56 +0200 Subject: [PATCH 0647/1050] ALSA: hdac_regmap - fix the register access for runtime PM Call path: 1) snd_hdac_power_up_pm() 2) snd_hdac_power_up() 3) pm_runtime_get_sync() 4) __pm_runtime_resume() 5) rpm_resume() The rpm_resume() returns 1 when the device is already active. Because the return value is unmodified, the hdac regmap read/write functions should allow this value for the retry I/O operation, too. Signed-off-by: Jaroslav Kysela Cc: Signed-off-by: Takashi Iwai --- sound/hda/hdac_regmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c index 87041ddd29cb..47a358fab132 100644 --- a/sound/hda/hdac_regmap.c +++ b/sound/hda/hdac_regmap.c @@ -444,7 +444,7 @@ int snd_hdac_regmap_write_raw(struct hdac_device *codec, unsigned int reg, err = reg_raw_write(codec, reg, val); if (err == -EAGAIN) { err = snd_hdac_power_up_pm(codec); - if (!err) + if (err >= 0) err = reg_raw_write(codec, reg, val); snd_hdac_power_down_pm(codec); } @@ -470,7 +470,7 @@ static int __snd_hdac_regmap_read_raw(struct hdac_device *codec, err = reg_raw_read(codec, reg, val, uncached); if (err == -EAGAIN) { err = snd_hdac_power_up_pm(codec); - if (!err) + if (err >= 0) err = reg_raw_read(codec, reg, val, uncached); snd_hdac_power_down_pm(codec); } From 6762925df4642aec5629f7971ba477d6930f53f7 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 31 May 2016 21:47:17 +0900 Subject: [PATCH 0648/1050] phy: rcar-gen3-usb2: fix unexpected repeat interrupts of VBUS change This patch fixes an issue that the driver is possible to cause unexpected repeat interrupts if a board condition is wrong (e.g. even if the ID pin is as function, a board supplies the VBUS.) The reason why unexpected repeat interrupts happen is: 1) The driver changed the mode to function if it detected the ID pin is high and the VBUS is high. 2) After the driver changed function mode, it disabled the "VBUS control" feature. Then, the VBUS signal will be low. 3) Since the VBUS change interruption happened, the driver checked the ID pin and VBUS. 4) Since VBUS was low, the driver changed the mode to host and enabled the "VBUS control" feature. Then the VBUS signal will be high. 5) Since the VBUS change interruption happened, the driver did 1) above. So, this patch modified the condition in rcar_gen3_device_recognition() to check the ID pin only. Fixes: 1114e2d (phy: rcar-gen3-usb2: change the mode to OTG on the combined channel) Cc: # v4.5+ Reported-by: Simon Horman Signed-off-by: Yoshihiro Shimoda Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-rcar-gen3-usb2.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/phy/phy-rcar-gen3-usb2.c b/drivers/phy/phy-rcar-gen3-usb2.c index 76bb88f0700a..4be3f5dbbc9f 100644 --- a/drivers/phy/phy-rcar-gen3-usb2.c +++ b/drivers/phy/phy-rcar-gen3-usb2.c @@ -144,12 +144,6 @@ static void rcar_gen3_init_for_peri(struct rcar_gen3_chan *ch) extcon_set_cable_state_(ch->extcon, EXTCON_USB, true); } -static bool rcar_gen3_check_vbus(struct rcar_gen3_chan *ch) -{ - return !!(readl(ch->base + USB2_ADPCTRL) & - USB2_ADPCTRL_OTGSESSVLD); -} - static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch) { return !!(readl(ch->base + USB2_ADPCTRL) & USB2_ADPCTRL_IDDIG); @@ -157,13 +151,7 @@ static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch) static void rcar_gen3_device_recognition(struct rcar_gen3_chan *ch) { - bool is_host = true; - - /* B-device? */ - if (rcar_gen3_check_id(ch) && rcar_gen3_check_vbus(ch)) - is_host = false; - - if (is_host) + if (!rcar_gen3_check_id(ch)) rcar_gen3_init_for_host(ch); else rcar_gen3_init_for_peri(ch); From 075adb8046532d9642f411a92b4f385d04ced24d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 13 Jun 2016 23:31:47 +0000 Subject: [PATCH 0649/1050] phy: rockchip-dp: fix return value check in rockchip_dp_phy_probe() In case of error, the function devm_kzalloc() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Signed-off-by: Wei Yongjun Reviewed-by: Heiko Stuebner Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-rockchip-dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/phy-rockchip-dp.c b/drivers/phy/phy-rockchip-dp.c index 793ecb6d87bc..8b267a746576 100644 --- a/drivers/phy/phy-rockchip-dp.c +++ b/drivers/phy/phy-rockchip-dp.c @@ -90,7 +90,7 @@ static int rockchip_dp_phy_probe(struct platform_device *pdev) return -ENODEV; dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); - if (IS_ERR(dp)) + if (!dp) return -ENOMEM; dp->dev = dev; From 5cf700ac9d50353dc5b8194a57c6f40bf1fc4424 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 13 Jun 2016 13:45:48 +0200 Subject: [PATCH 0650/1050] phy: phy-sun4i-usb: Fix optional gpios failing probe The interrupt 0 is not a valid interrupt number. In the event where the retrieval of the vbus-det gpio would return null, the gpiod_to_irq callback would return 0, while the current code makes the assumption that it is a valid interrupt, and would go on calling request_irq. Obviously, this would fail, preventing the driver from probing properly, while the vbus and id gpios are optional. Signed-off-by: Quentin Schulz Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-sun4i-usb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c index bae54f7a1f48..45f01d6c9ed2 100644 --- a/drivers/phy/phy-sun4i-usb.c +++ b/drivers/phy/phy-sun4i-usb.c @@ -645,11 +645,11 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) data->id_det_irq = gpiod_to_irq(data->id_det_gpio); data->vbus_det_irq = gpiod_to_irq(data->vbus_det_gpio); - if ((data->id_det_gpio && data->id_det_irq < 0) || - (data->vbus_det_gpio && data->vbus_det_irq < 0)) + if ((data->id_det_gpio && data->id_det_irq <= 0) || + (data->vbus_det_gpio && data->vbus_det_irq <= 0)) data->phy0_poll = true; - if (data->id_det_irq >= 0) { + if (data->id_det_irq > 0) { ret = devm_request_irq(dev, data->id_det_irq, sun4i_usb_phy0_id_vbus_det_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, @@ -660,7 +660,7 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) } } - if (data->vbus_det_irq >= 0) { + if (data->vbus_det_irq > 0) { ret = devm_request_irq(dev, data->vbus_det_irq, sun4i_usb_phy0_id_vbus_det_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, From d99cb37828a2fe344ecc95d1fad570bbe447cc06 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 7 Jun 2016 18:14:56 +0100 Subject: [PATCH 0651/1050] phy-sun4i-usb: fix missing __iomem * Fix the missing __iomem attribute in sun4i_usb_phy_write() function. This fixes the following sparse warnings: drivers/phy/phy-sun4i-usb.c:178:39: warning: incorrect type in initializer (different address spaces) drivers/phy/phy-sun4i-usb.c:178:39: expected void *phyctl drivers/phy/phy-sun4i-usb.c:178:39: got void [noderef] * drivers/phy/phy-sun4i-usb.c:185:17: warning: incorrect type in argument 2 (different address spaces) drivers/phy/phy-sun4i-usb.c:185:17: expected void volatile [noderef] *addr drivers/phy/phy-sun4i-usb.c:185:17: got void *phyctl drivers/phy/phy-sun4i-usb.c:189:24: warning: incorrect type in argument 1 (different address spaces) drivers/phy/phy-sun4i-usb.c:189:24: expected void const volatile [noderef] *addr drivers/phy/phy-sun4i-usb.c:189:24: got void *phyctl drivers/phy/phy-sun4i-usb.c:196:17: warning: incorrect type in argument 2 (different address spaces) drivers/phy/phy-sun4i-usb.c:196:17: expected void volatile [noderef] *addr drivers/phy/phy-sun4i-usb.c:196:17: got void *phyctl drivers/phy/phy-sun4i-usb.c:199:24: warning: incorrect type in argument 1 (different address spaces) drivers/phy/phy-sun4i-usb.c:199:24: expected void const volatile [noderef] *addr drivers/phy/phy-sun4i-usb.c:199:24: got void *phyctl drivers/phy/phy-sun4i-usb.c:205:17: warning: incorrect type in argument 2 (different address spaces) drivers/phy/phy-sun4i-usb.c:205:17: expected void volatile [noderef] *addr drivers/phy/phy-sun4i-usb.c:205:17: got void *phyctl drivers/phy/phy-sun4i-usb.c:208:24: warning: incorrect type in argument 1 (different address spaces) drivers/phy/phy-sun4i-usb.c:208:24: expected void const volatile [noderef] *addr drivers/phy/phy-sun4i-usb.c:208:24: got void *phyctl drivers/phy/phy-sun4i-usb.c:210:17: warning: incorrect type in argument 2 (different address spaces) drivers/phy/phy-sun4i-usb.c:210:17: expected void volatile [noderef] *addr drivers/phy/phy-sun4i-usb.c:210:17: got void *phyctl drivers/phy/phy-sun4i-usb.c:212:24: warning: incorrect type in argument 1 (different address spaces) drivers/phy/phy-sun4i-usb.c:212:24: expected void const volatile [noderef] *addr drivers/phy/phy-sun4i-usb.c:212:24: got void *phyctl drivers/phy/phy-sun4i-usb.c:214:17: warning: incorrect type in argument 2 (different address spaces) drivers/phy/phy-sun4i-usb.c:214:17: expected void volatile [noderef] *addr drivers/phy/phy-sun4i-usb.c:214:17: got void *phyctl Signed-off-by: Ben Dooks Acked-by: Hans de Goede Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-sun4i-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c index 45f01d6c9ed2..739029430dda 100644 --- a/drivers/phy/phy-sun4i-usb.c +++ b/drivers/phy/phy-sun4i-usb.c @@ -175,7 +175,7 @@ static void sun4i_usb_phy_write(struct sun4i_usb_phy *phy, u32 addr, u32 data, { struct sun4i_usb_phy_data *phy_data = to_sun4i_usb_phy_data(phy); u32 temp, usbc_bit = BIT(phy->index * 2); - void *phyctl = phy_data->base + phy_data->cfg->phyctl_offset; + void __iomem *phyctl = phy_data->base + phy_data->cfg->phyctl_offset; int i; mutex_lock(&phy_data->mutex); From 6c081ff6fd5abd621797570be43d5e3c6acfcd58 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 10 May 2016 11:01:33 +0300 Subject: [PATCH 0652/1050] phy: bcm-ns-usb2: checking the wrong variable We intended to test "usb2->phy" here instead of "dev". Fixes: d3feb4067335 ('phy: bcm-ns-usb2: new driver for USB 2.0 PHY on Northstar') Signed-off-by: Dan Carpenter Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-bcm-ns-usb2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-bcm-ns-usb2.c b/drivers/phy/phy-bcm-ns-usb2.c index 95ab6b2a0de5..58dff80e9386 100644 --- a/drivers/phy/phy-bcm-ns-usb2.c +++ b/drivers/phy/phy-bcm-ns-usb2.c @@ -109,8 +109,8 @@ static int bcm_ns_usb2_probe(struct platform_device *pdev) } usb2->phy = devm_phy_create(dev, NULL, &ops); - if (IS_ERR(dev)) - return PTR_ERR(dev); + if (IS_ERR(usb2->phy)) + return PTR_ERR(usb2->phy); phy_set_drvdata(usb2->phy, usb2); platform_set_drvdata(pdev, usb2); From 79bb71bd1d93197ce227fa167b450b633f30a52b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 15 Jun 2016 22:57:38 +0200 Subject: [PATCH 0653/1050] gpio: make sure gpiod_to_irq() returns negative on NULL desc commit 54d77198fdfbc4f0fe11b4252c1d9c97d51a3264 ("gpio: bail out silently on NULL descriptors") doesn't work for gpiod_to_irq(): drivers assume that NULL descriptors will give negative IRQ numbers in return. It has been pointed out that returning 0 is NO_IRQ and that drivers should be amended to treat this as an error, but that is for the longer term: now let us repair the semantics. Cc: Maxime Ripard Reported-by: Hans de Goede Tested-by: Hans de Goede Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 58d822d7e8da..f39bf05993e7 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2056,7 +2056,14 @@ int gpiod_to_irq(const struct gpio_desc *desc) struct gpio_chip *chip; int offset; - VALIDATE_DESC(desc); + /* + * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics + * requires this function to not return zero on an invalid descriptor + * but rather a negative error number. + */ + if (!desc || !desc->gdev || !desc->gdev->chip) + return -EINVAL; + chip = desc->gdev->chip; offset = gpio_chip_hwgpio(desc); if (chip->to_irq) { From bfbbe44daf64d0ccf2de123179817f3557fb9237 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 16 Jun 2016 11:55:55 +0200 Subject: [PATCH 0654/1050] gpio: make library immune to error pointers Most functions that take a GPIO descriptor in need to check the descriptor for IS_ERR(). We do this mostly in the VALIDATE_DESC() macro except for the gpiod_to_irq() function which needs special handling. Cc: stable@vger.kernel.org Reported-by: Grygorii Strashko Reviewed-by: Grygorii Strashko Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f39bf05993e7..570771ed19e6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1373,8 +1373,12 @@ done: #define VALIDATE_DESC(desc) do { \ if (!desc) \ return 0; \ + if (IS_ERR(desc)) { \ + pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \ + return PTR_ERR(desc); \ + } \ if (!desc->gdev) { \ - pr_warn("%s: invalid GPIO\n", __func__); \ + pr_warn("%s: invalid GPIO (no device)\n", __func__); \ return -EINVAL; \ } \ if ( !desc->gdev->chip ) { \ @@ -1386,8 +1390,12 @@ done: #define VALIDATE_DESC_VOID(desc) do { \ if (!desc) \ return; \ + if (IS_ERR(desc)) { \ + pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \ + return; \ + } \ if (!desc->gdev) { \ - pr_warn("%s: invalid GPIO\n", __func__); \ + pr_warn("%s: invalid GPIO (no device)\n", __func__); \ return; \ } \ if (!desc->gdev->chip) { \ @@ -2061,7 +2069,7 @@ int gpiod_to_irq(const struct gpio_desc *desc) * requires this function to not return zero on an invalid descriptor * but rather a negative error number. */ - if (!desc || !desc->gdev || !desc->gdev->chip) + if (!desc || IS_ERR(desc) || !desc->gdev || !desc->gdev->chip) return -EINVAL; chip = desc->gdev->chip; From 76bf3441ad6584ddc3aea1501927f21b21ba3a3a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 7 Jun 2016 17:49:09 +0100 Subject: [PATCH 0655/1050] ata: sata_mv: fix mis-conversion in mv_write_cached_reg() Fix the signed issue in mv_write_cached_reg() where the laddr is assigned from a (long)addr instead of (unsigned long)addr. Fixes the following warnings: drivers/ata/sata_mv.c:989:26: warning: cast removes address space of expression drivers/ata/sata_mv.c:989:26: warning: cast removes address space of expression drivers/ata/sata_mv.c:989:26: warning: cast removes address space of expression drivers/ata/sata_mv.c:989:26: warning: cast removes address space of expression Signed-off-by: Ben Dooks Signed-off-by: Tejun Heo --- drivers/ata/sata_mv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index bd74ee555278..745489a1c86a 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -986,7 +986,7 @@ static inline void mv_write_cached_reg(void __iomem *addr, u32 *old, u32 new) * Looks like a lot of fuss, but it avoids an unnecessary * +1 usec read-after-write delay for unaffected registers. */ - laddr = (long)addr & 0xffff; + laddr = (unsigned long)addr & 0xffff; if (laddr >= 0x300 && laddr <= 0x33c) { laddr &= 0x000f; if (laddr == 0x4 || laddr == 0xc) { From 16ff4b454f1b56e8d89a9075feed0dd6ac510c3d Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 11 Jun 2016 18:11:10 +0200 Subject: [PATCH 0656/1050] btrfs: Use correct format specifier Component mirror_num of struct btrfsic_block is defined as unsigned int. Use %u as format specifier. Signed-off-by: Heinrich Schuchardt Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/check-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index b677a6ea6001..7706c8dc5fa6 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -2645,7 +2645,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, * This algorithm is recursive because the amount of used stack space * is very small and the max recursion depth is limited. */ - indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", + indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)", btrfsic_get_block_type(state, block), block->logical_bytenr, block->dev_state->name, block->dev_bytenr, block->mirror_num); From c871b0f2fd27e7f9097d507f47de5270f88003b9 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 6 Jun 2016 12:01:23 -0700 Subject: [PATCH 0657/1050] Btrfs: check if extent buffer is aligned to sectorsize Thanks to fuzz testing, we can pass an invalid bytenr to extent buffer via alloc_extent_buffer(). An unaligned eb can have more pages than it should have, which ends up extent buffer's leak or some corrupted content in extent buffer. This adds a warning to let us quickly know what was happening. Now that alloc_extent_buffer() no more returns NULL, this changes its caller and callers of its caller to match with the new error handling. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 ++ fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent-tree.c | 10 ++++++---- fs/btrfs/extent_io.c | 15 ++++++++++++--- fs/btrfs/tree-log.c | 4 ++-- fs/btrfs/volumes.c | 4 ++-- 6 files changed, 28 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 46025688f1d0..827c949fa4bc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2512,6 +2512,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, if (!btrfs_buffer_uptodate(tmp, 0, 0)) ret = -EIO; free_extent_buffer(tmp); + } else { + ret = PTR_ERR(tmp); } return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1142127f6e5e..7b5d5e8efde6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1098,7 +1098,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr) struct inode *btree_inode = root->fs_info->btree_inode; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, WAIT_NONE, btree_get_extent, 0); @@ -1114,7 +1114,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return 0; set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); @@ -1172,8 +1172,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 689d25ac6a68..5439e85c4813 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8016,8 +8016,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); @@ -8659,8 +8660,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root->fs_info, bytenr); if (!next) { next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, level - 1); reada = 1; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a3412d68ad37..aaee3ef55ed8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4892,18 +4892,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, int uptodate = 1; int ret; + if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) { + btrfs_err(fs_info, "bad tree block start %llu", start); + return ERR_PTR(-EINVAL); + } + eb = find_extent_buffer(fs_info, start); if (eb) return eb; eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); - if (!p) + if (!p) { + exists = ERR_PTR(-ENOMEM); goto free_eb; + } spin_lock(&mapping->private_lock); if (PagePrivate(p)) { @@ -4948,8 +4955,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); again: ret = radix_tree_preload(GFP_NOFS); - if (ret) + if (ret) { + exists = ERR_PTR(ret); goto free_eb; + } spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index aa8fed11f749..8ab1dc64cbba 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2422,8 +2422,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); if (*level == 1) { ret = wc->process_func(root, next, wc, ptr_gen); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fcbda4341f7d..c3a2900c6030 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6607,8 +6607,8 @@ int btrfs_read_sys_array(struct btrfs_root *root) * overallocate but we can keep it as-is, only the first page is used. */ sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET); - if (!sb) - return -ENOMEM; + if (IS_ERR(sb)) + return PTR_ERR(sb); set_extent_buffer_uptodate(sb); btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); /* From 64c12921e11b3a0c10d088606e328c58e29274d8 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 8 Jun 2016 00:36:38 -0400 Subject: [PATCH 0658/1050] btrfs: account for non-CoW'd blocks in btrfs_abort_transaction The test for !trans->blocks_used in btrfs_abort_transaction is insufficient to determine whether it's safe to drop the transaction handle on the floor. btrfs_cow_block, informed by should_cow_block, can return blocks that have already been CoW'd in the current transaction. trans->blocks_used is only incremented for new block allocations. If an operation overlaps the blocks in the current transaction entirely and must abort the transaction, we'll happily let it clean up the trans handle even though it may have modified the blocks and will commit an incomplete operation. In the long-term, I'd like to do closer tracking of when the fs is actually modified so we can still recover as gracefully as possible, but that approach will need some discussion. In the short term, since this is the only code using trans->blocks_used, let's just switch it to a bool indicating whether any blocks were used and set it when should_cow_block returns false. Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Jeff Mahoney Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 5 ++++- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.h | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 827c949fa4bc..6276add8538a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1554,6 +1554,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { + trans->dirty = true; *cow_ret = buf; return 0; } @@ -2777,8 +2778,10 @@ again: * then we don't want to set the path blocking, * so we test it here */ - if (!should_cow_block(trans, root, b)) + if (!should_cow_block(trans, root, b)) { + trans->dirty = true; goto cow_done; + } /* * must have write locks on this node and the diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5439e85c4813..29e5d000bbee 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8045,7 +8045,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } - trans->blocks_used++; + trans->dirty = true; /* this returns a buffer locked for blocking */ return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4339b6613f19..bf70d33b5791 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -235,7 +235,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, trans->aborted = errno; /* Nothing used. The other threads that have joined this * transaction may be able to continue. */ - if (!trans->blocks_used && list_empty(&trans->new_bgs)) { + if (!trans->dirty && list_empty(&trans->new_bgs)) { const char *errstr; errstr = btrfs_decode_error(errno); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9fe0ec2bf0fe..c5abee4f01ad 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -110,7 +110,6 @@ struct btrfs_trans_handle { u64 chunk_bytes_reserved; unsigned long use_count; unsigned long blocks_reserved; - unsigned long blocks_used; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; @@ -121,6 +120,7 @@ struct btrfs_trans_handle { bool can_flush_pending_bgs; bool reloc_reserved; bool sync; + bool dirty; unsigned int type; /* * this root is only needed to validate that the root passed to From 3b6571c180da85e43550c608e954ab7b2a31d954 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 27 May 2016 13:03:04 -0400 Subject: [PATCH 0659/1050] Btrfs: don't BUG_ON() in btrfs_orphan_add This is just a screwup for developers, so change it to an ASSERT() so developers notice when things go wrong and deal with the error appropriately if ASSERT() isn't enabled. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Mark Fasheh Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e5558d9e396e..bb62418b8023 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3271,7 +3271,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* grab metadata reservation from transaction handle */ if (reserve) { ret = btrfs_orphan_reserve_metadata(trans, inode); - BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */ + ASSERT(!ret); + if (ret) { + atomic_dec(&root->orphan_inodes); + clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags); + if (insert) + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); + return ret; + } } /* insert an orphan item to track this unlinked/truncated file */ From 90c711ab380d633bf85249bd5d819edb601feda7 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sun, 12 Jun 2016 23:39:58 -0400 Subject: [PATCH 0660/1050] btrfs: avoid blocking open_ctree from cleaner_kthread This fixes a problem introduced in commit 2f3165ecf103599f82bf0ea254039db335fb5005 "btrfs: don't force mounts to wait for cleaner_kthread to delete one or more subvolumes". open_ctree eventually calls btrfs_replay_log which in turn calls btrfs_commit_super which tries to lock the cleaner_mutex, causing a recursive mutex deadlock during mount. Instead of playing whack-a-mole trying to keep up with all the functions that may want to lock cleaner_mutex, put all the cleaner_mutex lockers back where they were, and attack the problem more directly: keep cleaner_kthread asleep until the filesystem is mounted. When filesystems are mounted read-only and later remounted read-write, open_ctree did not set fs_info->open and neither does anything else. Set this flag in btrfs_remount so that neither btrfs_delete_unused_bgs nor cleaner_kthread get confused by the common case of "/" filesystem read-only mount followed by read-write remount. Signed-off-by: Zygo Blaxell Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 25 ++++++++++--------------- fs/btrfs/super.c | 2 ++ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7b5d5e8efde6..789f5f233940 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1806,6 +1806,13 @@ static int cleaner_kthread(void *arg) if (btrfs_need_cleaner_sleep(root)) goto sleep; + /* + * Do not do anything if we might cause open_ctree() to block + * before we have finished mounting the filesystem. + */ + if (!root->fs_info->open) + goto sleep; + if (!mutex_trylock(&root->fs_info->cleaner_mutex)) goto sleep; @@ -2520,7 +2527,6 @@ int open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; int max_active; - bool cleaner_mutex_locked = false; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); @@ -2999,13 +3005,6 @@ retry_root_backup: goto fail_sysfs; } - /* - * Hold the cleaner_mutex thread here so that we don't block - * for a long time on btrfs_recover_relocation. cleaner_kthread - * will wait for us to finish mounting the filesystem. - */ - mutex_lock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = true; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) @@ -3065,8 +3064,10 @@ retry_root_backup: ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto fail_qgroup; - /* We locked cleaner_mutex before creating cleaner_kthread. */ + + mutex_lock(&fs_info->cleaner_mutex); ret = btrfs_recover_relocation(tree_root); + mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { btrfs_warn(fs_info, "failed to recover relocation: %d", ret); @@ -3074,8 +3075,6 @@ retry_root_backup: goto fail_qgroup; } } - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; location.objectid = BTRFS_FS_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; @@ -3189,10 +3188,6 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); fail_sysfs: - if (cleaner_mutex_locked) { - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; - } btrfs_sysfs_remove_mounted(fs_info); fail_fsdev_sysfs: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bf70d33b5791..60e7179ed4b7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1807,6 +1807,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } } sb->s_flags &= ~MS_RDONLY; + + fs_info->open = 1; } out: wake_up_process(fs_info->transaction_kthread); From f7af3934c2bccba261972261ac8ebcbf92a346b2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Jun 2016 18:15:25 +0200 Subject: [PATCH 0661/1050] btrfs: use new error message helper in qgroup_account_snapshot We've renamed btrfs_std_error, this one is left from last merge. Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f6e24cb423ae..4e74b5733030 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1385,7 +1385,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, switch_commit_roots(trans->transaction, fs_info); ret = btrfs_write_and_wait_transaction(trans, src); if (ret) - btrfs_std_error(fs_info, ret, + btrfs_handle_fs_error(fs_info, ret, "Error while writing out transaction for qgroup"); out: From 89c5a5441d703ba068699524ae59f7806e9b173d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 16 Jun 2016 17:34:28 +0200 Subject: [PATCH 0662/1050] btrfs: remove build fixup for qgroup_account_snapshot Introduced in 2c1984f244838477aab ("btrfs: build fixup for qgroup_account_snapshot") as temporary bisectability build fixup. Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4e74b5733030..765845742fde 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1311,11 +1311,6 @@ int btrfs_defrag_root(struct btrfs_root *root) return ret; } -/* Bisesctability fixup, remove in 4.8 */ -#ifndef btrfs_std_error -#define btrfs_std_error btrfs_handle_fs_error -#endif - /* * Do all special snapshot related qgroup dirty hack. * From dd5c93111dc9d26e038ac437f7a403d617e82c62 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 16 Jun 2016 22:07:58 +0530 Subject: [PATCH 0663/1050] Btrfs: btrfs_check_super_valid: Allow 4096 as stripesize Older btrfs-progs/mkfs.btrfs sets 4096 as the stripesize. Hence restricting stripesize to be equal to sectorsize would cause super block validation to return an error on architectures where PAGE_SIZE is not equal to 4096. Hence as a workaround, this commit allows stripesize to be set to 4096 bytes. Signed-off-by: Chandan Rajendra Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 789f5f233940..54cca7a1572b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4134,7 +4134,8 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, ret = -EINVAL; } if (!is_power_of_2(btrfs_super_stripesize(sb)) || - btrfs_super_stripesize(sb) != sectorsize) { + ((btrfs_super_stripesize(sb) != sectorsize) && + (btrfs_super_stripesize(sb) != 4096))) { btrfs_err(fs_info, "invalid stripesize %u", btrfs_super_stripesize(sb)); ret = -EINVAL; From 041bf0225552044b85ce7ee7981e790d987d6ceb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Jun 2016 11:30:23 +0300 Subject: [PATCH 0664/1050] drm/amdgpu: missing bounds check in amdgpu_set_pp_force_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no limit on high "idx" can go. It should be less than ARRAY_SIZE(data.states) which is 16. The "data" variable wasn't declared in that scope so I shifted the code around a bit to make it work. Also I made "idx" unsigned. Fixes: f3898ea12fc1 ('drm/amd/powerplay: add some sysfs interfaces for powerplay.') Acked-by: Christian König Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 28 ++++++++++++-------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 589b36e8c5cf..0e13d80d2a95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -270,30 +270,28 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type state = 0; - long idx; + unsigned long idx; int ret; if (strlen(buf) == 1) adev->pp_force_state_enabled = false; - else { - ret = kstrtol(buf, 0, &idx); + else if (adev->pp_enabled) { + struct pp_states_info data; - if (ret) { + ret = kstrtoul(buf, 0, &idx); + if (ret || idx >= ARRAY_SIZE(data.states)) { count = -EINVAL; goto fail; } - if (adev->pp_enabled) { - struct pp_states_info data; - amdgpu_dpm_get_pp_num_states(adev, &data); - state = data.states[idx]; - /* only set user selected power states */ - if (state != POWER_STATE_TYPE_INTERNAL_BOOT && - state != POWER_STATE_TYPE_DEFAULT) { - amdgpu_dpm_dispatch_task(adev, - AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); - adev->pp_force_state_enabled = true; - } + amdgpu_dpm_get_pp_num_states(adev, &data); + state = data.states[idx]; + /* only set user selected power states */ + if (state != POWER_STATE_TYPE_INTERNAL_BOOT && + state != POWER_STATE_TYPE_DEFAULT) { + amdgpu_dpm_dispatch_task(adev, + AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); + adev->pp_force_state_enabled = true; } } fail: From 6982f867e058f0c6c8d3eeb4ef305efd7d7f4c8c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 5 Jun 2016 21:01:18 +0200 Subject: [PATCH 0665/1050] Revert "Staging: drivers: rtl8188eu: use sizeof(*ptr) instead of sizeof(struct)" This reverts commit 99aded71b52c ("Staging: drivers: rtl8188eu: use sizeof(*ptr) instead of sizeof(struct)"). This commit is wrong, as adapt->HalData has a type of "void *", so now we are allocating a much to small struct, which causes the driver to overwrite random memory which leads to a non working driver and various system crashes. Cc: Jacky Boen Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/hal/usb_halinit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/hal/usb_halinit.c b/drivers/staging/rtl8188eu/hal/usb_halinit.c index 87ea3b844951..363f3a34ddce 100644 --- a/drivers/staging/rtl8188eu/hal/usb_halinit.c +++ b/drivers/staging/rtl8188eu/hal/usb_halinit.c @@ -2072,7 +2072,8 @@ void rtl8188eu_set_hal_ops(struct adapter *adapt) { struct hal_ops *halfunc = &adapt->HalFunc; - adapt->HalData = kzalloc(sizeof(*adapt->HalData), GFP_KERNEL); + + adapt->HalData = kzalloc(sizeof(struct hal_data_8188e), GFP_KERNEL); if (!adapt->HalData) DBG_88E("cant not alloc memory for HAL DATA\n"); From bc8201e3e1885614a5cd3a5f70a79d08f99152fd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 5 Jun 2016 21:01:19 +0200 Subject: [PATCH 0666/1050] Revert "Staging: rtl8188eu: rtw_efuse: Use sizeof type *pointer instead of sizeof type." This reverts commit b5e12ec38331 ("Staging: rtl8188eu: rtw_efuse: Use sizeof type *pointer instead of sizeof type."). This commit is wrong, the rtw_malloc2d helper function takes the size of the array elements as its 3th argument, whereas sizeof(*eFuseWord) gives the size of a pointer instead of the size of a u16. Since sizeof(void *) > sizeof(u16) this has no adverse effects, but it is still wrong. Cc: Sandhya Bankar Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_efuse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_efuse.c b/drivers/staging/rtl8188eu/core/rtw_efuse.c index c17870cddb5b..fbce1f7e68ca 100644 --- a/drivers/staging/rtl8188eu/core/rtw_efuse.c +++ b/drivers/staging/rtl8188eu/core/rtw_efuse.c @@ -102,7 +102,7 @@ efuse_phymap_to_logical(u8 *phymap, u16 _offset, u16 _size_byte, u8 *pbuf) if (!efuseTbl) return; - eFuseWord = (u16 **)rtw_malloc2d(EFUSE_MAX_SECTION_88E, EFUSE_MAX_WORD_UNIT, sizeof(*eFuseWord)); + eFuseWord = (u16 **)rtw_malloc2d(EFUSE_MAX_SECTION_88E, EFUSE_MAX_WORD_UNIT, sizeof(u16)); if (!eFuseWord) { DBG_88E("%s: alloc eFuseWord fail!\n", __func__); goto eFuseWord_failed; From 0b10029d826ed8c18a5f9d2f4abfa415d15cd1d3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 17 Jun 2016 10:17:17 -0400 Subject: [PATCH 0667/1050] drm/amdgpu: fix num_rbs exposed to userspace (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was accidently broken for harvest cards when the code was refactored for Polaris support. v2: multiply by shader engines. Noticed by Nicolai. Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 40a23704a981..d851ea15059f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -447,7 +447,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.max_memory_clock = adev->pm.default_mclk * 10; } dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; - dev_info.num_rb_pipes = adev->gfx.config.num_rbs; + dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * + adev->gfx.config.max_shader_engines; dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; dev_info._pad = 0; dev_info.ids_flags = 0; From 8c5122e45a10a9262f872b53f151a592e870f905 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jun 2016 17:28:29 -0600 Subject: [PATCH 0668/1050] IB/mlx4: Properly initialize GRH TClass and FlowLabel in AHs When this code was reworked for IBoE support the order of assignments for the sl_tclass_flowlabel got flipped around resulting in TClass & FlowLabel being permanently set to 0 in the packet headers. This breaks IB routers that rely on these headers, but only affects kernel users - libmlx4 does this properly for user space. Cc: stable@vger.kernel.org Fixes: fa417f7b520e ("IB/mlx4: Add support for IBoE") Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/ah.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 105246fba2e7..5fc623362731 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -47,6 +47,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.ib.g_slid = ah_attr->src_path_bits; + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); if (ah_attr->ah_flags & IB_AH_GRH) { ah->av.ib.g_slid |= 0x80; ah->av.ib.gid_index = ah_attr->grh.sgid_index; @@ -64,7 +65,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) --ah->av.ib.stat_rate; } - ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); return &ah->ibah; } From 37e07cdafc111dfb7ce27e70e73d900d7cf2920c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 10 Jun 2016 11:08:25 -0700 Subject: [PATCH 0669/1050] IB/cma: Make the code easier to verify Static source code analysis tools like smatch cannot handle functions that lock or not lock a mutex depending on the value of the arguments. Hence inline the function cma_disable_callback(). Additionally, this patch realizes a small performance optimization by reducing the number of mutex_lock() and mutex_unlock() calls in the modified functions. With this patch applied smatch no longer complains about source file cma.c. Without this patch smatch reports the following for this source file: drivers/infiniband/core/cma.c:1959: cma_req_handler() warn: inconsistent returns 'mutex:&listen_id->handler_mutex'. Locked on: line 1880 line 1959 Unlocked on: line 1941 drivers/infiniband/core/cma.c:2112: iw_conn_req_handler() warn: inconsistent returns 'mutex:&listen_id->handler_mutex'. Locked on: line 2048 Unlocked on: line 2112 Signed-off-by: Bart Van Assche Cc: Sean Hefty Cc: Steve Wise Cc: Leon Romanovsky Acked-by: Sean Hefty Reviewed-by: Steve Wise Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 54 +++++++++++++++++------------------ 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f0c91ba3178a..c58ee771baaa 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -708,17 +708,6 @@ static void cma_deref_id(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -static int cma_disable_callback(struct rdma_id_private *id_priv, - enum rdma_cm_state state) -{ - mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != state) { - mutex_unlock(&id_priv->handler_mutex); - return -EINVAL; - } - return 0; -} - struct rdma_cm_id *rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, @@ -1671,11 +1660,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_cm_event event; int ret = 0; + mutex_lock(&id_priv->handler_mutex); if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && - cma_disable_callback(id_priv, RDMA_CM_CONNECT)) || + id_priv->state != RDMA_CM_CONNECT) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && - cma_disable_callback(id_priv, RDMA_CM_DISCONNECT))) - return 0; + id_priv->state != RDMA_CM_DISCONNECT)) + goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { @@ -1870,7 +1860,7 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { - struct rdma_id_private *listen_id, *conn_id; + struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event; struct net_device *net_dev; int offset, ret; @@ -1884,9 +1874,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto net_dev_put; } - if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) { + mutex_lock(&listen_id->handler_mutex); + if (listen_id->state != RDMA_CM_LISTEN) { ret = -ECONNABORTED; - goto net_dev_put; + goto err1; } memset(&event, 0, sizeof event); @@ -1976,8 +1967,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; - if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) - return 0; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_CONNECT) + goto out; memset(&event, 0, sizeof event); switch (iw_event->event) { @@ -2029,6 +2021,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) return ret; } +out: mutex_unlock(&id_priv->handler_mutex); return ret; } @@ -2039,13 +2032,15 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; - int ret; + int ret = -ECONNABORTED; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; listen_id = cm_id->context; - if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) - return -ECONNABORTED; + + mutex_lock(&listen_id->handler_mutex); + if (listen_id->state != RDMA_CM_LISTEN) + goto out; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, @@ -3216,8 +3211,9 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; - if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) - return 0; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_CONNECT) + goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { @@ -3673,12 +3669,13 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; struct rdma_cm_event event; - int ret; + int ret = 0; id_priv = mc->id_priv; - if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) && - cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) - return 0; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_ADDR_BOUND && + id_priv->state != RDMA_CM_ADDR_RESOLVED) + goto out; if (!status) status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); @@ -3720,6 +3717,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) return 0; } +out: mutex_unlock(&id_priv->handler_mutex); return 0; } From b4ba6633ea153266429f16614029ab1578815390 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Thu, 9 Jun 2016 07:51:08 -0700 Subject: [PATCH 0670/1050] IB/hfi1: Fix credit return threshold adjustment The credit return threshold adjustment on mtu change algorithm does not take into account all the kernel send contexts that are assigned per VL. Use the pio send context map to adjust the credit return thresholds for all the allocated and assigned kernel send contexts based on the MTU adjustment per VL. The pio send context map can be changed dynamically based on the actual number of operational vls which is set by the fabric manager. When this happens update the credit return threshold values for all the remapped kernel send contexts. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Reviewed-by: Jianxin Xiong Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 7 +++++-- drivers/infiniband/hw/hfi1/pio.c | 24 ++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 81619fbb5842..c533d4422399 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9777,7 +9777,7 @@ static void set_send_length(struct hfi1_pportdata *ppd) u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2) & SEND_LEN_CHECK1_LEN_VL15_MASK) << SEND_LEN_CHECK1_LEN_VL15_SHIFT; - int i; + int i, j; u32 thres; for (i = 0; i < ppd->vls_supported; i++) { @@ -9801,7 +9801,10 @@ static void set_send_length(struct hfi1_pportdata *ppd) sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu, dd->rcd[0]->rcvhdrqentsize)); - sc_set_cr_threshold(dd->vld[i].sc, thres); + for (j = 0; j < INIT_SC_PER_VL; j++) + sc_set_cr_threshold( + pio_select_send_context_vl(dd, j, i), + thres); } thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50), sc_mtu_to_threshold(dd->vld[15].sc, diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index d5edb1afbb8f..e0e1ff2a0c72 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1797,6 +1797,21 @@ static void pio_map_rcu_callback(struct rcu_head *list) pio_map_free(m); } +/* + * Set credit return threshold for the kernel send context + */ +static void set_threshold(struct hfi1_devdata *dd, int scontext, int i) +{ + u32 thres; + + thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext], + 50), + sc_mtu_to_threshold(dd->kernel_send_context[scontext], + dd->vld[i].mtu, + dd->rcd[0]->rcvhdrqentsize)); + sc_set_cr_threshold(dd->kernel_send_context[scontext], thres); +} + /* * pio_map_init - called when #vls change * @dd: hfi1_devdata @@ -1872,11 +1887,16 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) if (!newmap->map[i]) goto bail; newmap->map[i]->mask = (1 << ilog2(sz)) - 1; - /* assign send contexts */ + /* + * assign send contexts and + * adjust credit return threshold + */ for (j = 0; j < sz; j++) { - if (dd->kernel_send_context[scontext]) + if (dd->kernel_send_context[scontext]) { newmap->map[i]->ksc[j] = dd->kernel_send_context[scontext]; + set_threshold(dd, scontext, i); + } if (++scontext >= first_scontext + vl_scontexts[i]) /* wrap back to first send context */ From 96605672a4172f6e31f31ce29ee27fef68011de0 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Thu, 9 Jun 2016 07:51:14 -0700 Subject: [PATCH 0671/1050] IB/rdmavt: Correct required callback functions for MODIFY_QP Functions required for MODIFY_PORT were incorrectly being required for MODIFY_QP. Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e1cc2cc42f25..30c4fda7a05a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -501,9 +501,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) !rdi->driver_f.quiesce_qp || !rdi->driver_f.notify_error_qp || !rdi->driver_f.mtu_from_qp || - !rdi->driver_f.mtu_to_path_mtu || - !rdi->driver_f.shut_down_port || - !rdi->driver_f.cap_mask_chg) + !rdi->driver_f.mtu_to_path_mtu) return -EINVAL; break; From 501edc42446e89fa67fb6ef2c9afb50792c310c0 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Thu, 9 Jun 2016 07:51:20 -0700 Subject: [PATCH 0672/1050] IB/rdmavt: Correct warning during QPN allocation Correct calculation of the low order bits which should be unset based on use of qos_shift parameter when assigning QPN. Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7de5134bec85..c3e0d614f68b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -369,8 +369,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, /* wrap to first map page, invert bit 0 */ offset = qpt->incr | ((offset & 1) ^ 1); } - /* there can be no bits at shift and below */ - WARN_ON(offset & (rdi->dparms.qos_shift - 1)); + /* there can be no set bits in low-order QoS bits */ + WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1)); qpn = mk_qpn(qpt, map, offset); } From c3c64a951cbdb6096d02dcc339a9c807ce1e976a Mon Sep 17 00:00:00 2001 From: Jubin John Date: Thu, 9 Jun 2016 07:51:27 -0700 Subject: [PATCH 0673/1050] IB/hfi1: Increase packet egress timeout The current value of 500us for the packet egress timeout is too small which causes the host to declare failure on draining packets too early and unnecessarily bounces the link. Increase this to 50ms taking into account the switch packet discard timer default and the worst case per-VL package drainage rate. Reviewed-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index e0e1ff2a0c72..d4022450b73f 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -995,7 +995,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) /* counter is reset if occupancy count changes */ if (reg != reg_prev) loop = 0; - if (loop > 500) { + if (loop > 50000) { /* timed out - bounce the link */ dd_dev_err(dd, "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", From ca2f30a0a6786e6b08eeb8abb2bbb8df58709d6e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 9 Jun 2016 07:51:33 -0700 Subject: [PATCH 0674/1050] IB/hfi1: Prevent context loss If a context has already been assigned to an FD, prevent another assignment. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7a5b0e676cc7..c702a009608f 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -203,6 +203,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, switch (cmd) { case HFI1_IOCTL_ASSIGN_CTXT: + if (uctxt) + return -EINVAL; + if (copy_from_user(&uinfo, (struct hfi1_user_info __user *)arg, sizeof(uinfo))) From 5e9ef24619486213223053678eb9175630ef6bf9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 9 Jun 2016 07:51:39 -0700 Subject: [PATCH 0675/1050] IB/qib: Prevent context loss If a context has already been assigned to an FD, prevent another assignment. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_file_ops.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index ff946d5f59e4..382466a90da7 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -2178,6 +2178,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data, switch (cmd.type) { case QIB_CMD_ASSIGN_CTXT: + if (rcd) { + ret = -EINVAL; + goto bail; + } + ret = qib_assign_ctxt(fp, &cmd.cmd.user_info); if (ret) goto bail; From 93dd0a097859a174817ea94ec55bfc29c5706454 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 9 Jun 2016 07:51:45 -0700 Subject: [PATCH 0676/1050] IB/hfi1: Fix potential NULL ptr dereference This fixes potential NULL ptr dereference because IS_ERR(dd) doesn't handle NULL. Fix the issue by initializing the pointer with a not NULL error code. Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 0d28a5a40fae..eed971ccd2a1 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1383,7 +1383,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd) static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; - struct hfi1_devdata *dd = NULL; + struct hfi1_devdata *dd = ERR_PTR(-EINVAL); struct hfi1_pportdata *ppd; /* First, lock the non-writable module parameters */ From c078f0dd01b73c70b92a660cb1ce3dfc3cbf2903 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 9 Jun 2016 07:51:51 -0700 Subject: [PATCH 0677/1050] IB/hfi1: Fix potential buffer overflow This fixes potential buffer overflow because the sprintf function doesn't check buffer boundaries. Use snprintf instead. Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qsfp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 2441669f0817..9fb561682c66 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -579,7 +579,8 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) if (ppd->qsfp_info.cache_valid) { if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS])) - sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]); + snprintf(lenstr, sizeof(lenstr), "%dM ", + cache[QSFP_MOD_LEN_OFFS]); power_byte = cache[QSFP_MOD_PWR_OFFS]; sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", From 3ec5fa28c9ea54fb172859d2612a0be6526e4dc1 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 9 Jun 2016 07:51:57 -0700 Subject: [PATCH 0678/1050] IB/hfi1: Remove FULL_MGMT_P_KEY from pkey table at link up FULL_MGMT_P_KEY doesn't get cleared from the pkey table at link bounce because the link down and link bounce code paths are different when moving a QSFP cable on a switch. This causes an HFI unit connected to a switch to try to be initialized to an FM node when the QSFP cable is moved from a MgmtAllowed=NO port to a MgmtAllowed=YES port and back to a MgmtAllowed=NO port. Remove FULL_MGMT_P_KEY from pkey table at link up. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c533d4422399..3487fdfb2c63 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1037,7 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *); static void dc_start(struct hfi1_devdata *); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); -static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd); +static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6962,8 +6962,6 @@ void handle_link_down(struct work_struct *work) } reset_neighbor_info(ppd); - if (ppd->mgmt_allowed) - remove_full_mgmt_pkey(ppd); /* disable the port */ clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); @@ -7072,10 +7070,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); } -static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd) +static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd) { - ppd->pkeys[2] = 0; - (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + if (ppd->pkeys[2] != 0) { + ppd->pkeys[2] = 0; + (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + } } /* @@ -9168,6 +9168,13 @@ int start_link(struct hfi1_pportdata *ppd) return 0; } + /* + * FULL_MGMT_P_KEY is cleared from the pkey table, so that the + * pkey table can be configured properly if the HFI unit is connected + * to switch port with MgmtAllowed=NO + */ + clear_full_mgmt_pkey(ppd); + return set_link_state(ppd, HLS_DN_POLL); } From 34d351f8ddf6dee24d739c4b00a4404e48089a04 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 9 Jun 2016 07:52:03 -0700 Subject: [PATCH 0679/1050] IB/hfi1: Send a pkey change event on driver pkey update Swapping a cable from a "Mgmt Allowed=No" switch port to a "Mgmt Allowed=Yes" switch port doesn't send a pkey change notification. Therefore, the link doesn't become active as the oib_utils layer uses an old pkey table cache. Fix by ensuring the pkey change notification is sent when the table is changed both explicitly by the FM and implicitly by the driver via a cable swap. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 ++ drivers/infiniband/hw/hfi1/mad.c | 19 ++++++++++++------- drivers/infiniband/hw/hfi1/mad.h | 2 ++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3487fdfb2c63..f5de85178055 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7068,6 +7068,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); ppd->pkeys[2] = FULL_MGMT_P_KEY; (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + hfi1_event_pkey_change(ppd->dd, ppd->port); } static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd) @@ -7075,6 +7076,7 @@ static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd) if (ppd->pkeys[2] != 0) { ppd->pkeys[2] = 0; (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + hfi1_event_pkey_change(ppd->dd, ppd->port); } } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 219029576ba0..fca07a1d6c28 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -78,6 +78,16 @@ static inline void clear_opa_smp_data(struct opa_smp *smp) memset(data, 0, size); } +void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port) +{ + struct ib_event event; + + event.event = IB_EVENT_PKEY_CHANGE; + event.device = &dd->verbs_dev.rdi.ibdev; + event.element.port_num = port; + ib_dispatch_event(&event); +} + static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) { struct ib_mad_send_buf *send_buf; @@ -1418,15 +1428,10 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) } if (changed) { - struct ib_event event; - (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); - - event.event = IB_EVENT_PKEY_CHANGE; - event.device = &dd->verbs_dev.rdi.ibdev; - event.element.port_num = port; - ib_dispatch_event(&event); + hfi1_event_pkey_change(dd, port); } + return 0; } diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 55ee08675333..8b734aaae88a 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -434,4 +434,6 @@ struct sc2vlnt { COUNTER_MASK(1, 3) | \ COUNTER_MASK(1, 4)) +void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port); + #endif /* _HFI1_MAD_H */ From 083d5ad1a924e79ecf92be37cce9f1efa5c1d240 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Jun 2016 13:41:07 -0700 Subject: [PATCH 0680/1050] usbip: rate limit get_frame_number message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's annoying to constantly see the same "Not yet implemented" message over and over with nothing able to be done about it, so rate limit it for now to keep user's logs "clean". Reported-by: Lars Täuber Tested-by: Lars Täuber Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index fca51105974e..2e0450bec1b1 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -941,7 +941,7 @@ static void vhci_stop(struct usb_hcd *hcd) static int vhci_get_frame_number(struct usb_hcd *hcd) { - pr_err("Not yet implemented\n"); + dev_err_ratelimited(&hcd->self.root_hub->dev, "Not yet implemented\n"); return 0; } From 3a4955111ad46a022f05b51f91306d864f989625 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 27 May 2016 18:08:27 -0400 Subject: [PATCH 0681/1050] isa: Allow ISA-style drivers on modern systems Several modern devices, such as PC/104 cards, are expected to run on modern systems via an ISA bus interface. Since ISA is a legacy interface for most modern architectures, ISA support should remain disabled in general. Support for ISA-style drivers should be enabled on a per driver basis. To allow ISA-style drivers on modern systems, this patch introduces the ISA_BUS_API and ISA_BUS Kconfig options. The ISA bus driver will now build conditionally on the ISA_BUS_API Kconfig option, which defaults to the legacy ISA Kconfig option. The ISA_BUS Kconfig option allows the ISA_BUS_API Kconfig option to be selected on architectures which do not enable ISA (e.g. X86_64). The ISA_BUS Kconfig option is currently only implemented for X86 architectures. Other architectures may have their own ISA_BUS Kconfig options added as required. Reviewed-by: Guenter Roeck Signed-off-by: William Breathitt Gray Acked-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 9 +++++++++ drivers/base/Makefile | 2 +- include/linux/isa.h | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index d794384a0404..e9734796531f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -606,6 +606,9 @@ config HAVE_ARCH_HASH file which provides platform-specific implementations of some functions in or fs/namei.c. +config ISA_BUS_API + def_bool ISA + # # ABI hall of shame # diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..d9a94da0c29f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2439,6 +2439,15 @@ config PCI_CNB20LE_QUIRK source "drivers/pci/Kconfig" +config ISA_BUS + bool "ISA-style bus support on modern systems" if EXPERT + select ISA_BUS_API + help + Enables ISA-style drivers on modern systems. This is necessary to + support PC/104 devices on X86_64 platforms. + + If unsure, say N. + # x86_64 have no ISA slots, but can have ISA-style DMA. config ISA_DMA_API bool "ISA-style DMA support" if (X86_64 && EXPERT) diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 6b2a84e7f2be..2609ba20b396 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o -obj-$(CONFIG_ISA) += isa.o +obj-$(CONFIG_ISA_BUS_API) += isa.o obj-$(CONFIG_FW_LOADER) += firmware_class.o obj-$(CONFIG_NUMA) += node.o obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o diff --git a/include/linux/isa.h b/include/linux/isa.h index 5ab85281230b..384ab9b7d79a 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -22,7 +22,7 @@ struct isa_driver { #define to_isa_driver(x) container_of((x), struct isa_driver, driver) -#ifdef CONFIG_ISA +#ifdef CONFIG_ISA_BUS_API int isa_register_driver(struct isa_driver *, unsigned int); void isa_unregister_driver(struct isa_driver *); #else From f4ae916912b4969916ebb275995c745b01cb432c Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 27 May 2016 18:08:56 -0400 Subject: [PATCH 0682/1050] gpio: Allow PC/104 devices on X86_64 With the introduction of the ISA_BUS_API Kconfig option, ISA-style drivers may be built for X86_64 architectures. This patch changes the ISA Kconfig option dependency of the PC/104 drivers to ISA_BUS_API, thus allowing them to build for X86_64 as they are expected to. Cc: Alexandre Courbot Reviewed-by: Guenter Roeck Signed-off-by: William Breathitt Gray Acked-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index a116609b1914..cebcb405812e 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -531,7 +531,7 @@ menu "Port-mapped I/O GPIO drivers" config GPIO_104_DIO_48E tristate "ACCES 104-DIO-48E GPIO support" - depends on ISA + depends on ISA_BUS_API select GPIOLIB_IRQCHIP help Enables GPIO support for the ACCES 104-DIO-48E series (104-DIO-48E, @@ -541,7 +541,7 @@ config GPIO_104_DIO_48E config GPIO_104_IDIO_16 tristate "ACCES 104-IDIO-16 GPIO support" - depends on ISA + depends on ISA_BUS_API select GPIOLIB_IRQCHIP help Enables GPIO support for the ACCES 104-IDIO-16 family (104-IDIO-16, @@ -552,7 +552,7 @@ config GPIO_104_IDIO_16 config GPIO_104_IDI_48 tristate "ACCES 104-IDI-48 GPIO support" - depends on ISA + depends on ISA_BUS_API select GPIOLIB_IRQCHIP help Enables GPIO support for the ACCES 104-IDI-48 family (104-IDI-48A, @@ -628,7 +628,7 @@ config GPIO_TS5500 config GPIO_WS16C48 tristate "WinSystems WS16C48 GPIO support" - depends on ISA + depends on ISA_BUS_API select GPIOLIB_IRQCHIP help Enables GPIO support for the WinSystems WS16C48. The base port From 75897b7c5ee9f9a23ec7863d4f8758bc041ba656 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 27 May 2016 18:09:08 -0400 Subject: [PATCH 0683/1050] iio: stx104: Allow build for X86_64 With the introduction of the ISA_BUS_API Kconfig option, ISA-style drivers may be built for X86_64 architectures. This patch changes the ISA Kconfig option dependency of the Apex Embedded Systems STX104 DAC driver to ISA_BUS_API, thus allowing it to build for X86_64 as it is expected to. Cc: Hartmut Knaack Cc: Lars-Peter Clausen Cc: Peter Meerwald-Stadler Reviewed-by: Guenter Roeck Signed-off-by: William Breathitt Gray Acked-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/dac/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig index e63b957c985f..f7c71da42f15 100644 --- a/drivers/iio/dac/Kconfig +++ b/drivers/iio/dac/Kconfig @@ -247,7 +247,7 @@ config MCP4922 config STX104 tristate "Apex Embedded Systems STX104 DAC driver" - depends on X86 && ISA + depends on X86 && ISA_BUS_API help Say yes here to build support for the 2-channel DAC on the Apex Embedded Systems STX104 integrated analog PC/104 card. The base port From b87b8ff760d51b33b4e23d7f3a42ded55a668735 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 27 May 2016 18:09:27 -0400 Subject: [PATCH 0684/1050] watchdog: ebc-c384_wdt: Allow build for X86_64 With the introduction of the ISA_BUS_API Kconfig option, ISA-style drivers may be built for X86_64 architectures. This patch changes the ISA Kconfig option dependency of the WinSystems EBC-C384 watchdog timer driver to ISA_BUS_API, thus allowing it to build for X86_64 as it is expected to. Cc: Wim Van Sebroeck Reviewed-by: Guenter Roeck Signed-off-by: William Breathitt Gray Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index b54f26c55dfd..b4b3e256491b 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -746,7 +746,7 @@ config ALIM7101_WDT config EBC_C384_WDT tristate "WinSystems EBC-C384 Watchdog Timer" - depends on X86 && ISA + depends on X86 && ISA_BUS_API select WATCHDOG_CORE help Enables watchdog timer support for the watchdog timer on the From 32a5a0c047343b11f581f663a2309cf43d13466f Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 11 May 2016 17:01:40 -0400 Subject: [PATCH 0685/1050] isa: Call isa_bus_init before dependent ISA bus drivers register The isa_bus_init function must be called before drivers which utilize the ISA bus driver are registered. A race condition for initilization exists if device_initcall is used (the isa_bus_init callback is placed in the same initcall level as dependent drivers which use module_init). This patch ensures that isa_bus_init is called first by utilizing postcore_initcall in favor of device_initcall. Fixes: a5117ba7da37 ("[PATCH] Driver model: add ISA bus") Cc: Rene Herman Signed-off-by: William Breathitt Gray Signed-off-by: Greg Kroah-Hartman --- drivers/base/isa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/isa.c b/drivers/base/isa.c index 91dba65d7264..cd6ccdcf9df0 100644 --- a/drivers/base/isa.c +++ b/drivers/base/isa.c @@ -180,4 +180,4 @@ static int __init isa_bus_init(void) return error; } -device_initcall(isa_bus_init); +postcore_initcall(isa_bus_init); From 5e25db870ec983be138b343a3d04c79a5c1f1703 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 9 May 2016 09:39:50 -0400 Subject: [PATCH 0686/1050] isa: Dummy isa_register_driver should return error code The inline isa_register_driver stub simply allows compilation on systems with CONFIG_ISA disabled; the dummy isa_register_driver does not register an isa_driver at all. The inline isa_register_driver should return -ENODEV to indicate lack of support when attempting to register an isa_driver on such a system with CONFIG_ISA disabled. Cc: Matthew Wilcox Reported-by: Sasha Levin Tested-by: Ye Xiaolong Signed-off-by: William Breathitt Gray Signed-off-by: Greg Kroah-Hartman --- include/linux/isa.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/isa.h b/include/linux/isa.h index 384ab9b7d79a..f2d0258414cf 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -6,6 +6,7 @@ #define __LINUX_ISA_H #include +#include #include struct isa_driver { @@ -28,7 +29,7 @@ void isa_unregister_driver(struct isa_driver *); #else static inline int isa_register_driver(struct isa_driver *d, unsigned int i) { - return 0; + return -ENODEV; } static inline void isa_unregister_driver(struct isa_driver *d) From f1d048f24e66ba85d3dabf3d076cefa5f2b546b0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 17 Jun 2016 06:35:57 -0400 Subject: [PATCH 0687/1050] tipc: fix socket timer deadlock We sometimes observe a 'deadly embrace' type deadlock occurring between mutually connected sockets on the same node. This happens when the one-hour peer supervision timers happen to expire simultaneously in both sockets. The scenario is as follows: CPU 1: CPU 2: -------- -------- tipc_sk_timeout(sk1) tipc_sk_timeout(sk2) lock(sk1.slock) lock(sk2.slock) msg_create(probe) msg_create(probe) unlock(sk1.slock) unlock(sk2.slock) tipc_node_xmit_skb() tipc_node_xmit_skb() tipc_node_xmit() tipc_node_xmit() tipc_sk_rcv(sk2) tipc_sk_rcv(sk1) lock(sk2.slock) lock((sk1.slock) filter_rcv() filter_rcv() tipc_sk_proto_rcv() tipc_sk_proto_rcv() msg_create(probe_rsp) msg_create(probe_rsp) tipc_sk_respond() tipc_sk_respond() tipc_node_xmit_skb() tipc_node_xmit_skb() tipc_node_xmit() tipc_node_xmit() tipc_sk_rcv(sk1) tipc_sk_rcv(sk2) lock((sk1.slock) lock((sk2.slock) ===> DEADLOCK ===> DEADLOCK Further analysis reveals that there are three different locations in the socket code where tipc_sk_respond() is called within the context of the socket lock, with ensuing risk of similar deadlocks. We now solve this by passing a buffer queue along with all upcalls where sk_lock.slock may potentially be held. Response or rejected message buffers are accumulated into this queue instead of being sent out directly, and only sent once we know we are safely outside the slock context. Reported-by: GUNA Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 54 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 88bfcd707064..c49b8df438cb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -796,9 +796,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, * @tsk: receiving socket * @skb: pointer to message buffer. */ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct sock *sk = &tsk->sk; + u32 onode = tsk_own_node(tsk); struct tipc_msg *hdr = buf_msg(skb); int mtyp = msg_type(hdr); bool conn_cong; @@ -811,7 +813,8 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); - tipc_sk_respond(sk, skb, TIPC_OK); + if (tipc_msg_reverse(onode, &skb, TIPC_OK)) + __skb_queue_tail(xmitq, skb); return; } else if (mtyp == CONN_ACK) { conn_cong = tsk_conn_cong(tsk); @@ -1686,7 +1689,8 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) * * Returns true if message was added to socket receive queue, otherwise false */ -static bool filter_rcv(struct sock *sk, struct sk_buff *skb) +static bool filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); @@ -1696,7 +1700,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) int usr = msg_user(hdr); if (unlikely(msg_user(hdr) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb); + tipc_sk_proto_rcv(tsk, skb, xmitq); return false; } @@ -1739,7 +1743,8 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) return true; reject: - tipc_sk_respond(sk, skb, err); + if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err)) + __skb_queue_tail(xmitq, skb); return false; } @@ -1755,9 +1760,24 @@ reject: static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { unsigned int truesize = skb->truesize; + struct sk_buff_head xmitq; + u32 dnode, selector; - if (likely(filter_rcv(sk, skb))) + __skb_queue_head_init(&xmitq); + + if (likely(filter_rcv(sk, skb, &xmitq))) { atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); + return 0; + } + + if (skb_queue_empty(&xmitq)) + return 0; + + /* Send response/rejected message */ + skb = __skb_dequeue(&xmitq); + dnode = msg_destnode(buf_msg(skb)); + selector = msg_origport(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); return 0; } @@ -1771,12 +1791,13 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Caller must hold socket lock */ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, - u32 dport) + u32 dport, struct sk_buff_head *xmitq) { + unsigned long time_limit = jiffies + 2; + struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; - struct sk_buff *skb; - unsigned long time_limit = jiffies + 2; + u32 onode; while (skb_queue_len(inputq)) { if (unlikely(time_after_eq(jiffies, time_limit))) @@ -1788,7 +1809,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - filter_rcv(sk, skb); + filter_rcv(sk, skb, xmitq); continue; } @@ -1801,7 +1822,9 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, continue; /* Overload => reject message back to sender */ - tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD); + onode = tipc_own_addr(sock_net(sk)); + if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) + __skb_queue_tail(xmitq, skb); break; } } @@ -1814,12 +1837,14 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, */ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { + struct sk_buff_head xmitq; u32 dnode, dport = 0; int err; struct tipc_sock *tsk; struct sock *sk; struct sk_buff *skb; + __skb_queue_head_init(&xmitq); while (skb_queue_len(inputq)) { dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); @@ -1827,9 +1852,14 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) if (likely(tsk)) { sk = &tsk->sk; if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { - tipc_sk_enqueue(inputq, sk, dport); + tipc_sk_enqueue(inputq, sk, dport, &xmitq); spin_unlock_bh(&sk->sk_lock.slock); } + /* Send pending response/rejected messages, if any */ + while ((skb = __skb_dequeue(&xmitq))) { + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, dport); + } sock_put(sk); continue; } From 63dcdd35c1552ca0c911e98ba3389a0729a457f4 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 17 Jun 2016 15:09:05 +0200 Subject: [PATCH 0688/1050] mlxsw: spectrum: Don't count internal TX header bytes to stats Stop the SW TX counter from counting the TX header bytes since they are not being sent out. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Reviewed-by: Ido Schimmel Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6f9e3ddff4a8..660429ebfbe1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -408,7 +408,11 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, } mlxsw_sp_txhdr_construct(skb, &tx_info); - len = skb->len; + /* TX header is consumed by HW on the way so we shouldn't count its + * bytes as being sent. + */ + len = skb->len - MLXSW_TXHDR_LEN; + /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ From 4e239fac7c6b9d703e83c81b52ec9fec00c50129 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 17 Jun 2016 15:09:06 +0200 Subject: [PATCH 0689/1050] mlxsw: switchx2: Don't count internal TX header bytes to stats Stop the SW TX counter from counting the TX header bytes since they are not being sent out. Fixes: e577516b9db3 ("mlxsw: Fix use-after-free bug in mlxsw_sx_port_xmit") Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 3842eab9449a..25f658b3849a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -316,7 +316,10 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, } } mlxsw_sx_txhdr_construct(skb, &tx_info); - len = skb->len; + /* TX header is consumed by HW on the way so we shouldn't count its + * bytes as being sent. + */ + len = skb->len - MLXSW_TXHDR_LEN; /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ From a9836cbb5fc885d3b8f3b91b2d47525f7269dec1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Jun 2016 18:15:30 +0200 Subject: [PATCH 0690/1050] net: tilegx: use correct timespec64 type The conversion to the 64-bit time based ptp methods left two instances of 'struct timespec' in place. This is harmless because 64-bit architectures define timespec64 as timespec, and this driver is not used on 32-bit machines. However, using 'struct timespec64' directly is obviously the right thing to do, and will help us remove 'struct timespec' in the future. Signed-off-by: Arnd Bergmann Fixes: b9acf24f779c ("ptp: tilegx: convert to the 64 bit get/set time methods.") Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/tile/tilegx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index 0a15acc075b3..11213a38c795 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -462,7 +462,7 @@ static void tile_tx_timestamp(struct sk_buff *skb, int instance) if (unlikely((shtx->tx_flags & SKBTX_HW_TSTAMP) != 0)) { struct mpipe_data *md = &mpipe_data[instance]; struct skb_shared_hwtstamps shhwtstamps; - struct timespec ts; + struct timespec64 ts; shtx->tx_flags |= SKBTX_IN_PROGRESS; gxio_mpipe_get_timestamp(&md->context, &ts); @@ -886,9 +886,9 @@ static struct ptp_clock_info ptp_mpipe_caps = { /* Sync mPIPE's timestamp up with Linux system time and register PTP clock. */ static void register_ptp_clock(struct net_device *dev, struct mpipe_data *md) { - struct timespec ts; + struct timespec64 ts; - getnstimeofday(&ts); + ktime_get_ts64(&ts); gxio_mpipe_set_timestamp(&md->context, &ts); mutex_init(&md->ptp_lock); From 3bb549ae4c51028c1930528ae9fcd6eca0474724 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Fri, 17 Jun 2016 16:12:12 -0400 Subject: [PATCH 0691/1050] RDS: TCP: rds_tcp_accept_one() should transition socket from RESETTING to UP The state of the rds_connection after rds_tcp_reset_callbacks() would be RDS_CONN_RESETTING and this is the value that should be passed by rds_tcp_accept_one() to rds_connect_path_complete() to transition the socket to RDS_CONN_UP. Fixes: b5c21c0947c1 ("RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one()") Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp_listen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 686b1d03a558..245542ca4718 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -138,7 +138,7 @@ int rds_tcp_accept_one(struct socket *sock) rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; /* rds_connect_path_complete() marks RDS_CONN_UP */ - rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING); + rds_connect_path_complete(conn, RDS_CONN_RESETTING); } } else { rds_tcp_set_callbacks(new_sock, conn); From 3720b69bafe9ccb989b9a8604f3e3f89b3610685 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 18 Jun 2016 10:10:26 -0700 Subject: [PATCH 0692/1050] Input: add BUS_CEC type Inputs can come in over the HDMI CEC bus, so add a new type for this. Signed-off-by: Hans Verkuil Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 01113841190d..c51494119817 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -247,6 +247,7 @@ struct input_mask { #define BUS_ATARI 0x1B #define BUS_SPI 0x1C #define BUS_RMI 0x1D +#define BUS_CEC 0x1E /* * MT_TOOL types From 488326947cd1f038da8d2c9068a0d07b913b7983 Mon Sep 17 00:00:00 2001 From: Kamil Debski Date: Sat, 18 Jun 2016 10:10:56 -0700 Subject: [PATCH 0693/1050] Input: add HDMI CEC specific keycodes Add HDMI CEC specific keycodes to the keycodes definition. Signed-off-by: Kamil Debski Signed-off-by: Hans Verkuil Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 87cf351bab03..737fa32faad4 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -611,6 +611,37 @@ #define KEY_KBDINPUTASSIST_ACCEPT 0x264 #define KEY_KBDINPUTASSIST_CANCEL 0x265 +/* Diagonal movement keys */ +#define KEY_RIGHT_UP 0x266 +#define KEY_RIGHT_DOWN 0x267 +#define KEY_LEFT_UP 0x268 +#define KEY_LEFT_DOWN 0x269 + +#define KEY_ROOT_MENU 0x26a /* Show Device's Root Menu */ +/* Show Top Menu of the Media (e.g. DVD) */ +#define KEY_MEDIA_TOP_MENU 0x26b +#define KEY_NUMERIC_11 0x26c +#define KEY_NUMERIC_12 0x26d +/* + * Toggle Audio Description: refers to an audio service that helps blind and + * visually impaired consumers understand the action in a program. Note: in + * some countries this is referred to as "Video Description". + */ +#define KEY_AUDIO_DESC 0x26e +#define KEY_3D_MODE 0x26f +#define KEY_NEXT_FAVORITE 0x270 +#define KEY_STOP_RECORD 0x271 +#define KEY_PAUSE_RECORD 0x272 +#define KEY_VOD 0x273 /* Video on Demand */ +#define KEY_UNMUTE 0x274 +#define KEY_FASTREVERSE 0x275 +#define KEY_SLOWREVERSE 0x276 +/* + * Control a data application associated with the currently viewed channel, + * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) + */ +#define KEY_DATA 0x275 + #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 #define BTN_TRIGGER_HAPPY2 0x2c1 From 053ea640818812313892ec4f370f5cfac42fd355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 18 Jun 2016 00:54:44 +0200 Subject: [PATCH 0694/1050] hwmon: (dell-smm) Fail in ioctl I8K_BIOS_VERSION when bios version is not a number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ABI of I8K_BIOS_VERSION ioctl can return only number. But new BIOS versions contain also other characters, which does not fit into that ABI. So in case of non digit values return -EINVAL. Reported-by: Mario Limonciello Signed-off-by: Pali Rohár Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index c43318d3416e..480b2fae9541 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -387,6 +388,10 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg) switch (cmd) { case I8K_BIOS_VERSION: + if (!isdigit(bios_version[0]) || !isdigit(bios_version[1]) || + !isdigit(bios_version[2])) + return -EINVAL; + val = (bios_version[0] << 16) | (bios_version[1] << 8) | bios_version[2]; break; From 7613663cc186f8f3c50279390ddc60286758001c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 18 Jun 2016 00:54:45 +0200 Subject: [PATCH 0695/1050] hwmon: (dell-smm) Restrict fan control and serial number to CAP_SYS_ADMIN by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For security reasons ordinary user must not be able to control fan speed via /proc/i8k by default. Some malicious software running under "nobody" user could be able to turn fan off and cause HW problems. So this patch changes default value of "restricted" parameter to 1. Also restrict reading of DMI_PRODUCT_SERIAL from /proc/i8k via "restricted" parameter. It is because non root user cannot read DMI_PRODUCT_SERIAL from sysfs file /sys/class/dmi/id/product_serial. Old non secure behaviour of file /proc/i8k can be achieved by loading this module with "restricted" parameter set to 0. Note that this patch has effects only for kernels compiled with CONFIG_I8K and only for file /proc/i8k. Hwmon interface provided by this driver was not changed and root access for setting fan speed was needed also before. Reported-by: Mario Limonciello Signed-off-by: Pali Rohár Cc: stable@vger.kernel.org # will need backport Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 480b2fae9541..c8bd3fdd0710 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -67,6 +67,7 @@ static DEFINE_MUTEX(i8k_mutex); static char bios_version[4]; +static char bios_machineid[16]; static struct device *i8k_hwmon_dev; static u32 i8k_hwmon_flags; static uint i8k_fan_mult = I8K_FAN_MULT; @@ -95,13 +96,13 @@ module_param(ignore_dmi, bool, 0); MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match"); #if IS_ENABLED(CONFIG_I8K) -static bool restricted; +static bool restricted = true; module_param(restricted, bool, 0); -MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set"); +MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)"); static bool power_status; module_param(power_status, bool, 0600); -MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k"); +MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)"); #endif static uint fan_mult; @@ -397,9 +398,11 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg) break; case I8K_MACHINE_ID: - memset(buff, 0, 16); - strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), - sizeof(buff)); + if (restricted && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + memset(buff, 0, sizeof(buff)); + strlcpy(buff, bios_machineid, sizeof(buff)); break; case I8K_FN_STATUS: @@ -516,7 +519,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset) seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n", I8K_PROC_FMT, bios_version, - i8k_get_dmi_data(DMI_PRODUCT_SERIAL), + (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid, cpu_temp, left_fan, right_fan, left_speed, right_speed, ac_power, fn_key); @@ -985,6 +988,8 @@ static int __init i8k_probe(void) strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION), sizeof(bios_version)); + strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), + sizeof(bios_machineid)); /* * Get SMM Dell signature From 2744d2fde00dc8bcc3679eb72c81a63058e90faa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 18 Jun 2016 00:54:46 +0200 Subject: [PATCH 0696/1050] hwmon: (dell-smm) Disallow fan_type() calls on broken machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Dell machines have especially broken SMM or BIOS which cause that once fan_type() is called then CPU fan speed going randomly up and down. And for fixing this behaviour reboot is required. So this patch creates fan_type blacklist of affected Dell machines and disallow fan_type() call on them to prevent that erratic behaviour. Old blacklist which disabled loading driver on some machines added in commits a4b45b25f18d ("hwmon: (dell-smm) Blacklist Dell Studio XPS 8100") and 6220f4ebd7b4 ("hwmon: (dell-smm) Blacklist Dell Studio XPS 8000") were moved to FAN_TYPE blacklist. Reported-by: Jan C Peters Signed-off-by: Pali Rohár Link: https://bugzilla.kernel.org/show_bug.cgi?id=100121 Cc: stable@vger.kernel.org # v4.0+, will need backport Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 36 +++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index c8bd3fdd0710..4bbc58714868 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -73,6 +73,7 @@ static u32 i8k_hwmon_flags; static uint i8k_fan_mult = I8K_FAN_MULT; static uint i8k_pwm_mult; static uint i8k_fan_max = I8K_FAN_HIGH; +static bool disallow_fan_type_call; #define I8K_HWMON_HAVE_TEMP1 (1 << 0) #define I8K_HWMON_HAVE_TEMP2 (1 << 1) @@ -241,6 +242,9 @@ static int i8k_get_fan_type(int fan) { struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, }; + if (disallow_fan_type_call) + return -EINVAL; + regs.ebx = fan & 0xff; return i8k_smm(®s) ? : regs.eax & 0xff; } @@ -726,6 +730,9 @@ static struct attribute *i8k_attrs[] = { static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr, int index) { + if (disallow_fan_type_call && + (index == 9 || index == 12)) + return 0; if (index >= 0 && index <= 1 && !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1)) return 0; @@ -937,12 +944,14 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = { MODULE_DEVICE_TABLE(dmi, i8k_dmi_table); -static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = { +/* + * On some machines once I8K_SMM_GET_FAN_TYPE is issued then CPU fan speed + * randomly going up and down due to bug in Dell SMM or BIOS. Here is blacklist + * of affected Dell machines for which we disallow I8K_SMM_GET_FAN_TYPE call. + * See bug: https://bugzilla.kernel.org/show_bug.cgi?id=100121 + */ +static struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initdata = { { - /* - * CPU fan speed going up and down on Dell Studio XPS 8000 - * for unknown reasons. - */ .ident = "Dell Studio XPS 8000", .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), @@ -950,16 +959,19 @@ static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = { }, }, { - /* - * CPU fan speed going up and down on Dell Studio XPS 8100 - * for unknown reasons. - */ .ident = "Dell Studio XPS 8100", .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8100"), }, }, + { + .ident = "Dell Inspiron 580", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 580 "), + }, + }, { } }; @@ -974,8 +986,7 @@ static int __init i8k_probe(void) /* * Get DMI information */ - if (!dmi_check_system(i8k_dmi_table) || - dmi_check_system(i8k_blacklist_dmi_table)) { + if (!dmi_check_system(i8k_dmi_table)) { if (!ignore_dmi && !force) return -ENODEV; @@ -986,6 +997,9 @@ static int __init i8k_probe(void) i8k_get_dmi_data(DMI_BIOS_VERSION)); } + if (dmi_check_system(i8k_blacklist_fan_type_dmi_table)) + disallow_fan_type_call = true; + strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION), sizeof(bios_version)); strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), From 30172936eefb70c27fade1bc912a25fc90827b76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 18 Jun 2016 17:47:38 -0700 Subject: [PATCH 0697/1050] =?UTF-8?q?MAINTAINERS:=20add=20Pali=20Roh=C3=A1?= =?UTF-8?q?r=20as=20reviewer=20of=20ALPS=20PS/2=20touchpad=20driver?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pali Rohár Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9c567a431d8d..630151ff1b26 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -596,6 +596,10 @@ S: Odd Fixes L: linux-alpha@vger.kernel.org F: arch/alpha/ +ALPS PS/2 TOUCHPAD DRIVER +R: Pali Rohár +F: drivers/input/mouse/alps.* + ALTERA MAILBOX DRIVER M: Ley Foon Tan L: nios2-dev@lists.rocketboards.org (moderated for non-subscribers) From 4a7d99ea1b27734558feb6833f180cd38a159940 Mon Sep 17 00:00:00 2001 From: Basil Gunn Date: Thu, 16 Jun 2016 09:42:30 -0700 Subject: [PATCH 0698/1050] AX.25: Close socket connection on session completion A socket connection made in ax.25 is not closed when session is completed. The heartbeat timer is stopped prematurely and this is where the socket gets closed. Allow heatbeat timer to run to close socket. Symptom occurs in kernels >= 4.2.0 Originally sent 6/15/2016. Resend with distribution list matching scripts/maintainer.pl output. Signed-off-by: Basil Gunn Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 3 ++- net/ax25/ax25_ds_timer.c | 5 ++++- net/ax25/ax25_std_timer.c | 5 ++++- net/ax25/ax25_subr.c | 3 ++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fbd0acf80b13..2fdebabbfacd 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock) release_sock(sk); ax25_disconnect(ax25, 0); lock_sock(sk); - ax25_destroy_socket(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_destroy_socket(ax25); break; case AX25_STATE_3: diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 951cd57bb07d..5237dff6941d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 004467c9e6e1..2c0d6ef66f9d 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 3b78e8473a01..655a7d4c96e1 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - ax25_stop_heartbeat(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); ax25_stop_t3timer(ax25); From 5c3da57d70f1ef1d9b60900b84a74d77a9cf0774 Mon Sep 17 00:00:00 2001 From: Joshua Houghton Date: Sat, 18 Jun 2016 15:46:31 +0000 Subject: [PATCH 0699/1050] net: rds: fix coding style issues Fix coding style issues in the following files: ib_cm.c: add space loop.c: convert spaces to tabs sysctl.c: add space tcp.h: convert spaces to tabs tcp_connect.c:remove extra indentation in switch statement tcp_recv.c: convert spaces to tabs tcp_send.c: convert spaces to tabs transport.c: move brace up one line on for statement Signed-off-by: Joshua Houghton Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 2 +- net/rds/loop.c | 5 +++-- net/rds/sysctl.c | 3 ++- net/rds/tcp.h | 2 +- net/rds/tcp_connect.c | 26 +++++++++++++------------- net/rds/tcp_recv.c | 2 +- net/rds/tcp_send.c | 14 +++++++------- net/rds/transport.c | 3 +-- 8 files changed, 29 insertions(+), 28 deletions(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 310cabce2311..7c2a65a6af5c 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -111,7 +111,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even } } - if (conn->c_version < RDS_PROTOCOL(3,1)) { + if (conn->c_version < RDS_PROTOCOL(3, 1)) { printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed," " no longer supported\n", &conn->c_faddr, diff --git a/net/rds/loop.c b/net/rds/loop.c index 6b12b68541ae..814173b466d9 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -95,8 +95,9 @@ out: */ static void rds_loop_inc_free(struct rds_incoming *inc) { - struct rds_message *rm = container_of(inc, struct rds_message, m_inc); - rds_message_put(rm); + struct rds_message *rm = container_of(inc, struct rds_message, m_inc); + + rds_message_put(rm); } /* we need to at least give the thread something to succeed */ diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index c173f69e1479..e381bbcd9cc1 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -102,7 +102,8 @@ int rds_sysctl_init(void) rds_sysctl_reconnect_min = msecs_to_jiffies(1); rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; - rds_sysctl_reg_table = register_net_sysctl(&init_net,"net/rds", rds_sysctl_rds_table); + rds_sysctl_reg_table = + register_net_sysctl(&init_net, "net/rds", rds_sysctl_rds_table); if (!rds_sysctl_reg_table) return -ENOMEM; return 0; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index ec0602b0dc24..7940babf6c71 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -83,7 +83,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); void rds_tcp_xmit_prepare(struct rds_connection *conn); void rds_tcp_xmit_complete(struct rds_connection *conn); int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off); + unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_tcp_write_space(struct sock *sk); /* tcp_stats.c */ diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index fba13d0305fb..f6e95d60db54 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -54,19 +54,19 @@ void rds_tcp_state_change(struct sock *sk) rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); - switch(sk->sk_state) { - /* ignore connecting sockets as they make progress */ - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - case TCP_ESTABLISHED: - rds_connect_path_complete(conn, RDS_CONN_CONNECTING); - break; - case TCP_CLOSE_WAIT: - case TCP_CLOSE: - rds_conn_drop(conn); - default: - break; + switch (sk->sk_state) { + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); + break; + case TCP_CLOSE_WAIT: + case TCP_CLOSE: + rds_conn_drop(conn); + default: + break; } out: read_unlock_bh(&sk->sk_callback_lock); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index c3196f9d070a..6e6a7111a034 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -171,7 +171,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, while (left) { if (!tinc) { tinc = kmem_cache_alloc(rds_tcp_incoming_slab, - arg->gfp); + arg->gfp); if (!tinc) { desc->error = -ENOMEM; goto out; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 22d0f2020a79..618be69c9c3b 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -66,19 +66,19 @@ void rds_tcp_xmit_complete(struct rds_connection *conn) static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len) { struct kvec vec = { - .iov_base = data, - .iov_len = len, + .iov_base = data, + .iov_len = len, + }; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, }; - struct msghdr msg = { - .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, - }; return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len); } /* the core send_sem serializes this with other xmit and shutdown */ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off) + unsigned int hdr_off, unsigned int sg, unsigned int off) { struct rds_tcp_connection *tc = conn->c_transport_data; int done = 0; @@ -196,7 +196,7 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: diff --git a/net/rds/transport.c b/net/rds/transport.c index f3afd1d60d3c..2ffd3e30c643 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -140,8 +140,7 @@ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, rds_info_iter_unmap(iter); down_read(&rds_trans_sem); - for (i = 0; i < RDS_TRANS_COUNT; i++) - { + for (i = 0; i < RDS_TRANS_COUNT; i++) { trans = transports[i]; if (!trans || !trans->stats_info_copy) continue; From 0e289e534af1b20417a1940c8eba549588671cd8 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 17 Jun 2016 13:44:18 +0200 Subject: [PATCH 0700/1050] ARM: dts: STi: stih407-family: Disable reserved-memory co-processor nodes This patch fixes a non-booting issue in Mainline. When booting with a compressed kernel, we need to be careful how we populate memory close to DDR start. AUTO_ZRELADDR is enabled by default in multi-arch enabled configurations, which place some restrictions on where the kernel is placed and where it will be uncompressed to on boot. AUTO_ZRELADDR takes the decompressor code's start address and masks out the bottom 28 bits to obtain an address to uncompress the kernel to (thus a load address of 0x42000000 means that the kernel will be uncompressed to 0x40000000 i.e. DDR START on this platform). Even changing the load address to after the co-processor's shared memory won't render a booting platform, since the AUTO_ZRELADDR algorithm still ensures the kernel is uncompressed into memory shared with the first co-processor (0x40000000). Another option would be to move loading to 0x4A000000, since this will mean the decompressor will decompress the kernel to 0x48000000. However, this would mean a large chunk (0x44000000 => 0x48000000 (64MB)) of memory would essentially be wasted for no good reason. Until we can work with ST to find a suitable memory location to relocate co-processor shared memory, let's disable the shared memory nodes. This will ensure a working platform in the mean time. NB: The more observant of you will notice that we're leaving the DMU shared memory node enabled; this is because a) it is the only one in active use at the time of this writing and b) it is not affected by the current default behaviour which is causing issues. Fixes: fe135c6 (ARM: dts: STiH407: Move over to using the 'reserved-memory' API for obtaining DMA memory) Signed-off-by: Lee Jones Reviewed-by Peter Griffin Signed-off-by: Maxime Coquelin Signed-off-by: Olof Johansson --- arch/arm/boot/dts/stih407-family.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index ad8ba10764a3..d294e82447a2 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -24,18 +24,21 @@ compatible = "shared-dma-pool"; reg = <0x40000000 0x01000000>; no-map; + status = "disabled"; }; gp1_reserved: rproc@41000000 { compatible = "shared-dma-pool"; reg = <0x41000000 0x01000000>; no-map; + status = "disabled"; }; audio_reserved: rproc@42000000 { compatible = "shared-dma-pool"; reg = <0x42000000 0x01000000>; no-map; + status = "disabled"; }; dmu_reserved: rproc@43000000 { From d90efc9ee0df3f13f248aaf47086b1e0ed6035ae Mon Sep 17 00:00:00 2001 From: Yakir Yang Date: Wed, 8 Jun 2016 10:13:27 -0400 Subject: [PATCH 0701/1050] drm/exynos: dp: Fix NULL pointer dereference due uninitialized connector Commit 3424e3a4f844 ("drm: bridge: analogix/dp: split exynos dp driver to bridge directory") split the Exynos DP core driver into a core driver and a bridge driver for the Analogix chip since that is also used by Rockchip. But the change introduced a regression causing a NULL pointer dereference when trying to access an uninitialized connector in the driver .get_modes: Fix this by instead of having a connector struct for both the Exynos and Analogix drivers, just use the connector initialized in the bridge driver. Fixes: 3424e3a4f844 ("drm: bridge: analogix/dp: split exynos dp driver to bridge directory") Reported-by: Marc Zyngier Signed-off-by: Yakir Yang Tested-by: Marc Zyngier Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_dp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c index 468498e3fec1..4c1fb3f8b5a6 100644 --- a/drivers/gpu/drm/exynos/exynos_dp.c +++ b/drivers/gpu/drm/exynos/exynos_dp.c @@ -34,7 +34,7 @@ struct exynos_dp_device { struct drm_encoder encoder; - struct drm_connector connector; + struct drm_connector *connector; struct drm_bridge *ptn_bridge; struct drm_device *drm_dev; struct device *dev; @@ -70,7 +70,7 @@ static int exynos_dp_poweroff(struct analogix_dp_plat_data *plat_data) static int exynos_dp_get_modes(struct analogix_dp_plat_data *plat_data) { struct exynos_dp_device *dp = to_dp(plat_data); - struct drm_connector *connector = &dp->connector; + struct drm_connector *connector = dp->connector; struct drm_display_mode *mode; int num_modes = 0; @@ -103,6 +103,7 @@ static int exynos_dp_bridge_attach(struct analogix_dp_plat_data *plat_data, int ret; drm_connector_register(connector); + dp->connector = connector; /* Pre-empt DP connector creation if there's a bridge */ if (dp->ptn_bridge) { From 5e0b37634cbb6bae69b7b15e1ea5f054dfaaa413 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 30 May 2016 20:20:22 -0400 Subject: [PATCH 0702/1050] drm/exynos: fimd: don't set .has_hw_trigger in s3c6400 driver data The field value is only checked in fimd_setup_trigger() if .trg_type is I80_HW_TRG so there's no point in setting this field for the s3c6400 if is never going to be used since .trg_type is not set. Signed-off-by: Javier Martinez Canillas Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 3efe1aa89416..1c23a8ff5e83 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -120,7 +120,6 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = { .timing_base = 0x0, .has_clksel = 1, .has_limited_fmt = 1, - .has_hw_trigger = 1, }; static struct fimd_driver_data exynos3_fimd_driver_data = { From e0d7461ceb5bb9b50e534262eb23e92deaaae6f1 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 2 Jun 2016 10:20:10 -0400 Subject: [PATCH 0703/1050] drm/exynos: don't use HW trigger for Exynos5420/5422/5800 Commit a6f75aa161c5 ("drm/exynos: fimd: add HW trigger support") added hardware trigger support to the FIMD controller driver. But this broke the display in at least the Exynos5800 Peach Pi Chromebook. So until the issue is fixed, avoid using HW trigger for the Exynos5420 based boards and use SW trigger as it was before the mentioned commit. Signed-off-by: Javier Martinez Canillas Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 1c23a8ff5e83..f10030ff00e6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -170,14 +170,11 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = { .lcdblk_vt_shift = 24, .lcdblk_bypass_shift = 15, .lcdblk_mic_bypass_shift = 11, - .trg_type = I80_HW_TRG, .has_shadowcon = 1, .has_vidoutcon = 1, .has_vtsel = 1, .has_mic_bypass = 1, .has_dp_clk = 1, - .has_hw_trigger = 1, - .has_trigger_per_te = 1, }; struct fimd_context { From ed4dc2718d3510f0fa5e6794f47b98549848f656 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Wed, 25 May 2016 14:42:56 +0200 Subject: [PATCH 0704/1050] drm/exynos: g2d: drop the _REG postfix from the stride defines This makes the defines consistent with the rest. Signed-off-by: Tobias Jakobi Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 493552368295..8564c3da0d22 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -48,13 +48,13 @@ /* registers for base address */ #define G2D_SRC_BASE_ADDR 0x0304 -#define G2D_SRC_STRIDE_REG 0x0308 +#define G2D_SRC_STRIDE 0x0308 #define G2D_SRC_COLOR_MODE 0x030C #define G2D_SRC_LEFT_TOP 0x0310 #define G2D_SRC_RIGHT_BOTTOM 0x0314 #define G2D_SRC_PLANE2_BASE_ADDR 0x0318 #define G2D_DST_BASE_ADDR 0x0404 -#define G2D_DST_STRIDE_REG 0x0408 +#define G2D_DST_STRIDE 0x0408 #define G2D_DST_COLOR_MODE 0x040C #define G2D_DST_LEFT_TOP 0x0410 #define G2D_DST_RIGHT_BOTTOM 0x0414 @@ -563,7 +563,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset) switch (reg_offset) { case G2D_SRC_BASE_ADDR: - case G2D_SRC_STRIDE_REG: + case G2D_SRC_STRIDE: case G2D_SRC_COLOR_MODE: case G2D_SRC_LEFT_TOP: case G2D_SRC_RIGHT_BOTTOM: @@ -573,7 +573,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset) reg_type = REG_TYPE_SRC_PLANE2; break; case G2D_DST_BASE_ADDR: - case G2D_DST_STRIDE_REG: + case G2D_DST_STRIDE: case G2D_DST_COLOR_MODE: case G2D_DST_LEFT_TOP: case G2D_DST_RIGHT_BOTTOM: @@ -968,8 +968,8 @@ static int g2d_check_reg_offset(struct device *dev, } else buf_info->types[reg_type] = BUF_TYPE_GEM; break; - case G2D_SRC_STRIDE_REG: - case G2D_DST_STRIDE_REG: + case G2D_SRC_STRIDE: + case G2D_DST_STRIDE: if (for_addr) goto err; From f0fcf43f285cdbc1bbc372919d68aea0cf4483d6 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Wed, 25 May 2016 14:35:41 +0200 Subject: [PATCH 0705/1050] drm/exynos: remove superfluous inclusions of fbdev header Neither of these files issue any fbdev related calls. Signed-off-by: Tobias Jakobi Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos7_drm_decon.c | 1 - drivers/gpu/drm/exynos/exynos_drm_core.c | 1 - drivers/gpu/drm/exynos/exynos_drm_fimd.c | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f6223f907c15..7f9901b7777b 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -31,7 +31,6 @@ #include "exynos_drm_plane.h" #include "exynos_drm_drv.h" #include "exynos_drm_fb.h" -#include "exynos_drm_fbdev.h" #include "exynos_drm_iommu.h" /* diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c index 011211e4167d..edbd98ff293e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_core.c +++ b/drivers/gpu/drm/exynos/exynos_drm_core.c @@ -15,7 +15,6 @@ #include #include "exynos_drm_drv.h" #include "exynos_drm_crtc.h" -#include "exynos_drm_fbdev.h" static LIST_HEAD(exynos_drm_subdrv_list); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index f10030ff00e6..d47216488985 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -30,7 +30,6 @@ #include "exynos_drm_drv.h" #include "exynos_drm_fb.h" -#include "exynos_drm_fbdev.h" #include "exynos_drm_crtc.h" #include "exynos_drm_plane.h" #include "exynos_drm_iommu.h" From 41abbf5afa51136bcb2aeefc80bf5c3a005d0aa3 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Wed, 25 May 2016 14:30:07 +0200 Subject: [PATCH 0706/1050] drm/exynos: use logical AND in exynos_drm_plane_check_size() The current bitwise AND should result in the same assembler but this is what the code is actually supposed to do. Signed-off-by: Tobias Jakobi Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index 55f1d37c666a..77f12c00abf9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -242,7 +242,7 @@ exynos_drm_plane_check_size(const struct exynos_drm_plane_config *config, state->v_ratio == (1 << 15)) height_ok = true; - if (width_ok & height_ok) + if (width_ok && height_ok) return 0; DRM_DEBUG_KMS("scaling mode is not supported"); From e6bd89232b4a4c9282da1ac8bdd798c2a30d9a46 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:11 +0300 Subject: [PATCH 0707/1050] qed: Correct default vlan behavior When no vlan filter is configured, firmware has a configurable default on whether to pass only untagged packets or all packets regardless of their tagging. Driver currently doesn't set this field in the necessary ramrod, causing the default to always be 'receive all'. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 8fba87dd48af..7c199a94cbfc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -72,6 +72,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, p_ramrod->mtu = cpu_to_le16(p_params->mtu); p_ramrod->inner_vlan_removal_en = p_params->remove_inner_vlan; p_ramrod->drop_ttl0_en = p_params->drop_ttl0; + p_ramrod->untagged = p_params->only_untagged; SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_UCAST_DROP_ALL, 1); SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_MCAST_DROP_ALL, 1); From 326439883e17fca029f4ed05307480bdb6369877 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:12 +0300 Subject: [PATCH 0708/1050] qed: Prevent VF from Tx-switching 'promisc' Internal loopback in driver is based on two things - first is the willingness of transmitter to use it [in case of VFs, this can be forced based on VEPA/VEB] and secondly on another vport classification configuration which should match the packet's destination. Current code allows non-linux VFs to configure a 'promisc' mode on Tx, meaning all traffic sent by VF would be loopbacked internally by firmware; This isn't considered a valid mode and as such should be prevented by PF. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 7c199a94cbfc..7e7ae83453d5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -248,10 +248,6 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn, SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_DROP_ALL, !!(accept_filter & QED_ACCEPT_NONE)); - SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL, - (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) && - !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED))); - SET_FIELD(state, ETH_VPORT_TX_MODE_MCAST_DROP_ALL, !!(accept_filter & QED_ACCEPT_NONE)); From a0d26d5a4fc8e13993279f788deeb08069e73b69 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:13 +0300 Subject: [PATCH 0709/1050] qed*: Don't reset statistics on inner reload Several user APIs can cause driver to perform an inner-reload. Currently, doing this would cause the HW/FW statistics of the adapter to reset, which isn't the expected behavior [statistics should only reset on explicit unloads]. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 3 ++- drivers/net/ethernet/qlogic/qede/qede_main.c | 7 ++++--- include/linux/qed/qed_eth_if.h | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 7e7ae83453d5..aada4c7e095f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1745,7 +1745,8 @@ static int qed_start_vport(struct qed_dev *cdev, start.vport_id, start.mtu); } - qed_reset_vport_stats(cdev); + if (params->clear_stats) + qed_reset_vport_stats(cdev); return 0; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5733d1888223..f8e11f953acb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -3231,7 +3231,7 @@ static int qede_stop_queues(struct qede_dev *edev) return rc; } -static int qede_start_queues(struct qede_dev *edev) +static int qede_start_queues(struct qede_dev *edev, bool clear_stats) { int rc, tc, i; int vlan_removal_en = 1; @@ -3462,6 +3462,7 @@ out: enum qede_load_mode { QEDE_LOAD_NORMAL, + QEDE_LOAD_RELOAD, }; static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) @@ -3500,7 +3501,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) goto err3; DP_INFO(edev, "Setup IRQs succeeded\n"); - rc = qede_start_queues(edev); + rc = qede_start_queues(edev, mode != QEDE_LOAD_RELOAD); if (rc) goto err4; DP_INFO(edev, "Start VPORT, RXQ and TXQ succeeded\n"); @@ -3555,7 +3556,7 @@ void qede_reload(struct qede_dev *edev, if (func) func(edev, args); - qede_load(edev, QEDE_LOAD_NORMAL); + qede_load(edev, QEDE_LOAD_RELOAD); mutex_lock(&edev->qede_lock); qede_config_rx_mode(edev->ndev); diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 6ae8cb4a61d3..6c876a63558d 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -49,6 +49,7 @@ struct qed_start_vport_params { bool drop_ttl0; u8 vport_id; u16 mtu; + bool clear_stats; }; struct qed_stop_rxq_params { From db511c37d4eb2b4e7312791eb8f9b34ed867445e Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:14 +0300 Subject: [PATCH 0710/1050] qed: Fix returning unlimited SPQ entries Driver has 2 sets of entries for handling ramrod configurations toward firmware - a regular pre-allocated set of entires and a possible 'unlimited' list of additional pending entries. In most scenarios the 'unlimited' list would not be used, but when it does the handling of the ramrod completion doesn't properly handle the release of the entry. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index acac6626a1b2..67d9893774d8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -614,7 +614,9 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn, *p_en2 = *p_ent; - kfree(p_ent); + /* EBLOCK responsible to free the allocated p_ent */ + if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK) + kfree(p_ent); p_ent = p_en2; } @@ -749,6 +751,15 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, * Thus, after gaining the answer perform the cleanup here. */ rc = qed_spq_block(p_hwfn, p_ent, fw_return_code); + + if (p_ent->queue == &p_spq->unlimited_pending) { + /* This is an allocated p_ent which does not need to + * return to pool. + */ + kfree(p_ent); + return rc; + } + if (rc) goto spq_post_fail2; @@ -844,8 +855,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data, fw_return_code); - if (found->comp_mode != QED_SPQ_MODE_EBLOCK) - /* EBLOCK is responsible for freeing its own entry */ + if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) || + (found->queue == &p_spq->unlimited_pending)) + /* EBLOCK is responsible for returning its own entry into the + * free list, unless it originally added the entry into the + * unlimited pending list. + */ qed_spq_return_entry(p_hwfn, found); /* Attempt to post pending requests */ From b639f197210d37905a6018ae4297659eb3f48f8f Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:15 +0300 Subject: [PATCH 0711/1050] qed: Add missing port-mode The 'MODULE_FIBER' value replaced several other FIBER values in newer management firmware images, so existing code would fail to properly reflect its mode. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 + drivers/net/ethernet/qlogic/qed/qed_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 9afc15fdbb02..e29ed5a69566 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -3700,6 +3700,7 @@ struct public_port { #define MEDIA_DA_TWINAX 0x3 #define MEDIA_BASE_T 0x4 #define MEDIA_SFP_1G_FIBER 0x5 +#define MEDIA_MODULE_FIBER 0x6 #define MEDIA_KR 0xf0 #define MEDIA_NOT_PRESENT 0xff diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 61cc6869fa65..c7e01b303540 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1085,6 +1085,7 @@ static int qed_get_port_type(u32 media_type) case MEDIA_SFPP_10G_FIBER: case MEDIA_SFP_1G_FIBER: case MEDIA_XFP_FIBER: + case MEDIA_MODULE_FIBER: case MEDIA_KR: port_type = PORT_FIBRE; break; From 33688abb2802ff3a230bd2441f765477b94cc89e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Jun 2016 21:30:02 -0700 Subject: [PATCH 0712/1050] Linux 4.7-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b409076c7c18..4fb6beac5f09 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From 427460c83cdf55069eee49799a0caef7dde8df69 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 16 Jun 2016 11:10:19 -0500 Subject: [PATCH 0713/1050] can: c_can: Update D_CAN TX and RX functions to 32 bit - fix Altera Cyclone access When testing CAN write floods on Altera's CycloneV, the first 2 bytes are sometimes 0x00, 0x00 or corrupted instead of the values sent. Also observed bytes 4 & 5 were corrupted in some cases. The D_CAN Data registers are 32 bits and changing from 16 bit writes to 32 bit writes fixes the problem. Testing performed on Altera CycloneV (D_CAN). Requesting tests on other C_CAN & D_CAN platforms. Reported-by: Richard Andrysek Signed-off-by: Thor Thayer Cc: Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 38 ++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index f91b094288da..e3dccd3200d5 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -332,9 +332,23 @@ static void c_can_setup_tx_object(struct net_device *dev, int iface, priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface), ctrl); - for (i = 0; i < frame->can_dlc; i += 2) { - priv->write_reg(priv, C_CAN_IFACE(DATA1_REG, iface) + i / 2, - frame->data[i] | (frame->data[i + 1] << 8)); + if (priv->type == BOSCH_D_CAN) { + u32 data = 0, dreg = C_CAN_IFACE(DATA1_REG, iface); + + for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) { + data = (u32)frame->data[i]; + data |= (u32)frame->data[i + 1] << 8; + data |= (u32)frame->data[i + 2] << 16; + data |= (u32)frame->data[i + 3] << 24; + priv->write_reg32(priv, dreg, data); + } + } else { + for (i = 0; i < frame->can_dlc; i += 2) { + priv->write_reg(priv, + C_CAN_IFACE(DATA1_REG, iface) + i / 2, + frame->data[i] | + (frame->data[i + 1] << 8)); + } } } @@ -402,10 +416,20 @@ static int c_can_read_msg_object(struct net_device *dev, int iface, u32 ctrl) } else { int i, dreg = C_CAN_IFACE(DATA1_REG, iface); - for (i = 0; i < frame->can_dlc; i += 2, dreg ++) { - data = priv->read_reg(priv, dreg); - frame->data[i] = data; - frame->data[i + 1] = data >> 8; + if (priv->type == BOSCH_D_CAN) { + for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) { + data = priv->read_reg32(priv, dreg); + frame->data[i] = data; + frame->data[i + 1] = data >> 8; + frame->data[i + 2] = data >> 16; + frame->data[i + 3] = data >> 24; + } + } else { + for (i = 0; i < frame->can_dlc; i += 2, dreg++) { + data = priv->read_reg(priv, dreg); + frame->data[i] = data; + frame->data[i + 1] = data >> 8; + } } } From 43200a4480cbbe660309621817f54cbb93907108 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Mon, 13 Jun 2016 15:44:19 +0200 Subject: [PATCH 0714/1050] can: at91_can: RX queue could get stuck at high bus load At high bus load it could happen that "at91_poll()" enters with all RX message boxes filled up. If then at the end the "quota" is exceeded as well, "rx_next" will not be reset to the first RX mailbox and hence the interrupts remain disabled. Signed-off-by: Wolfgang Grandegger Tested-by: Amr Bekhit Cc: Signed-off-by: Marc Kleine-Budde --- drivers/net/can/at91_can.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 8b3275d7792a..8f5e93cb7975 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -712,9 +712,10 @@ static int at91_poll_rx(struct net_device *dev, int quota) /* upper group completed, look again in lower */ if (priv->rx_next > get_mb_rx_low_last(priv) && - quota > 0 && mb > get_mb_rx_last(priv)) { + mb > get_mb_rx_last(priv)) { priv->rx_next = get_mb_rx_first(priv); - goto again; + if (quota > 0) + goto again; } return received; From f155d9c0a138463c3c9380d35e54e433c1477336 Mon Sep 17 00:00:00 2001 From: Maximilian Schneider Date: Wed, 8 Jun 2016 18:00:26 +0000 Subject: [PATCH 0715/1050] can: gs_usb: Add Basic support for the bytewerk.org candleLight interface This patchs adds basic support for the bytewerk.org candleLight interface, a open hardware (CERN OHL) USB CAN adapter. Signed-off-by: Hubert Denkmair Signed-off-by: Maximilian Schneider Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/Kconfig | 3 ++- drivers/net/can/usb/gs_usb.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index bcb272f6c68a..2ff0df32b3d1 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -16,7 +16,8 @@ config CAN_ESD_USB2 config CAN_GS_USB tristate "Geschwister Schneider UG interfaces" ---help--- - This driver supports the Geschwister Schneider USB/CAN devices. + This driver supports the Geschwister Schneider and bytewerk.org + candleLight USB CAN interfaces USB/CAN devices If unsure choose N, choose Y for built in support, M to compile as module (module will be named: gs_usb). diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 1556d4286235..acb0c8490673 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -1,7 +1,9 @@ -/* CAN driver for Geschwister Schneider USB/CAN devices. +/* CAN driver for Geschwister Schneider USB/CAN devices + * and bytewerk.org candleLight USB CAN interfaces. * - * Copyright (C) 2013 Geschwister Schneider Technologie-, + * Copyright (C) 2013-2016 Geschwister Schneider Technologie-, * Entwicklungs- und Vertriebs UG (Haftungsbeschränkt). + * Copyright (C) 2016 Hubert Denkmair * * Many thanks to all socketcan devs! * @@ -29,6 +31,9 @@ #define USB_GSUSB_1_VENDOR_ID 0x1d50 #define USB_GSUSB_1_PRODUCT_ID 0x606f +#define USB_CANDLELIGHT_VENDOR_ID 0x1209 +#define USB_CANDLELIGHT_PRODUCT_ID 0x2323 + #define GSUSB_ENDPOINT_IN 1 #define GSUSB_ENDPOINT_OUT 2 @@ -952,6 +957,8 @@ static void gs_usb_disconnect(struct usb_interface *intf) static const struct usb_device_id gs_usb_table[] = { { USB_DEVICE_INTERFACE_NUMBER(USB_GSUSB_1_VENDOR_ID, USB_GSUSB_1_PRODUCT_ID, 0) }, + { USB_DEVICE_INTERFACE_NUMBER(USB_CANDLELIGHT_VENDOR_ID, + USB_CANDLELIGHT_PRODUCT_ID, 0) }, {} /* Terminating entry */ }; @@ -969,5 +976,6 @@ module_usb_driver(gs_usb_driver); MODULE_AUTHOR("Maximilian Schneider "); MODULE_DESCRIPTION( "Socket CAN device driver for Geschwister Schneider Technologie-, " -"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces."); +"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces\n" +"and bytewerk.org candleLight USB CAN interfaces."); MODULE_LICENSE("GPL v2"); From 8974189222159154c55f24ddad33e3613960521a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jun 2016 10:50:40 +0200 Subject: [PATCH 0716/1050] sched/fair: Fix cfs_rq avg tracking underflow As per commit: b7fa30c9cc48 ("sched/fair: Fix post_init_entity_util_avg() serialization") > the code generated from update_cfs_rq_load_avg(): > > if (atomic_long_read(&cfs_rq->removed_load_avg)) { > s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0); > sa->load_avg = max_t(long, sa->load_avg - r, 0); > sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0); > removed_load = 1; > } > > turns into: > > ffffffff81087064: 49 8b 85 98 00 00 00 mov 0x98(%r13),%rax > ffffffff8108706b: 48 85 c0 test %rax,%rax > ffffffff8108706e: 74 40 je ffffffff810870b0 > ffffffff81087070: 4c 89 f8 mov %r15,%rax > ffffffff81087073: 49 87 85 98 00 00 00 xchg %rax,0x98(%r13) > ffffffff8108707a: 49 29 45 70 sub %rax,0x70(%r13) > ffffffff8108707e: 4c 89 f9 mov %r15,%rcx > ffffffff81087081: bb 01 00 00 00 mov $0x1,%ebx > ffffffff81087086: 49 83 7d 70 00 cmpq $0x0,0x70(%r13) > ffffffff8108708b: 49 0f 49 4d 70 cmovns 0x70(%r13),%rcx > > Which you'll note ends up with sa->load_avg -= r in memory at > ffffffff8108707a. So I _should_ have looked at other unserialized users of ->load_avg, but alas. Luckily nikbor reported a similar /0 from task_h_load() which instantly triggered recollection of this here problem. Aside from the intermediate value hitting memory and causing problems, there's another problem: the underflow detection relies on the signed bit. This reduces the effective width of the variables, IOW its effectively the same as having these variables be of signed type. This patch changes to a different means of unsigned underflow detection to not rely on the signed bit. This allows the variables to use the 'full' unsigned range. And it does so with explicit LOAD - STORE to ensure any intermediate value will never be visible in memory, allowing these unserialized loads. Note: GCC generates crap code for this, might warrant a look later. Note2: I say 'full' above, if we end up at U*_MAX we'll still explode; maybe we should do clamping on add too. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrey Ryabinin Cc: Chris Wilson Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yuyang Du Cc: bsegall@google.com Cc: kernel@kyup.com Cc: morten.rasmussen@arm.com Cc: pjt@google.com Cc: steve.muckle@linaro.org Fixes: 9d89c257dfb9 ("sched/fair: Rewrite runnable load and utilization average tracking") Link: http://lkml.kernel.org/r/20160617091948.GJ30927@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a2348deab7a3..2ae68f0e3bf5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2904,6 +2904,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) } } +/* + * Unsigned subtract and clamp on underflow. + * + * Explicitly do a load-store to ensure the intermediate value never hits + * memory. This allows lockless observations without ever seeing the negative + * values. + */ +#define sub_positive(_ptr, _val) do { \ + typeof(_ptr) ptr = (_ptr); \ + typeof(*ptr) val = (_val); \ + typeof(*ptr) res, var = READ_ONCE(*ptr); \ + res = var - val; \ + if (res > var) \ + res = 0; \ + WRITE_ONCE(*ptr, res); \ +} while (0) + /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) @@ -2913,15 +2930,15 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) if (atomic_long_read(&cfs_rq->removed_load_avg)) { s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0); - sa->load_avg = max_t(long, sa->load_avg - r, 0); - sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0); + sub_positive(&sa->load_avg, r); + sub_positive(&sa->load_sum, r * LOAD_AVG_MAX); removed_load = 1; } if (atomic_long_read(&cfs_rq->removed_util_avg)) { long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0); - sa->util_avg = max_t(long, sa->util_avg - r, 0); - sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0); + sub_positive(&sa->util_avg, r); + sub_positive(&sa->util_sum, r * LOAD_AVG_MAX); removed_util = 1; } @@ -2994,10 +3011,10 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s &se->avg, se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL); - cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0); - cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0); - cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0); - cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0); + sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg); + sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum); + sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg); + sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum); cfs_rq_util_change(cfs_rq); } From 70c8217acd4383e069fe1898bbad36ea4fcdbdcc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 17 Jun 2016 16:10:42 -0400 Subject: [PATCH 0717/1050] tracing: Handle NULL formats in hold_module_trace_bprintk_format() If a task uses a non constant string for the format parameter in trace_printk(), then the trace_printk_fmt variable is set to NULL. This variable is then saved in the __trace_printk_fmt section. The function hold_module_trace_bprintk_format() checks to see if duplicate formats are used by modules, and reuses them if so (saves them to the list if it is new). But this function calls lookup_format() that does a strcmp() to the value (which is now NULL) and can cause a kernel oops. This wasn't an issue till 3debb0a9ddb ("tracing: Fix trace_printk() to print when not using bprintk()") which added "__used" to the trace_printk_fmt variable, and before that, the kernel simply optimized it out (no NULL value was saved). The fix is simply to handle the NULL pointer in lookup_format() and have the caller ignore the value if it was NULL. Link: http://lkml.kernel.org/r/1464769870-18344-1-git-send-email-zhengjun.xing@intel.com Reported-by: xingzhen Acked-by: Namhyung Kim Fixes: 3debb0a9ddb ("tracing: Fix trace_printk() to print when not using bprintk()") Cc: stable@vger.kernel.org # v3.5+ Signed-off-by: Steven Rostedt --- kernel/trace/trace_printk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index f96f0383f6c6..ad1d6164e946 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -36,6 +36,10 @@ struct trace_bprintk_fmt { static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) { struct trace_bprintk_fmt *pos; + + if (!fmt) + return ERR_PTR(-EINVAL); + list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { if (!strcmp(pos->fmt, fmt)) return pos; @@ -57,7 +61,8 @@ void hold_module_trace_bprintk_format(const char **start, const char **end) for (iter = start; iter < end; iter++) { struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); if (tb_fmt) { - *iter = tb_fmt->fmt; + if (!IS_ERR(tb_fmt)) + *iter = tb_fmt->fmt; continue; } From 0ded5174e976e2b2a354fe38abf1ebf4492c6dc3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 23 May 2016 15:06:30 -0400 Subject: [PATCH 0718/1050] ftracetest: Fix hist unsupported result in hist selftests When histograms are not configured in the kernel, the ftracetest histogram selftests should return "unsupported" and not "Failed". To detect this, the test scripts have: FEATURE=`grep hist events/sched/sched_process_fork/trigger` if [ -z "$FEATURE" ]; then echo "hist trigger is not supported" exit_unsupported fi The problem is that '-e' is in effect and any error will cause the program to terminate. The grep for 'hist' fails, because it is not compiled it (thus unsupported), but because grep has an error code for failing to find the string, it causes the program to terminate, and is marked as a failed test. Namhyung Kim recommended to test for the "hist" file located in events/sched/sched_process_fork/hist instead, as it is more inline with the other checks. As the hist file is only created if the histogram feature is enabled, that is a valid check. Link: http://lkml.kernel.org/r/20160523151538.4ea9ce0c@gandalf.local.home Suggested-by: Namhyung Kim Acked-by: Namhyung Kim Acked-by: Masami Hiramatsu Fixes: 76929ab51f0ee ("kselftests/ftrace: Add hist trigger testcases") Signed-off-by: Steven Rostedt --- .../selftests/ftrace/test.d/trigger/trigger-hist-mod.tc | 9 ++++----- .../selftests/ftrace/test.d/trigger/trigger-hist.tc | 9 ++++----- .../selftests/ftrace/test.d/trigger/trigger-multihist.tc | 9 ++++----- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index c2b61c4fda11..0bf5085281f3 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - -FEATURE=`grep hist events/sched/sched_process_fork/trigger` -if [ -z "$FEATURE" ]; then +if [ ! -f events/sched/sched_process_fork/hist ]; then echo "hist trigger is not supported" exit_unsupported fi +reset_tracer +do_reset + echo "Test histogram with execname modifier" echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc index b2902d42a537..a00184cd9c95 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - -FEATURE=`grep hist events/sched/sched_process_fork/trigger` -if [ -z "$FEATURE" ]; then +if [ ! -f events/sched/sched_process_fork/hist ]; then echo "hist trigger is not supported" exit_unsupported fi +reset_tracer +do_reset + echo "Test histogram basic tigger" echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc index 03c4a46561fc..3478b00ead57 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - -FEATURE=`grep hist events/sched/sched_process_fork/trigger` -if [ -z "$FEATURE" ]; then +if [ ! -f events/sched/sched_process_fork/hist ]; then echo "hist trigger is not supported" exit_unsupported fi +reset_tracer +do_reset + reset_trigger echo "Test histogram multiple tiggers" From e7d6ef9790bc281f5c29d0132b68031248523fe8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Jun 2016 01:35:59 -0400 Subject: [PATCH 0719/1050] fix idiotic braino in d_alloc_parallel() Check for d_unhashed() while searching in in-lookup hash was absolutely wrong. Worse, it masked a deadlock on dget() done under bitlock that nests inside ->d_lock. Thanks to J. R. Okajima for spotting it. Spotted-by: "J. R. Okajima" Wearing-brown-paperbag: Al Viro Signed-off-by: Al Viro --- fs/dcache.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index b7eddfd35aa5..d6847d7b123d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2503,7 +2503,6 @@ retry: rcu_read_unlock(); goto retry; } - rcu_read_unlock(); /* * No changes for the parent since the beginning of d_lookup(). * Since all removals from the chain happen with hlist_bl_lock(), @@ -2516,8 +2515,6 @@ retry: continue; if (dentry->d_parent != parent) continue; - if (d_unhashed(dentry)) - continue; if (parent->d_flags & DCACHE_OP_COMPARE) { int tlen = dentry->d_name.len; const char *tname = dentry->d_name.name; @@ -2529,9 +2526,18 @@ retry: if (dentry_cmp(dentry, str, len)) continue; } - dget(dentry); hlist_bl_unlock(b); - /* somebody is doing lookup for it right now; wait for it */ + /* now we can try to grab a reference */ + if (!lockref_get_not_dead(&dentry->d_lockref)) { + rcu_read_unlock(); + goto retry; + } + + rcu_read_unlock(); + /* + * somebody is likely to be still doing lookup for it; + * wait for them to finish + */ spin_lock(&dentry->d_lock); d_wait_lookup(dentry); /* @@ -2562,6 +2568,7 @@ retry: dput(new); return dentry; } + rcu_read_unlock(); /* we can't take ->d_lock here; it's OK, though. */ new->d_flags |= DCACHE_PAR_LOOKUP; new->d_wait = wq; From ef0dab4aae14e25efddf1577736f8450132800c5 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sat, 18 Jun 2016 23:52:25 -0700 Subject: [PATCH 0720/1050] PCI: Fix unaligned accesses in VC code The save/restore buffers for VC state is first composed of a 2-byte control register, then a bunch of 4-byte words. This causes unaligned accesses which trap on platform such as sparc. This is easy to fix by simply moving the buffer pointer forward by 4 bytes instead of 2 after dealing with the control register. The length adjustment needs to be changed likewise as well. Fixes: 5f8fc43217a0 ("PCI: Include pci/pcie/Kconfig directly from pci/Kconfig") Reported-by: Meelis Roos Reported-by: Anatoly Pugachev Signed-off-by: David S. Miller Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.6+ --- drivers/pci/vc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c index dfbab61a1b47..1fa3a3219c45 100644 --- a/drivers/pci/vc.c +++ b/drivers/pci/vc.c @@ -221,9 +221,9 @@ static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos, else pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL, *(u16 *)buf); - buf += 2; + buf += 4; } - len += 2; + len += 4; /* * If we have any Low Priority VCs and a VC Arbitration Table Offset From 48a70e1ca85e3b484791e100bb403c05ef9d37c8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 18 Jun 2016 11:38:44 +0300 Subject: [PATCH 0721/1050] drm/amdgpu: precedence bug in amdgpu_device_init() ! has higher precedence than bitwise & so we need to add parenthesis for this to work as intended. Fixes: 048765ad5af7 ('amdgpu: fix asic initialization for virtualized environments (v2)') Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 66482b429458..6e920086af46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1535,7 +1535,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Post card if necessary */ if (!amdgpu_card_posted(adev) || (adev->virtualization.is_virtual && - !adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN)) { + !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) { if (!adev->bios) { dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; From 29b9c528b8c295911e8b1e515273e89a2b7fa2d8 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sat, 18 Jun 2016 22:55:00 +0200 Subject: [PATCH 0722/1050] drm/amdgpu: initialize amdgpu_cgs_acpi_eval_object result value amdgpu_cgs_acpi_eval_object() returned the value of variable "result" without initializing it first. This bug has been found by compiling the kernel with clang. The compiler complained: drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:972:14: error: variable 'result' is used uninitialized whenever 'for' loop exits because its condition is false [-Werror,-Wsometimes-uninitialized] for (i = 0; i < count; i++) { ^~~~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:1011:9: note: uninitialized use occurs here return result; ^~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:972:14: note: remove the condition if it is always true for (i = 0; i < count; i++) { ^~~~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:864:12: note: initialize the variable 'result' to silence this warning int result; ^ = 0 Fixes: 3f1d35a03b3c ("drm/amdgpu: implement new cgs interface for acpi function") Signed-off-by: Nicolas Iooss Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 8943099eb135..cf6f49fc1c75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -909,7 +909,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, struct cgs_acpi_method_argument *argument = NULL; uint32_t i, count; acpi_status status; - int result; + int result = 0; uint32_t func_no = 0xFFFFFFFF; handle = ACPI_HANDLE(&adev->pdev->dev); From 274f5b041d3c9c0974a7dd1f66b67c33bb5b0f42 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Jun 2016 18:55:57 -0400 Subject: [PATCH 0723/1050] dcache_{readdir,dir_lseek}(): don't bother with nested ->d_lock Make sure that directory is locked shared in dcache_dir_lseek(); for dcache_readdir() it's already tru, and that's enough to make simple_positive() stable. Signed-off-by: Al Viro --- fs/libfs.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index cedeacbae303..f56acb1e5fbc 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -103,6 +103,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) struct dentry *cursor = file->private_data; loff_t n = file->f_pos - 2; + inode_lock_shared(dentry->d_inode); spin_lock(&dentry->d_lock); /* d_lock not required for cursor */ list_del(&cursor->d_child); @@ -110,14 +111,13 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) while (n && p != &dentry->d_subdirs) { struct dentry *next; next = list_entry(p, struct dentry, d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(next)) n--; - spin_unlock(&next->d_lock); p = p->next; } list_add_tail(&cursor->d_child, p); spin_unlock(&dentry->d_lock); + inode_unlock_shared(dentry->d_inode); } } return offset; @@ -150,22 +150,16 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) for (p = q->next; p != &dentry->d_subdirs; p = p->next) { struct dentry *next = list_entry(p, struct dentry, d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - if (!simple_positive(next)) { - spin_unlock(&next->d_lock); + if (!simple_positive(next)) continue; - } - spin_unlock(&next->d_lock); spin_unlock(&dentry->d_lock); if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, dt_type(d_inode(next)))) return 0; spin_lock(&dentry->d_lock); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); /* next is still alive */ list_move(q, p); - spin_unlock(&next->d_lock); p = q; ctx->pos++; } From 4f42c1b5b9c27b6228e6b9c57eee4beb3118b6b0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Jun 2016 19:37:13 -0400 Subject: [PATCH 0724/1050] libfs.c: new helper - next_positive() Return nth positive child after given or NULL if there's less than n left. dcache_readdir() and dcache_dir_lseek() switched to it. Signed-off-by: Al Viro --- fs/libfs.c | 77 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index f56acb1e5fbc..b05b74ae3f16 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -84,6 +84,39 @@ int dcache_dir_close(struct inode *inode, struct file *file) } EXPORT_SYMBOL(dcache_dir_close); +/* parent is locked at least shared */ +static struct dentry *next_positive(struct dentry *parent, + struct list_head *from, + int count) +{ + struct dentry *res = NULL; + struct list_head *p; + + spin_lock(&parent->d_lock); + for (p = from->next; p != &parent->d_subdirs; p = p->next) { + struct dentry *d = list_entry(p, struct dentry, d_child); + if (simple_positive(d) && !--count) { + res = d; + break; + } + } + spin_unlock(&parent->d_lock); + return res; +} + +static void move_cursor(struct dentry *cursor, struct list_head *after) +{ + struct dentry *parent = cursor->d_parent; + + spin_lock(&parent->d_lock); + __list_del(cursor->d_child.prev, cursor->d_child.next); + if (after) + list_add(&cursor->d_child, after); + else + list_add_tail(&cursor->d_child, &parent->d_subdirs); + spin_unlock(&parent->d_lock); +} + loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry *dentry = file->f_path.dentry; @@ -99,24 +132,13 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { file->f_pos = offset; if (file->f_pos >= 2) { - struct list_head *p; struct dentry *cursor = file->private_data; + struct dentry *to; loff_t n = file->f_pos - 2; inode_lock_shared(dentry->d_inode); - spin_lock(&dentry->d_lock); - /* d_lock not required for cursor */ - list_del(&cursor->d_child); - p = dentry->d_subdirs.next; - while (n && p != &dentry->d_subdirs) { - struct dentry *next; - next = list_entry(p, struct dentry, d_child); - if (simple_positive(next)) - n--; - p = p->next; - } - list_add_tail(&cursor->d_child, p); - spin_unlock(&dentry->d_lock); + to = next_positive(dentry, &dentry->d_subdirs, n); + move_cursor(cursor, to ? &to->d_child : NULL); inode_unlock_shared(dentry->d_inode); } } @@ -140,30 +162,25 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; - struct list_head *p, *q = &cursor->d_child; + struct list_head *p = &cursor->d_child; + struct dentry *next; + bool moved = false; if (!dir_emit_dots(file, ctx)) return 0; - spin_lock(&dentry->d_lock); + if (ctx->pos == 2) - list_move(q, &dentry->d_subdirs); - - for (p = q->next; p != &dentry->d_subdirs; p = p->next) { - struct dentry *next = list_entry(p, struct dentry, d_child); - if (!simple_positive(next)) - continue; - - spin_unlock(&dentry->d_lock); + p = &dentry->d_subdirs; + while ((next = next_positive(dentry, p, 1)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, dt_type(d_inode(next)))) - return 0; - spin_lock(&dentry->d_lock); - /* next is still alive */ - list_move(q, p); - p = q; + break; + moved = true; + p = &next->d_child; ctx->pos++; } - spin_unlock(&dentry->d_lock); + if (moved) + move_cursor(cursor, p); return 0; } EXPORT_SYMBOL(dcache_readdir); From ebaaa80e8f20ff2cbbccd6823f73a99565487502 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Jun 2016 20:55:34 -0400 Subject: [PATCH 0725/1050] lockless next_positive() Signed-off-by: Al Viro --- fs/libfs.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index b05b74ae3f16..74dc8b9e7f53 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -89,31 +89,53 @@ static struct dentry *next_positive(struct dentry *parent, struct list_head *from, int count) { - struct dentry *res = NULL; + unsigned *seq = &parent->d_inode->i_dir_seq, n; + struct dentry *res; struct list_head *p; + bool skipped; + int i; - spin_lock(&parent->d_lock); +retry: + i = count; + skipped = false; + n = smp_load_acquire(seq) & ~1; + res = NULL; + rcu_read_lock(); for (p = from->next; p != &parent->d_subdirs; p = p->next) { struct dentry *d = list_entry(p, struct dentry, d_child); - if (simple_positive(d) && !--count) { + if (!simple_positive(d)) { + skipped = true; + } else if (!--i) { res = d; break; } } - spin_unlock(&parent->d_lock); + rcu_read_unlock(); + if (skipped) { + smp_rmb(); + if (unlikely(*seq != n)) + goto retry; + } return res; } static void move_cursor(struct dentry *cursor, struct list_head *after) { struct dentry *parent = cursor->d_parent; - + unsigned n, *seq = &parent->d_inode->i_dir_seq; spin_lock(&parent->d_lock); + for (;;) { + n = *seq; + if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) + break; + cpu_relax(); + } __list_del(cursor->d_child.prev, cursor->d_child.next); if (after) list_add(&cursor->d_child, after); else list_add_tail(&cursor->d_child, &parent->d_subdirs); + smp_store_release(seq, n + 2); spin_unlock(&parent->d_lock); } From a5e9b85a6540df6c4074d3a56674f6fb6c5fc830 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 17 Jun 2016 17:24:23 +0000 Subject: [PATCH 0726/1050] clk: Fix return value check in oxnas_stdclk_probe() In case of error, the function syscon_node_to_regmap() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Wei Yongjun Acked-by: Neil Armstrong Fixes: 0bbd72b4c64f ("clk: Add Oxford Semiconductor OXNAS Standard Clocks") Signed-off-by: Stephen Boyd --- drivers/clk/clk-oxnas.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk-oxnas.c b/drivers/clk/clk-oxnas.c index efba7d4dbcfc..79bcb2e42060 100644 --- a/drivers/clk/clk-oxnas.c +++ b/drivers/clk/clk-oxnas.c @@ -144,9 +144,9 @@ static int oxnas_stdclk_probe(struct platform_device *pdev) return -ENOMEM; regmap = syscon_node_to_regmap(of_get_parent(np)); - if (!regmap) { + if (IS_ERR(regmap)) { dev_err(&pdev->dev, "failed to have parent regmap\n"); - return -EINVAL; + return PTR_ERR(regmap); } for (i = 0; i < ARRAY_SIZE(clk_oxnas_init); i++) { From 1b7e38b92b0bbd363369f5160f13f4d26140972d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 27 May 2016 16:09:25 +0200 Subject: [PATCH 0727/1050] drm: atmel-hlcdc: actually disable scaling when no scaling is required The driver is only enabling scaling, but never disabling it, thus, if you enable the scaling feature once it stays enabled forever. Signed-off-by: Boris Brezillon Reported-by: Alex Vazquez Reviewed-by: Nicolas Ferre Fixes: 1a396789f65a ("drm: add Atmel HLCDC Display Controller support") Cc: --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index aef3ca8a81fa..016c191221f3 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -339,6 +339,8 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane, atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, factor_reg); + } else { + atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, 0); } } From 0b1e1eb76220afa043b2733dfe61f5927cf0e458 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 3 Jun 2016 09:17:36 +0200 Subject: [PATCH 0728/1050] drm: atmel-hlcdc: Fix OF graph parsing atmel_hlcdc_create_outputs() iterates over OF graph nodes and releases the node (using of_node_put()) after each iteration, which is wrong since for_each_endpoint_of_node() is already taking care of that. Move the of_node_put() call in the error path. Signed-off-by: Boris Brezillon Reviewed-by: Nicolas Ferre Fixes: 17a8e03e7e97 ("drm: atmel-hlcdc: rework the output code to support drm bridges") --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c index 39802c0539b6..3d34fc4ca826 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c @@ -266,9 +266,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev) if (!ret) ret = atmel_hlcdc_check_endpoint(dev, &ep); - of_node_put(ep_np); - if (ret) + if (ret) { + of_node_put(ep_np); return ret; + } } for_each_endpoint_of_node(dev->dev->of_node, ep_np) { @@ -276,9 +277,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev) if (!ret) ret = atmel_hlcdc_attach_endpoint(dev, &ep); - of_node_put(ep_np); - if (ret) + if (ret) { + of_node_put(ep_np); return ret; + } } return 0; From 1d7b84d12af8312b52316029f1fa0fa4eac3c9e4 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 17 Jun 2016 18:21:01 +0800 Subject: [PATCH 0729/1050] drm/amd/powerplay: fix logic error. the error lead powerplay can't get display info in DGPU case. store_cc6_data just implement in APU. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index fa208ada6892..efb77eda7508 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -306,10 +306,14 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, { PHM_FUNC_CHECK(hwmgr); - if (hwmgr->hwmgr_func->store_cc6_data == NULL) + if (display_config == NULL) return -EINVAL; hwmgr->display_config = *display_config; + + if (hwmgr->hwmgr_func->store_cc6_data == NULL) + return -EINVAL; + /* to do pass other display configuration in furture */ if (hwmgr->hwmgr_func->store_cc6_data) From 576b4401b1971fe40be4cfd379430a61cd8426b2 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 13 Jun 2016 17:39:19 +0800 Subject: [PATCH 0730/1050] drm/amd/powerplay: fix bug that function parameter was incorect. Wrong value passed to acpi_pcie_perf_request. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Reviewed-by: Ken Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c index 58742e0d1492..d19a9b624242 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c @@ -77,7 +77,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise) ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, &atcs_input, &atcs_output, - 0, + 1, sizeof(atcs_input), sizeof(atcs_output)); if (result != 0) From 0a4fef559b69ae2e682c98f31d53a225fbda78bd Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 14 Jun 2016 18:36:36 +0800 Subject: [PATCH 0731/1050] drm/amd/powerplay: need to notify system bios pcie device ready before request performance state. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Reviewed-by: Ken Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c | 16 +++++++++++++++- drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c index d19a9b624242..a3c38bbd1e94 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c @@ -44,6 +44,20 @@ bool acpi_atcs_functions_supported(void *device, uint32_t index) return result == 0 ? (output_buf.function_bits & (1 << (index - 1))) != 0 : false; } +bool acpi_atcs_notify_pcie_device_ready(void *device) +{ + int32_t temp_buffer = 1; + + return cgs_call_acpi_method(device, CGS_ACPI_METHOD_ATCS, + ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, + &temp_buffer, + NULL, + 0, + sizeof(temp_buffer), + 0); +} + + int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise) { struct atcs_pref_req_input atcs_input; @@ -52,7 +66,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise) int result; struct cgs_system_info info = {0}; - if (!acpi_atcs_functions_supported(device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST)) + if( 0 != acpi_atcs_notify_pcie_device_ready(device)) return -EINVAL; info.size = sizeof(struct cgs_system_info); diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h index 3bd5e69b9045..3df5de2cdab0 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h @@ -26,3 +26,4 @@ extern bool acpi_atcs_functions_supported(void *device, extern int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise); +extern bool acpi_atcs_notify_pcie_device_ready(void *device); From 919e334dec283294acd99951d016e59ad725c9a7 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 11 May 2016 17:04:07 +0800 Subject: [PATCH 0732/1050] drm/amd/powerplay: enable PowerContainment feature for polaris10/11. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 1400bc420881..07159958e77d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -2606,6 +2606,7 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_FanSpeedInTableIsRPM); + if (hwmgr->chip_id == CHIP_POLARIS11) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SPLLShutdownSupport); @@ -2938,6 +2939,9 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->vddci_control = POLARIS10_VOLTAGE_CONTROL_NONE; data->mvdd_control = POLARIS10_VOLTAGE_CONTROL_NONE; + data->enable_tdc_limit_feature = true; + data->enable_pkg_pwr_tracking_feature = true; + if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) data->voltage_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2; From a2fb4934e960b11e4430dccc08d606c99910b447 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 13 Jun 2016 17:46:31 +0800 Subject: [PATCH 0733/1050] drm/amd/powerplay: initialize variables which were missed. Missing pcie dpm settings. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Reviewed-by: Ken Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 1 + drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c index 586f73276226..92912ab20944 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c @@ -633,6 +633,8 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->vddci_control = FIJI_VOLTAGE_CONTROL_NONE; data->mvdd_control = FIJI_VOLTAGE_CONTROL_NONE; + data->force_pcie_gen = PP_PCIEGenInvalid; + if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) data->voltage_control = FIJI_VOLTAGE_CONTROL_BY_SVID2; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 07159958e77d..643677fb5212 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -2941,6 +2941,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->enable_tdc_limit_feature = true; data->enable_pkg_pwr_tracking_feature = true; + data->force_pcie_gen = PP_PCIEGenInvalid; if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c index d27e8c40602a..233eb7f36c1d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c @@ -4489,6 +4489,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->vdd_ci_control = TONGA_VOLTAGE_CONTROL_NONE; data->vdd_gfx_control = TONGA_VOLTAGE_CONTROL_NONE; data->mvdd_control = TONGA_VOLTAGE_CONTROL_NONE; + data->force_pcie_gen = PP_PCIEGenInvalid; if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) { From 40787ef21c2889fc3d96a11775fa412e715d7d48 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 8 Jun 2016 19:41:00 +0800 Subject: [PATCH 0734/1050] drm/amd/powerplay: disable UVD SMU handshake for MCLK. sync up with internal programming recommendations. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 643677fb5212..5ecde13e4893 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -2252,6 +2252,9 @@ static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr) static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) { struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t soft_register_value = 0; + uint32_t handshake_disables_offset = data->soft_regs_start + + offsetof(SMU74_SoftRegisters, HandshakeDisables); /* enable SCLK dpm */ if (!data->sclk_dpm_key_disabled) @@ -2262,6 +2265,12 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) /* enable MCLK dpm */ if (0 == data->mclk_dpm_key_disabled) { +/* Disable UVD - SMU handshake for MCLK. */ + soft_register_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, handshake_disables_offset); + soft_register_value |= SMU7_UVD_MCLK_HANDSHAKE_DISABLE; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + handshake_disables_offset, soft_register_value); PP_ASSERT_WITH_CODE( (0 == smum_send_msg_to_smc(hwmgr->smumgr, From 9a3c1b342be28a14006f644528dd9baad43db443 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 8 Jun 2016 19:42:48 +0800 Subject: [PATCH 0735/1050] drm/amd/powrplay: enable stutter_mode for polaris. To minimize the dram power expenditure during static -screen Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 5ecde13e4893..c2f5bec272c4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -1296,7 +1296,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, } mem_level->MclkFrequency = clock; - mem_level->StutterEnable = 0; mem_level->EnabledForThrottle = 1; mem_level->EnabledForActivity = 0; mem_level->UpHyst = 0; @@ -1363,7 +1362,7 @@ static int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr) * a higher state by default such that we are not effected by * up threshold or and MCLK DPM latency. */ - levels[0].ActivityLevel = (uint16_t)data->mclk_dpm0_activity_target; + levels[0].ActivityLevel = 0x1f; CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel); data->smc_state_table.MemoryDpmLevelCount = @@ -2951,6 +2950,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->enable_tdc_limit_feature = true; data->enable_pkg_pwr_tracking_feature = true; data->force_pcie_gen = PP_PCIEGenInvalid; + data->mclk_stutter_mode_threshold = 40000; if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) From 31b21243776e1f41813f40ce16c465bf03acd9ba Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 7 Jun 2016 18:38:39 +0800 Subject: [PATCH 0736/1050] drm/amd/powerplay: add avfs related define for polaris Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/smu74.h | 75 +++++++++++++++++-- .../drm/amd/powerplay/inc/smu74_discrete.h | 42 ++++++++--- 2 files changed, 98 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74.h b/drivers/gpu/drm/amd/powerplay/inc/smu74.h index 1a12d85b8e97..fd10a9fa843d 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu74.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu74.h @@ -34,6 +34,30 @@ #define SMU__NUM_LCLK_DPM_LEVELS 8 #define SMU__NUM_PCIE_DPM_LEVELS 8 +#define EXP_M1 35 +#define EXP_M2 92821 +#define EXP_B 66629747 + +#define EXP_M1_1 365 +#define EXP_M2_1 658700 +#define EXP_B_1 305506134 + +#define EXP_M1_2 189 +#define EXP_M2_2 379692 +#define EXP_B_2 194609469 + +#define EXP_M1_3 99 +#define EXP_M2_3 217915 +#define EXP_B_3 122255994 + +#define EXP_M1_4 51 +#define EXP_M2_4 122643 +#define EXP_B_4 74893384 + +#define EXP_M1_5 423 +#define EXP_M2_5 1103326 +#define EXP_B_5 728122621 + enum SID_OPTION { SID_OPTION_HI, SID_OPTION_LO, @@ -548,20 +572,20 @@ struct SMU74_Firmware_Header { uint32_t CacConfigTable; uint32_t CacStatusTable; - uint32_t mcRegisterTable; - uint32_t mcArbDramTimingTable; - - - uint32_t PmFuseTable; uint32_t Globals; uint32_t ClockStretcherTable; uint32_t VftTable; - uint32_t Reserved[21]; + uint32_t Reserved1; + uint32_t AvfsTable; + uint32_t AvfsCksOffGbvTable; + uint32_t AvfsMeanNSigma; + uint32_t AvfsSclkOffsetTable; + uint32_t Reserved[16]; uint32_t Signature; }; @@ -701,8 +725,6 @@ VR Config info is contained in dpmTable.VRConfig */ struct SMU_ClockStretcherDataTableEntry { uint8_t minVID; uint8_t maxVID; - - uint16_t setting; }; typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry; @@ -769,6 +791,43 @@ struct VFT_TABLE_t { typedef struct VFT_TABLE_t VFT_TABLE_t; +/* Total margin, root mean square of Fmax + DC + Platform */ +struct AVFS_Margin_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_Margin_t AVFS_Margin_t; + +#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2 +#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2 + +struct GB_VDROOP_TABLE_t { + int32_t a0; + int32_t a1; + int32_t a2; + uint32_t spare; +}; +typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t; + +struct AVFS_CksOff_Gbv_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t; + +struct AVFS_meanNsigma_t { + uint32_t Aconstant[3]; + uint16_t DC_tol_sigma; + uint16_t Platform_mean; + uint16_t Platform_sigma; + uint16_t PSM_Age_CompFactor; + uint8_t Static_Voltage_Offset[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t; + +struct AVFS_Sclk_Offset_t { + uint16_t Sclk_Offset[8]; +}; +typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t; + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h index 0dfe82336dc7..b85ff5400e57 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h @@ -223,6 +223,16 @@ struct SMU74_Discrete_StateInfo { typedef struct SMU74_Discrete_StateInfo SMU74_Discrete_StateInfo; +struct SMU_QuadraticCoeffs { + int32_t m1; + uint32_t b; + + int16_t m2; + uint8_t m1_shift; + uint8_t m2_shift; +}; +typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs; + struct SMU74_Discrete_DpmTable { SMU74_PIDController GraphicsPIDController; @@ -258,7 +268,14 @@ struct SMU74_Discrete_DpmTable { uint8_t ThermOutPolarity; uint8_t ThermOutMode; uint8_t BootPhases; - uint32_t Reserved[4]; + + uint8_t VRHotLevel; + uint8_t Reserved1[3]; + uint16_t FanStartTemperature; + uint16_t FanStopTemperature; + uint16_t MaxVoltage; + uint16_t Reserved2; + uint32_t Reserved[1]; SMU74_Discrete_GraphicsLevel GraphicsLevel[SMU74_MAX_LEVELS_GRAPHICS]; SMU74_Discrete_MemoryLevel MemoryACPILevel; @@ -347,6 +364,8 @@ struct SMU74_Discrete_DpmTable { uint32_t CurrSclkPllRange; sclkFcwRange_t SclkFcwRangeTable[NUM_SCLK_RANGE]; + GB_VDROOP_TABLE_t BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES]; + SMU_QuadraticCoeffs AVFSGB_VDROOP_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES]; }; typedef struct SMU74_Discrete_DpmTable SMU74_Discrete_DpmTable; @@ -550,16 +569,6 @@ struct SMU7_AcpiScoreboard { typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard; -struct SMU_QuadraticCoeffs { - int32_t m1; - uint32_t b; - - int16_t m2; - uint8_t m1_shift; - uint8_t m2_shift; -}; -typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs; - struct SMU74_Discrete_PmFuses { uint8_t BapmVddCVidHiSidd[8]; uint8_t BapmVddCVidLoSidd[8]; @@ -821,6 +830,17 @@ typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard; #define DB_PCC_SHIFT 26 #define DB_EDC_SHIFT 27 +#define BTCGB0_Vdroop_Enable_MASK 0x1 +#define BTCGB1_Vdroop_Enable_MASK 0x2 +#define AVFSGB0_Vdroop_Enable_MASK 0x4 +#define AVFSGB1_Vdroop_Enable_MASK 0x8 + +#define BTCGB0_Vdroop_Enable_SHIFT 0 +#define BTCGB1_Vdroop_Enable_SHIFT 1 +#define AVFSGB0_Vdroop_Enable_SHIFT 2 +#define AVFSGB1_Vdroop_Enable_SHIFT 3 + + #pragma pack(pop) From c11cb70483c33404d1c0cf9aa48e7627eecbe14d Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 8 Jun 2016 19:17:31 +0800 Subject: [PATCH 0737/1050] drm/amdgpu/atombios: add avfs struct for Polaris10/11 Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/atombios.h | 72 ++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index 32f3e345de08..3493da5c8f0e 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -5538,6 +5538,78 @@ typedef struct _ATOM_ASIC_PROFILING_INFO_V3_5 ULONG ulReserved[12]; }ATOM_ASIC_PROFILING_INFO_V3_5; +/* for Polars10/11 AVFS parameters */ +typedef struct _ATOM_ASIC_PROFILING_INFO_V3_6 +{ + ATOM_COMMON_TABLE_HEADER asHeader; + ULONG ulMaxVddc; + ULONG ulMinVddc; + USHORT usLkgEuseIndex; + UCHAR ucLkgEfuseBitLSB; + UCHAR ucLkgEfuseLength; + ULONG ulLkgEncodeLn_MaxDivMin; + ULONG ulLkgEncodeMax; + ULONG ulLkgEncodeMin; + EFUSE_LINEAR_FUNC_PARAM sRoFuse; + ULONG ulEvvDefaultVddc; + ULONG ulEvvNoCalcVddc; + ULONG ulSpeed_Model; + ULONG ulSM_A0; + ULONG ulSM_A1; + ULONG ulSM_A2; + ULONG ulSM_A3; + ULONG ulSM_A4; + ULONG ulSM_A5; + ULONG ulSM_A6; + ULONG ulSM_A7; + UCHAR ucSM_A0_sign; + UCHAR ucSM_A1_sign; + UCHAR ucSM_A2_sign; + UCHAR ucSM_A3_sign; + UCHAR ucSM_A4_sign; + UCHAR ucSM_A5_sign; + UCHAR ucSM_A6_sign; + UCHAR ucSM_A7_sign; + ULONG ulMargin_RO_a; + ULONG ulMargin_RO_b; + ULONG ulMargin_RO_c; + ULONG ulMargin_fixed; + ULONG ulMargin_Fmax_mean; + ULONG ulMargin_plat_mean; + ULONG ulMargin_Fmax_sigma; + ULONG ulMargin_plat_sigma; + ULONG ulMargin_DC_sigma; + ULONG ulLoadLineSlop; + ULONG ulaTDClimitPerDPM[8]; + ULONG ulaNoCalcVddcPerDPM[8]; + ULONG ulAVFS_meanNsigma_Acontant0; + ULONG ulAVFS_meanNsigma_Acontant1; + ULONG ulAVFS_meanNsigma_Acontant2; + USHORT usAVFS_meanNsigma_DC_tol_sigma; + USHORT usAVFS_meanNsigma_Platform_mean; + USHORT usAVFS_meanNsigma_Platform_sigma; + ULONG ulGB_VDROOP_TABLE_CKSOFF_a0; + ULONG ulGB_VDROOP_TABLE_CKSOFF_a1; + ULONG ulGB_VDROOP_TABLE_CKSOFF_a2; + ULONG ulGB_VDROOP_TABLE_CKSON_a0; + ULONG ulGB_VDROOP_TABLE_CKSON_a1; + ULONG ulGB_VDROOP_TABLE_CKSON_a2; + ULONG ulAVFSGB_FUSE_TABLE_CKSOFF_m1; + USHORT usAVFSGB_FUSE_TABLE_CKSOFF_m2; + ULONG ulAVFSGB_FUSE_TABLE_CKSOFF_b; + ULONG ulAVFSGB_FUSE_TABLE_CKSON_m1; + USHORT usAVFSGB_FUSE_TABLE_CKSON_m2; + ULONG ulAVFSGB_FUSE_TABLE_CKSON_b; + USHORT usMaxVoltage_0_25mv; + UCHAR ucEnableGB_VDROOP_TABLE_CKSOFF; + UCHAR ucEnableGB_VDROOP_TABLE_CKSON; + UCHAR ucEnableGB_FUSE_TABLE_CKSOFF; + UCHAR ucEnableGB_FUSE_TABLE_CKSON; + USHORT usPSM_Age_ComFactor; + UCHAR ucEnableApplyAVFS_CKS_OFF_Voltage; + UCHAR ucReserved; +}ATOM_ASIC_PROFILING_INFO_V3_6; + typedef struct _ATOM_SCLK_FCW_RANGE_ENTRY_V1{ ULONG ulMaxSclkFreq; From 432c3a3ca794bd2301425cb58bf097fc26690c17 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 8 Jun 2016 19:39:42 +0800 Subject: [PATCH 0738/1050] drm/amd/powerplay: enable avfs feature for polaris avfs feature is for voltage control based on gpu system clock on polaris10 Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 90 ++++++++++++++++++- .../drm/amd/powerplay/hwmgr/polaris10_hwmgr.h | 3 + .../amd/powerplay/hwmgr/polaris10_thermal.c | 6 +- .../gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c | 43 +++++++++ .../gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h | 32 +++++++ .../drm/amd/powerplay/inc/polaris10_ppsmc.h | 1 + .../amd/powerplay/smumgr/polaris10_smumgr.c | 50 ++++++++--- 7 files changed, 208 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index c2f5bec272c4..f730ec8b529d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -1303,7 +1303,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, mem_level->VoltageDownHyst = 0; mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target; mem_level->StutterEnable = false; - mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; data->display_timing.num_existing_displays = info.display_count; @@ -1955,6 +1954,90 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr, return 0; } + +int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) +{ + struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + SMU74_Discrete_DpmTable *table = &(data->smc_state_table); + int result = 0; + struct pp_atom_ctrl__avfs_parameters avfs_params = {0}; + AVFS_meanNsigma_t AVFS_meanNsigma = { {0} }; + AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} }; + uint32_t tmp, i; + struct pp_smumgr *smumgr = hwmgr->smumgr; + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend); + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + + + if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) + return result; + + result = atomctrl_get_avfs_information(hwmgr, &avfs_params); + + if (0 == result) { + table->BTCGB_VDROOP_TABLE[0].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0); + table->BTCGB_VDROOP_TABLE[0].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1); + table->BTCGB_VDROOP_TABLE[0].a2 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2); + table->BTCGB_VDROOP_TABLE[1].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0); + table->BTCGB_VDROOP_TABLE[1].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1); + table->BTCGB_VDROOP_TABLE[1].a2 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2); + table->AVFSGB_VDROOP_TABLE[0].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1); + table->AVFSGB_VDROOP_TABLE[0].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2); + table->AVFSGB_VDROOP_TABLE[0].b = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b); + table->AVFSGB_VDROOP_TABLE[0].m1_shift = 24; + table->AVFSGB_VDROOP_TABLE[0].m2_shift = 12; + table->AVFSGB_VDROOP_TABLE[1].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1); + table->AVFSGB_VDROOP_TABLE[1].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2); + table->AVFSGB_VDROOP_TABLE[1].b = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b); + table->AVFSGB_VDROOP_TABLE[1].m1_shift = 24; + table->AVFSGB_VDROOP_TABLE[1].m2_shift = 12; + table->MaxVoltage = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv); + AVFS_meanNsigma.Aconstant[0] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0); + AVFS_meanNsigma.Aconstant[1] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1); + AVFS_meanNsigma.Aconstant[2] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2); + AVFS_meanNsigma.DC_tol_sigma = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma); + AVFS_meanNsigma.Platform_mean = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean); + AVFS_meanNsigma.PSM_Age_CompFactor = PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor); + AVFS_meanNsigma.Platform_sigma = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma); + + for (i = 0; i < NUM_VFT_COLUMNS; i++) { + AVFS_meanNsigma.Static_Voltage_Offset[i] = (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625); + AVFS_SclkOffset.Sclk_Offset[i] = PP_HOST_TO_SMC_US((uint16_t)(sclk_table->entries[i].sclk_offset) / 100); + } + + result = polaris10_read_smc_sram_dword(smumgr, + SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsMeanNSigma), + &tmp, data->sram_end); + + polaris10_copy_bytes_to_smc(smumgr, + tmp, + (uint8_t *)&AVFS_meanNsigma, + sizeof(AVFS_meanNsigma_t), + data->sram_end); + + result = polaris10_read_smc_sram_dword(smumgr, + SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsSclkOffsetTable), + &tmp, data->sram_end); + polaris10_copy_bytes_to_smc(smumgr, + tmp, + (uint8_t *)&AVFS_SclkOffset, + sizeof(AVFS_Sclk_Offset_t), + data->sram_end); + + data->avfs_vdroop_override_setting = (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT); + data->apply_avfs_cks_off_voltage = (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false; + } + return result; +} + + /** * Initializes the SMC table and uploads it * @@ -2055,6 +2138,10 @@ static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr) "Failed to populate Clock Stretcher Data Table!", return result); } + + result = polaris10_populate_avfs_parameters(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, "Failed to populate AVFS Parameters!", return result;); + table->CurrSclkPllRange = 0xff; table->GraphicsVoltageChangeEnable = 1; table->GraphicsThermThrottleEnable = 1; @@ -2277,7 +2364,6 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) "Failed to enable MCLK DPM during DPM Start Function!", return -1); - PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x5); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h index beedf35cbfa6..d717789441f5 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h @@ -312,6 +312,9 @@ struct polaris10_hwmgr { /* soft pptable for re-uploading into smu */ void *soft_pp_table; + + uint32_t avfs_vdroop_override_setting; + bool apply_avfs_cks_off_voltage; }; /* To convert to Q8.8 format for firmware */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c index aba167f7d167..b206632d4650 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c @@ -625,10 +625,14 @@ static int tf_polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr, int ret; struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr); struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend); + struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); - if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS) + if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) return 0; + ret = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting); + ret = (smum_send_msg_to_smc(smumgr, PPSMC_MSG_EnableAvfs) == 0) ? 0 : -1; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c index da9f5f1b6dc2..bf4e18fd3872 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c @@ -1302,3 +1302,46 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr return 0; } + +int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param) +{ + ATOM_ASIC_PROFILING_INFO_V3_6 *profile = NULL; + + if (param == NULL) + return -EINVAL; + + profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *) + cgs_atom_get_data_table(hwmgr->device, + GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo), + NULL, NULL, NULL); + if (!profile) + return -1; + + param->ulAVFS_meanNsigma_Acontant0 = profile->ulAVFS_meanNsigma_Acontant0; + param->ulAVFS_meanNsigma_Acontant1 = profile->ulAVFS_meanNsigma_Acontant1; + param->ulAVFS_meanNsigma_Acontant2 = profile->ulAVFS_meanNsigma_Acontant2; + param->usAVFS_meanNsigma_DC_tol_sigma = profile->usAVFS_meanNsigma_DC_tol_sigma; + param->usAVFS_meanNsigma_Platform_mean = profile->usAVFS_meanNsigma_Platform_mean; + param->usAVFS_meanNsigma_Platform_sigma = profile->usAVFS_meanNsigma_Platform_sigma; + param->ulGB_VDROOP_TABLE_CKSOFF_a0 = profile->ulGB_VDROOP_TABLE_CKSOFF_a0; + param->ulGB_VDROOP_TABLE_CKSOFF_a1 = profile->ulGB_VDROOP_TABLE_CKSOFF_a1; + param->ulGB_VDROOP_TABLE_CKSOFF_a2 = profile->ulGB_VDROOP_TABLE_CKSOFF_a2; + param->ulGB_VDROOP_TABLE_CKSON_a0 = profile->ulGB_VDROOP_TABLE_CKSON_a0; + param->ulGB_VDROOP_TABLE_CKSON_a1 = profile->ulGB_VDROOP_TABLE_CKSON_a1; + param->ulGB_VDROOP_TABLE_CKSON_a2 = profile->ulGB_VDROOP_TABLE_CKSON_a2; + param->ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = profile->ulAVFSGB_FUSE_TABLE_CKSOFF_m1; + param->usAVFSGB_FUSE_TABLE_CKSOFF_m2 = profile->usAVFSGB_FUSE_TABLE_CKSOFF_m2; + param->ulAVFSGB_FUSE_TABLE_CKSOFF_b = profile->ulAVFSGB_FUSE_TABLE_CKSOFF_b; + param->ulAVFSGB_FUSE_TABLE_CKSON_m1 = profile->ulAVFSGB_FUSE_TABLE_CKSON_m1; + param->usAVFSGB_FUSE_TABLE_CKSON_m2 = profile->usAVFSGB_FUSE_TABLE_CKSON_m2; + param->ulAVFSGB_FUSE_TABLE_CKSON_b = profile->ulAVFSGB_FUSE_TABLE_CKSON_b; + param->usMaxVoltage_0_25mv = profile->usMaxVoltage_0_25mv; + param->ucEnableGB_VDROOP_TABLE_CKSOFF = profile->ucEnableGB_VDROOP_TABLE_CKSOFF; + param->ucEnableGB_VDROOP_TABLE_CKSON = profile->ucEnableGB_VDROOP_TABLE_CKSON; + param->ucEnableGB_FUSE_TABLE_CKSOFF = profile->ucEnableGB_FUSE_TABLE_CKSOFF; + param->ucEnableGB_FUSE_TABLE_CKSON = profile->ucEnableGB_FUSE_TABLE_CKSON; + param->usPSM_Age_ComFactor = profile->usPSM_Age_ComFactor; + param->ucEnableApplyAVFS_CKS_OFF_Voltage = profile->ucEnableApplyAVFS_CKS_OFF_Voltage; + + return 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h index d24ebb566905..248c5db5f380 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h @@ -250,6 +250,35 @@ struct pp_atomctrl_gpio_pin_assignment { }; typedef struct pp_atomctrl_gpio_pin_assignment pp_atomctrl_gpio_pin_assignment; +struct pp_atom_ctrl__avfs_parameters { + uint32_t ulAVFS_meanNsigma_Acontant0; + uint32_t ulAVFS_meanNsigma_Acontant1; + uint32_t ulAVFS_meanNsigma_Acontant2; + uint16_t usAVFS_meanNsigma_DC_tol_sigma; + uint16_t usAVFS_meanNsigma_Platform_mean; + uint16_t usAVFS_meanNsigma_Platform_sigma; + uint32_t ulGB_VDROOP_TABLE_CKSOFF_a0; + uint32_t ulGB_VDROOP_TABLE_CKSOFF_a1; + uint32_t ulGB_VDROOP_TABLE_CKSOFF_a2; + uint32_t ulGB_VDROOP_TABLE_CKSON_a0; + uint32_t ulGB_VDROOP_TABLE_CKSON_a1; + uint32_t ulGB_VDROOP_TABLE_CKSON_a2; + uint32_t ulAVFSGB_FUSE_TABLE_CKSOFF_m1; + uint16_t usAVFSGB_FUSE_TABLE_CKSOFF_m2; + uint32_t ulAVFSGB_FUSE_TABLE_CKSOFF_b; + uint32_t ulAVFSGB_FUSE_TABLE_CKSON_m1; + uint16_t usAVFSGB_FUSE_TABLE_CKSON_m2; + uint32_t ulAVFSGB_FUSE_TABLE_CKSON_b; + uint16_t usMaxVoltage_0_25mv; + uint8_t ucEnableGB_VDROOP_TABLE_CKSOFF; + uint8_t ucEnableGB_VDROOP_TABLE_CKSON; + uint8_t ucEnableGB_FUSE_TABLE_CKSOFF; + uint8_t ucEnableGB_FUSE_TABLE_CKSON; + uint16_t usPSM_Age_ComFactor; + uint8_t ucEnableApplyAVFS_CKS_OFF_Voltage; + uint8_t ucReserved; +}; + extern bool atomctrl_get_pp_assign_pin(struct pp_hwmgr *hwmgr, const uint32_t pinId, pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment); extern int atomctrl_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage); extern uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr); @@ -278,5 +307,8 @@ extern int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clo extern int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage); extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl_sclk_range_table *table); + +extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h index 0c6a413eaa5b..d41d37ab5b7c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h @@ -27,6 +27,7 @@ #pragma pack(push, 1) +#define PPSMC_MSG_SetGBDroopSettings ((uint16_t) 0x305) #define PPSMC_SWSTATE_FLAG_DC 0x01 #define PPSMC_SWSTATE_FLAG_UVD 0x02 diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 043b6ac09d5f..5dba7c509710 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -52,19 +52,18 @@ static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = { /* Min pcie DeepSleep Activity CgSpll CgSpll CcPwr CcPwr Sclk Enabled Enabled Voltage Power */ /* Voltage, DpmLevel, DivId, Level, FuncCntl3, FuncCntl4, DynRm, DynRm1 Did, Padding,ForActivity, ForThrottle, UpHyst, DownHyst, DownHyst, Throttle */ - { 0x3c0fd047, 0x00, 0x03, 0x1e00, 0x00200410, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x30750000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0xa00fd047, 0x01, 0x04, 0x1e00, 0x00800510, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x409c0000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0x0410d047, 0x01, 0x00, 0x1e00, 0x00600410, 0x87020000, 0, 0, 0x0e, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x50c30000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0x6810d047, 0x01, 0x00, 0x1e00, 0x00800410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x60ea0000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0xcc10d047, 0x01, 0x00, 0x1e00, 0x00e00410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xe8fd0000, 0, 0, 0, 0, 0, 0, 0 } }, - { 0x3011d047, 0x01, 0x00, 0x1e00, 0x00400510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x70110100, 0, 0, 0, 0, 0, 0, 0 } }, - { 0x9411d047, 0x01, 0x00, 0x1e00, 0x00a00510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xf8240100, 0, 0, 0, 0, 0, 0, 0 } }, - { 0xf811d047, 0x01, 0x00, 0x1e00, 0x00000610, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x80380100, 0, 0, 0, 0, 0, 0, 0 } } + { 0x100ea446, 0x00, 0x03, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x30750000, 0x3000, 0, 0x2600, 0, 0, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } }, + { 0x400ea446, 0x01, 0x04, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x409c0000, 0x2000, 0, 0x1e00, 1, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } }, + { 0x740ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x50c30000, 0x2800, 0, 0x2000, 1, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } }, + { 0xa40ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x60ea0000, 0x3000, 0, 0x2600, 1, 1, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } }, + { 0xd80ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x70110100, 0x3800, 0, 0x2c00, 1, 1, 0x0004, 0x1203, 0xffff, 0x3600, 0xc9e2, 0x2e00 } }, + { 0x3c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x80380100, 0x2000, 0, 0x1e00, 2, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } }, + { 0x6c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x905f0100, 0x2400, 0, 0x1e00, 2, 1, 0x0004, 0x8901, 0xffff, 0x2300, 0x314c, 0x1d00 } }, + { 0xa00fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0xa0860100, 0x2800, 0, 0x2000, 2, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } } }; static const SMU74_Discrete_MemoryLevel avfs_memory_level_polaris10 = - {0x50140000, 0x50140000, 0x00320000, 0x00, 0x00, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x0000, 0x00, 0x00}; + {0x100ea446, 0, 0x30750000, 0x01, 0x01, 0x01, 0x00, 0x00, 0x64, 0x00, 0x00, 0x1f00, 0x00, 0x00}; /** * Set the address for reading/writing the SMC SRAM space. @@ -219,6 +218,18 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr) && (0x20100 <= cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMC_PC_C))); } +static bool polaris10_is_hw_avfs_present(struct pp_smumgr *smumgr) +{ + uint32_t efuse; + + efuse = cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMU_EFUSE_0 + (49*4)); + efuse &= 0x00000001; + if (efuse) + return true; + + return false; +} + /** * Send a message to the SMC, and wait for its response. * @@ -228,21 +239,27 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr) */ int polaris10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg) { + int ret; + if (!polaris10_is_smc_ram_running(smumgr)) return -1; + SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0); - if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP)) - printk("Failed to send Previous Message.\n"); + ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP); + if (ret != 1) + printk("\n failed to send pre message %x ret is %d \n", msg, ret); cgs_write_register(smumgr->device, mmSMC_MESSAGE_0, msg); SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0); - if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP)) - printk("Failed to send Message.\n"); + ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP); + + if (ret != 1) + printk("\n failed to send message %x ret is %d \n", msg, ret); return 0; } @@ -953,6 +970,11 @@ static int polaris10_smu_init(struct pp_smumgr *smumgr) (cgs_handle_t)smu_data->smu_buffer.handle); return -1;); + if (polaris10_is_hw_avfs_present(smumgr)) + smu_data->avfs.avfs_btc_status = AVFS_BTC_BOOT; + else + smu_data->avfs.avfs_btc_status = AVFS_BTC_NOTSUPPORTED; + return 0; } From 92d1576859577b94eafaea9b64f78ab99fe20a78 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 7 Jun 2016 14:09:56 +0800 Subject: [PATCH 0739/1050] drm/amdgpu/gfx8: update golden setting for polaris10 Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 9f6f8669edc3..1a5cbaff1e34 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -297,7 +297,8 @@ static const u32 polaris11_golden_common_all[] = static const u32 golden_settings_polaris10_a11[] = { mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, - mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, + mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0, 0x0f000000, mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, mmDB_DEBUG2, 0xf00fffff, 0x00000400, mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, From 270d013659ddab52a6fd0eacae452c422d08aa39 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 7 Jun 2016 18:39:06 +0800 Subject: [PATCH 0740/1050] drm/amd/powerplay: enable clock stretch feature for polaris Power saving feature which reduces the amount of voltage needed for specific engine clocks. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 120 ++++-------------- 1 file changed, 23 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index f730ec8b529d..64ee78f7d41e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -1759,12 +1759,9 @@ static int polaris10_populate_smc_initailial_state(struct pp_hwmgr *hwmgr) static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) { - uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks, - volt_with_cks, value; - uint16_t clock_freq_u16; + uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); - uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2, - volt_offset = 0; + uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0; struct phm_ppt_v1_information *table_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = @@ -1776,50 +1773,38 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) * if the part is SS or FF. if RO >= 1660MHz, part is FF. */ efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixSMU_EFUSE_0 + (146 * 4)); - efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixSMU_EFUSE_0 + (148 * 4)); + ixSMU_EFUSE_0 + (67 * 4)); efuse &= 0xFF000000; efuse = efuse >> 24; - efuse2 &= 0xF; - if (efuse2 == 1) - ro = (2300 - 1350) * efuse / 255 + 1350; - else - ro = (2500 - 1000) * efuse / 255 + 1000; + if (hwmgr->chip_id == CHIP_POLARIS10) { + min = 1000; + max = 2300; + } else { + min = 1100; + max = 2100; + } - if (ro >= 1660) - type = 0; - else - type = 1; - - /* Populate Stretch amount */ - data->smc_state_table.ClockStretcherAmount = stretch_amount; + ro = efuse * (max -min)/255 + min; /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ for (i = 0; i < sclk_table->count; i++) { data->smc_state_table.Sclk_CKS_masterEn0_7 |= sclk_table->entries[i].cks_enable << i; - volt_without_cks = (uint32_t)((14041 * - (sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 / - (4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000))); - volt_with_cks = (uint32_t)((13946 * - (sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 / - (3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000))); + + volt_without_cks = (uint32_t)(((ro - 40) * 1000 - 2753594 - sclk_table->entries[i].clk/100 * 136418 /1000) / \ + (sclk_table->entries[i].clk/100 * 1132925 /10000 - 242418)/100); + + volt_with_cks = (uint32_t)((ro * 1000 -2396351 - sclk_table->entries[i].clk/100 * 329021/1000) / \ + (sclk_table->entries[i].clk/10000 * 649434 /1000 - 18005)/10); + if (volt_without_cks >= volt_with_cks) volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + sclk_table->entries[i].cks_voffset) * 100 / 625) + 1); + data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; } - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - STRETCH_ENABLE, 0x0); - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - masterReset, 0x1); - /* PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, staticEnable, 0x1); */ - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - masterReset, 0x0); - /* Populate CKS Lookup Table */ if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) stretch_amount2 = 0; @@ -1833,69 +1818,6 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) return -EINVAL); } - value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL); - value &= 0xFFC2FF87; - data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq = - polaris10_clock_stretcher_lookup_table[stretch_amount2][0]; - data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq = - polaris10_clock_stretcher_lookup_table[stretch_amount2][1]; - clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(data->smc_state_table. - GraphicsLevel[data->smc_state_table.GraphicsDpmLevelCount - 1].SclkSetting.SclkFrequency) / 100); - if (polaris10_clock_stretcher_lookup_table[stretch_amount2][0] < clock_freq_u16 - && polaris10_clock_stretcher_lookup_table[stretch_amount2][1] > clock_freq_u16) { - /* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */ - value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 16; - /* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */ - value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][2]) << 18; - /* Program PWR_CKS_CNTL. CKS_STRETCH_AMOUNT */ - value |= (polaris10_clock_stretch_amount_conversion - [polaris10_clock_stretcher_lookup_table[stretch_amount2][3]] - [stretch_amount]) << 3; - } - CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq); - CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq); - data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting = - polaris10_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F; - data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |= - (polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 7; - - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL, value); - - /* Populate DDT Lookup Table */ - for (i = 0; i < 4; i++) { - /* Assign the minimum and maximum VID stored - * in the last row of Clock Stretcher Voltage Table. - */ - data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].minVID = - (uint8_t) polaris10_clock_stretcher_ddt_table[type][i][2]; - data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].maxVID = - (uint8_t) polaris10_clock_stretcher_ddt_table[type][i][3]; - /* Loop through each SCLK and check the frequency - * to see if it lies within the frequency for clock stretcher. - */ - for (j = 0; j < data->smc_state_table.GraphicsDpmLevelCount; j++) { - cks_setting = 0; - clock_freq = PP_SMC_TO_HOST_UL( - data->smc_state_table.GraphicsLevel[j].SclkSetting.SclkFrequency); - /* Check the allowed frequency against the sclk level[j]. - * Sclk's endianness has already been converted, - * and it's in 10Khz unit, - * as opposed to Data table, which is in Mhz unit. - */ - if (clock_freq >= (polaris10_clock_stretcher_ddt_table[type][i][0]) * 100) { - cks_setting |= 0x2; - if (clock_freq < (polaris10_clock_stretcher_ddt_table[type][i][1]) * 100) - cks_setting |= 0x1; - } - data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting - |= cks_setting << (j * 2); - } - CONVERT_FROM_HOST_TO_SMC_US( - data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting); - } - value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL); value &= 0xFFFFFFFE; cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value); @@ -3062,6 +2984,10 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->vddci_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2; } + if (table_info->cac_dtp_table->usClockStretchAmount != 0) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher); + polaris10_set_features_platform_caps(hwmgr); polaris10_init_dpm_defaults(hwmgr); From 34511dce4b35685d3988d5c8b100d11a068db5bd Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Mon, 20 Jun 2016 11:10:26 +0300 Subject: [PATCH 0741/1050] drm/i915: Revert DisplayPort fast link training feature It has been found out that in some HW combination the DisplayPort fast link training feature caused screen flickering. Let's revert this feature for now until we can ensure that the feature works for all platforms. This is a manual revert of commits 5fa836a9d859 ("drm/i915: DP link training optimization") and 4e96c97742f4 ("drm/i915: eDP link training optimization"). Fixes: 5fa836a9d859 ("drm/i915: DP link training optimization") Fixes: 4e96c97742f4 ("drm/i915: eDP link training optimization") Cc: # v4.2+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91393 Reviewed-by: Jani Nikula Signed-off-by: Mika Kahola Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1466410226-19543-1-git-send-email-mika.kahola@intel.com (cherry picked from commit 91df09d92ad82c8778ca218097bf827f154292ca) --- drivers/gpu/drm/i915/intel_dp.c | 3 --- drivers/gpu/drm/i915/intel_dp_link_training.c | 26 ++----------------- drivers/gpu/drm/i915/intel_drv.h | 2 -- 3 files changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index ffe5f8430957..79cf2d5f5a20 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4977,9 +4977,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) intel_display_power_get(dev_priv, power_domain); if (long_hpd) { - /* indicate that we need to restart link training */ - intel_dp->train_set_valid = false; - intel_dp_long_pulse(intel_dp->attached_connector); if (intel_dp->is_mst) ret = IRQ_HANDLED; diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 0b8eefc2acc5..60fb39cd220b 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -85,8 +85,7 @@ static bool intel_dp_reset_link_train(struct intel_dp *intel_dp, uint8_t dp_train_pat) { - if (!intel_dp->train_set_valid) - memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set)); + memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set)); intel_dp_set_signal_levels(intel_dp); return intel_dp_set_link_train(intel_dp, dp_train_pat); } @@ -161,23 +160,6 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) break; } - /* - * if we used previously trained voltage and pre-emphasis values - * and we don't get clock recovery, reset link training values - */ - if (intel_dp->train_set_valid) { - DRM_DEBUG_KMS("clock recovery not ok, reset"); - /* clear the flag as we are not reusing train set */ - intel_dp->train_set_valid = false; - if (!intel_dp_reset_link_train(intel_dp, - DP_TRAINING_PATTERN_1 | - DP_LINK_SCRAMBLING_DISABLE)) { - DRM_ERROR("failed to enable link training\n"); - return; - } - continue; - } - /* Check to see if we've tried the max voltage */ for (i = 0; i < intel_dp->lane_count; i++) if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0) @@ -284,7 +266,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) /* Make sure clock is still ok */ if (!drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) { - intel_dp->train_set_valid = false; intel_dp_link_training_clock_recovery(intel_dp); intel_dp_set_link_train(intel_dp, training_pattern | @@ -301,7 +282,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) /* Try 5 times, then try clock recovery if that fails */ if (tries > 5) { - intel_dp->train_set_valid = false; intel_dp_link_training_clock_recovery(intel_dp); intel_dp_set_link_train(intel_dp, training_pattern | @@ -322,10 +302,8 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) intel_dp_set_idle_link_train(intel_dp); - if (channel_eq) { - intel_dp->train_set_valid = true; + if (channel_eq) DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n"); - } } void intel_dp_stop_link_train(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4a24b0067a3a..f7f0f01814f6 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -863,8 +863,6 @@ struct intel_dp { /* This is called before a link training is starterd */ void (*prepare_link_retrain)(struct intel_dp *intel_dp); - bool train_set_valid; - /* Displayport compliance testing */ unsigned long compliance_test_type; unsigned long compliance_test_data; From 1e3fa0acfec677e915d7de5ac6e1f18cfa4f805b Mon Sep 17 00:00:00 2001 From: Lyude Date: Thu, 9 Jun 2016 11:58:15 -0400 Subject: [PATCH 0742/1050] drm/i915/fbc: Disable on HSW by default for now >From https://bugs.freedesktop.org/show_bug.cgi?id=96461 : This was kind of a difficult bug to track down. If you're using a Haswell system running GNOME and you have fbc completely enabled and working, playing videos can result in video artifacts. Steps to reproduce: - Run GNOME - Ensure FBC is enabled and active - Download a movie, I used the ogg version of Big Buck Bunny for this - Run `gst-launch-1.0 filesrc location='some_movie.ogg' ! decodebin ! glimagesink` in a terminal - Watch for about over a minute, you'll see small horizontal lines go down the screen. For the time being, disable FBC for Haswell by default. Stefan Richter reported kernel freezes (no video artifacts) when fbc is on. (E3-1245 v3 with HD P4600; openbox and some KDE and LXDE applications, thread begins at https://lkml.org/lkml/2016/4/26/813). We also got reports from Steven Honeyman on openbox+roxterm. v2 (From Paulo): - Add extra information to the commit message - Add Fixes tag - Rebase Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96461 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96464 Fixes: a98ee79317b4 ("drm/i915/fbc: enable FBC by default on HSW and BDW") Cc: stable@vger.kernel.org Reviewed-by: Paulo Zanoni Signed-off-by: Lyude Signed-off-by: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1465487895-7401-1-git-send-email-cpaul@redhat.com (cherry picked from commit c7f7e2feffb0294302041507dfd5fc15f01afccc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_fbc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index d5a7cfec589b..647127f3aaff 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -824,8 +824,7 @@ static bool intel_fbc_can_choose(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; struct intel_fbc *fbc = &dev_priv->fbc; - bool enable_by_default = IS_HASWELL(dev_priv) || - IS_BROADWELL(dev_priv); + bool enable_by_default = IS_BROADWELL(dev_priv); if (intel_vgpu_active(dev_priv->dev)) { fbc->no_fbc_reason = "VGPU is active"; From f7e0efc9b50266f5317b50732efff98d40fc879a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 17 Jun 2016 18:33:00 +0100 Subject: [PATCH 0743/1050] arm64: update ASID limit During a rollover, we mark the active ASID on each CPU as reserved, before allocating a new ID for the task that caused the rollover. This means that with N CPUs, we can only guarantee the new task to obtain a valid ASID if we have at least N+1 ASIDs. Update this limit in the initcall check. Note that this restriction was introduced by commit 8e648066 on the arch/arm side, which disallow re-using the previously active ASID on the local CPU, as it would introduce a TLB race. In addition, we only dispose of NUM_USER_ASIDS-1, since ASID 0 is reserved. Add this restriction as well. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- arch/arm64/mm/context.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b7b397802088..efcf1f7ef1e4 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -179,7 +179,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) &asid_generation); flush_context(cpu); - /* We have at least 1 ASID per CPU, so this will always succeed */ + /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); set_asid: @@ -227,8 +227,11 @@ switch_mm_fastpath: static int asids_init(void) { asid_bits = get_cpu_asid_bits(); - /* If we end up with more CPUs than ASIDs, expect things to crash */ - WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + /* + * Expect allocation after rollover to fail if we don't have at least + * one more ASID than CPUs. ASID #0 is reserved for init_mm. + */ + WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus()); atomic64_set(&asid_generation, ASID_FIRST_VERSION); asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), GFP_KERNEL); From 9ca4e58c20d5cb2a5bc378238188c71317aceac5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 21 Jun 2016 10:44:00 +0900 Subject: [PATCH 0744/1050] arm64: fix boot image dependencies to not generate invalid images I fixed boot image dependencies for arch/arm in commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images"). I see a similar problem for arch/arm64; "make -jN Image Image.gz" would sometimes end up generating bad images where N > 1. Fix the dependency in arch/arm64/Makefile to avoid the race between "make Image" and "make Image.*". Acked-by: Catalin Marinas Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7085e322dc42..648a32c89541 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -95,7 +95,7 @@ boot := arch/arm64/boot Image: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -Image.%: vmlinux +Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zinstall install: From 20c27a4270c775d7ed661491af8ac03264d60fc6 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 21 Jun 2016 15:32:57 +0800 Subject: [PATCH 0745/1050] arm64: mm: remove page_mapping check in __sync_icache_dcache __sync_icache_dcache unconditionally skips the cache maintenance for anonymous pages, under the assumption that flushing is only required in the presence of D-side aliases [see 7249b79f6b4cc ("arm64: Do not flush the D-cache for anonymous pages")]. Unfortunately, this breaks migration of anonymous pages holding self-modifying code, where userspace cannot be reasonably expected to reissue maintenance instructions in response to a migration. This patch fixes the problem by removing the broken page_mapping(page) check from the cache syncing code, otherwise we may end up fetching and executing stale instructions from the PoU. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Reviewed-by: Catalin Marinas Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- arch/arm64/mm/flush.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index dbd12ea8ce68..43a76b07eb32 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) { struct page *page = pte_page(pte); - /* no flushing needed for anonymous pages */ - if (!page_mapping(page)) - return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); From 2f38b1b16d9280689e5cfa47a4c50956bf437f0d Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 21 Jun 2016 12:34:15 +0800 Subject: [PATCH 0746/1050] ACPICA: Namespace: Fix deadlock triggered by MLC support in dynamic table loading The new module-level code (MLC) approach invokes MLC on the per-table basis, but the dynamic loading support of this is incorrect because of the lock order: acpi_ns_evaluate acpi_ex_enter_intperter acpi_ns_load_table (triggered by Load opcode) acpi_ns_exec_module_code_list acpi_ex_enter_intperter The regression is introduced by the following commit: Commit: 2785ce8d0da1cac9d8f78615e116cf929e9a9123 ACPICA Commit: 071eff738c59eda1792ac24b3b688b61691d7e7c Subject: ACPICA: Add per-table execution of module-level code This patch fixes this regression by unlocking the interpreter lock before invoking MLC. However, the unlocking is done to the acpi_ns_load_table(), in which the interpreter lock should be locked by acpi_ns_parse_table() but it wasn't. Fixes: 2785ce8d0da1 (ACPICA: Add per-table execution of module-level code) Reported-by: Mika Westerberg Tested-by: Mika Westerberg Signed-off-by: Lv Zheng Cc: 4.5+ # 4.5+ [ rjw : Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/exconfig.c | 2 ++ drivers/acpi/acpica/nsparse.c | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c index a1d177d58254..21932d640a41 100644 --- a/drivers/acpi/acpica/exconfig.c +++ b/drivers/acpi/acpica/exconfig.c @@ -108,7 +108,9 @@ acpi_ex_add_table(u32 table_index, /* Add the table to the namespace */ + acpi_ex_exit_interpreter(); status = acpi_ns_load_table(table_index, parent_node); + acpi_ex_enter_interpreter(); if (ACPI_FAILURE(status)) { acpi_ut_remove_reference(obj_desc); *ddb_handle = NULL; diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c index f631a47724f0..1783cd7e1446 100644 --- a/drivers/acpi/acpica/nsparse.c +++ b/drivers/acpi/acpica/nsparse.c @@ -47,6 +47,7 @@ #include "acparser.h" #include "acdispat.h" #include "actables.h" +#include "acinterp.h" #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsparse") @@ -170,6 +171,8 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node) ACPI_FUNCTION_TRACE(ns_parse_table); + acpi_ex_enter_interpreter(); + /* * AML Parse, pass 1 * @@ -185,7 +188,7 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node) status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1, table_index, start_node); if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); + goto error_exit; } /* @@ -201,8 +204,10 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node) status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2, table_index, start_node); if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); + goto error_exit; } +error_exit: + acpi_ex_exit_interpreter(); return_ACPI_STATUS(status); } From 3e1d7fb0d279fea19eb4e36cc9bddf89264ba03f Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Fri, 2 Oct 2015 12:39:23 +0900 Subject: [PATCH 0747/1050] PM / devfreq: devm_kzalloc to have dev pointer more precisely devm_kzalloc of devfreq's statistics data structure has been using its parent device as the dev allocated for. If a device's devfreq is disabled in run-time, such allocated memory won't be freed. Desginating more precisely with the devfreq device pointer fixes the issue. Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 1d6c803804d5..380173738a1d 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -561,15 +561,6 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_lock(&devfreq->lock); } - devfreq->trans_table = devm_kzalloc(dev, sizeof(unsigned int) * - devfreq->profile->max_state * - devfreq->profile->max_state, - GFP_KERNEL); - devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned long) * - devfreq->profile->max_state, - GFP_KERNEL); - devfreq->last_stat_updated = jiffies; - dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { @@ -578,6 +569,15 @@ struct devfreq *devfreq_add_device(struct device *dev, goto err_out; } + devfreq->trans_table = devm_kzalloc(&devfreq->dev, sizeof(unsigned int) * + devfreq->profile->max_state * + devfreq->profile->max_state, + GFP_KERNEL); + devfreq->time_in_state = devm_kzalloc(&devfreq->dev, sizeof(unsigned long) * + devfreq->profile->max_state, + GFP_KERNEL); + devfreq->last_stat_updated = jiffies; + srcu_init_notifier_head(&devfreq->transition_notifier_list); mutex_unlock(&devfreq->lock); From ac4b281176a54d17dd3f91b7fb4a4656471d8730 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Fri, 2 Oct 2015 12:48:54 +0900 Subject: [PATCH 0748/1050] PM / devfreq: fix duplicated kfree on devfreq pointer device_unregister() calls kfree already. Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 380173738a1d..6f33c1e89a14 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -603,7 +603,6 @@ struct devfreq *devfreq_add_device(struct device *dev, err_init: list_del(&devfreq->node); device_unregister(&devfreq->dev); - kfree(devfreq); err_out: return ERR_PTR(err); } From a5e9b937fa91b3f404618b5281c3239bd1f09c7b Mon Sep 17 00:00:00 2001 From: Cai Zhiyong Date: Sat, 14 May 2016 14:13:30 +0800 Subject: [PATCH 0749/1050] PM / devfreq: fix double call put_device 1295 */ 1296 void device_unregister(struct device *dev) 1297 { 1298 pr_debug("device: '%s': %s\n", dev_name(dev), __func__); 1299 device_del(dev); 1300 put_device(dev); 1301 } 1302 EXPORT_SYMBOL_GPL(device_unregister); 1303 device_unregister is called put_device, there is no need to call put_device(&devfreq->dev) again. Signed-off-by: Cai Zhiyong Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 6f33c1e89a14..3e1afb4ac77c 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -620,7 +620,6 @@ int devfreq_remove_device(struct devfreq *devfreq) return -EINVAL; device_unregister(&devfreq->dev); - put_device(&devfreq->dev); return 0; } From 67ffdb529b4ec9833f73947dec01eaa78dd53c3d Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Mon, 16 May 2016 11:41:57 +0900 Subject: [PATCH 0750/1050] PM / devfreq: remove double put_device When device_register() returns with error, it has already done put_device() on the input device pointer. Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 3e1afb4ac77c..0ebd64dfc0a9 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -564,7 +564,6 @@ struct devfreq *devfreq_add_device(struct device *dev, dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { - put_device(&devfreq->dev); mutex_unlock(&devfreq->lock); goto err_out; } From 674789dd2c4b53ed368dc81ae22d72ac4fdb92ec Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 May 2016 09:45:42 +0300 Subject: [PATCH 0751/1050] PM / devfreq: exynos-nocp: Remove incorrect IS_ERR() check Smatch complains because platform_get_resource() returns NULL on error and not an error pointer so the check is wrong. Julia Lawall pointed out that normally we don't check these, because devm_ioremap_resource() has a check for NULL. Signed-off-by: Dan Carpenter Reviewed-by: Julia Lawall Signed-off-by: MyungJoo Ham --- drivers/devfreq/event/exynos-nocp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c index 6b6a5f310486..a5841403bde8 100644 --- a/drivers/devfreq/event/exynos-nocp.c +++ b/drivers/devfreq/event/exynos-nocp.c @@ -220,9 +220,6 @@ static int exynos_nocp_parse_dt(struct platform_device *pdev, /* Maps the memory mapped IO to control nocp register */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (IS_ERR(res)) - return PTR_ERR(res); - base = devm_ioremap_resource(dev, res); if (IS_ERR(base)) return PTR_ERR(base); From 8d39fc085d268a56486066a3deca94745f804f2d Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 31 May 2016 11:25:09 +0100 Subject: [PATCH 0752/1050] PM / devfreq: fix initialization of current frequency in last status Some systems need current frequency from last_status for calculation but it is zeroed during initialization. When the device starts there is no history, but we can assume that the last frequency was the same as the initial frequency (which is also used in 'previous_freq'). The log shows the result of this misinterpreted value. [ 2.042847] ... Failed to get voltage for frequency 0: -34 Signed-off-by: Lukasz Luba Reviewed-by: Javi Merino Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 0ebd64dfc0a9..c7f47e34807b 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -552,6 +552,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->profile = profile; strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); devfreq->previous_freq = profile->initial_freq; + devfreq->last_status.current_frequency = profile->initial_freq; devfreq->data = data; devfreq->nb.notifier_call = devfreq_notifier_call; From 04e59a0211ff012ba60c00baca673482570784e9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 18 Jun 2016 11:31:33 +0200 Subject: [PATCH 0753/1050] phy-sun4i-usb: Fix irq free conditions to match request conditions commit 5cf700ac9d50 ("phy: phy-sun4i-usb: Fix optional gpios failing probe") changed the condition under which irqs are requested, but omitted matching changes to sun4i_usb_phy_remove(). This commit fixes this. Fixes: 5cf700ac9d50 ("phy: phy-sun4i-usb: Fix optional gpios failing probe") Signed-off-by: Hans de Goede Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-sun4i-usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c index 739029430dda..de3101fbbf40 100644 --- a/drivers/phy/phy-sun4i-usb.c +++ b/drivers/phy/phy-sun4i-usb.c @@ -514,9 +514,9 @@ static int sun4i_usb_phy_remove(struct platform_device *pdev) if (data->vbus_power_nb_registered) power_supply_unreg_notifier(&data->vbus_power_nb); - if (data->id_det_irq >= 0) + if (data->id_det_irq > 0) devm_free_irq(dev, data->id_det_irq, data); - if (data->vbus_det_irq >= 0) + if (data->vbus_det_irq > 0) devm_free_irq(dev, data->vbus_det_irq, data); cancel_delayed_work_sync(&data->detect); From c9058d43d99404986133112393be1d1a4daf1b69 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 21 Jun 2016 19:18:32 +0100 Subject: [PATCH 0754/1050] ALSA: hda/tegra: iomem fixups for sparse warnings The readl/writel are not being passed __iomem annotated variables, so fix the following sparse warnings by adding __iomem in: sound/pci/hda/hda_tegra.c:120:9: warning: incorrect type in argument 2 (different address spaces) sound/pci/hda/hda_tegra.c:120:9: expected void volatile [noderef] *addr sound/pci/hda/hda_tegra.c:120:9: got unsigned int [usertype] *addr sound/pci/hda/hda_tegra.c:125:16: warning: incorrect type in argument 1 (different address spaces) sound/pci/hda/hda_tegra.c:125:16: expected void const volatile [noderef] *addr sound/pci/hda/hda_tegra.c:125:16: got unsigned int [usertype] *addr sound/pci/hda/hda_tegra.c:134:13: warning: incorrect type in argument 1 (different address spaces) sound/pci/hda/hda_tegra.c:134:13: expected void const volatile [noderef] *addr sound/pci/hda/hda_tegra.c:134:13: got void *dword_addr sound/pci/hda/hda_tegra.c:137:9: warning: incorrect type in argument 2 (different address spaces) sound/pci/hda/hda_tegra.c:137:9: expected void volatile [noderef] *addr sound/pci/hda/hda_tegra.c:137:9: got void *dword_addr sound/pci/hda/hda_tegra.c:146:13: warning: incorrect type in argument 1 (different address spaces) sound/pci/hda/hda_tegra.c:146:13: expected void const volatile [noderef] *addr sound/pci/hda/hda_tegra.c:146:13: got void *dword_addr sound/pci/hda/hda_tegra.c:156:13: warning: incorrect type in argument 1 (different address spaces) sound/pci/hda/hda_tegra.c:156:13: expected void const volatile [noderef] *addr sound/pci/hda/hda_tegra.c:156:13: got void *dword_addr sound/pci/hda/hda_tegra.c:159:9: warning: incorrect type in argument 2 (different address spaces) sound/pci/hda/hda_tegra.c:159:9: expected void volatile [noderef] *addr sound/pci/hda/hda_tegra.c:159:9: got void *dword_addr sound/pci/hda/hda_tegra.c:168:13: warning: incorrect type in argument 1 (different address spaces) sound/pci/hda/hda_tegra.c:168:13: expected void const volatile [noderef] *addr sound/pci/hda/hda_tegra.c:168:13: got void *dword_addr sound/pci/hda/hda_tegra.c:173:23: warning: incorrect type in initializer (incompatible argument 2 (different address spaces)) sound/pci/hda/hda_tegra.c:173:23: expected void ( *reg_writel )( ... ) sound/pci/hda/hda_tegra.c:173:23: got void ( static [toplevel] * )( ... ) sound/pci/hda/hda_tegra.c:174:22: warning: incorrect type in initializer (incompatible argument 1 (different address spaces)) sound/pci/hda/hda_tegra.c:174:22: expected unsigned int ( *reg_readl )( ... ) sound/pci/hda/hda_tegra.c:174:22: got unsigned int ( static [toplevel] * )( ... ) sound/pci/hda/hda_tegra.c:175:23: warning: incorrect type in initializer (incompatible argument 2 (different address spaces)) sound/pci/hda/hda_tegra.c:175:23: expected void ( *reg_writew )( ... ) sound/pci/hda/hda_tegra.c:175:23: got void ( static [toplevel] * )( ... ) sound/pci/hda/hda_tegra.c:176:22: warning: incorrect type in initializer (incompatible argument 1 (different address spaces)) sound/pci/hda/hda_tegra.c:176:22: expected unsigned short ( *reg_readw )( ... ) sound/pci/hda/hda_tegra.c:176:22: got unsigned short ( static [toplevel] * )( ... ) sound/pci/hda/hda_tegra.c:177:23: warning: incorrect type in initializer (incompatible argument 2 (different address spaces)) sound/pci/hda/hda_tegra.c:177:23: expected void ( *reg_writeb )( ... ) sound/pci/hda/hda_tegra.c:177:23: got void ( static [toplevel] * )( ... ) sound/pci/hda/hda_tegra.c:178:22: warning: incorrect type in initializer (incompatible argument 1 (different address spaces)) sound/pci/hda/hda_tegra.c:178:22: expected unsigned char ( *reg_readb )( ... ) sound/pci/hda/hda_tegra.c:178:22: got unsigned char ( static [toplevel] * )( ... ) Signed-off-by: Ben Dooks Acked-by: Thierry Reding Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_tegra.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index 17fd81736d3d..0621920f7617 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -115,20 +115,20 @@ static int substream_free_pages(struct azx *chip, /* * Register access ops. Tegra HDA register access is DWORD only. */ -static void hda_tegra_writel(u32 value, u32 *addr) +static void hda_tegra_writel(u32 value, u32 __iomem *addr) { writel(value, addr); } -static u32 hda_tegra_readl(u32 *addr) +static u32 hda_tegra_readl(u32 __iomem *addr) { return readl(addr); } -static void hda_tegra_writew(u16 value, u16 *addr) +static void hda_tegra_writew(u16 value, u16 __iomem *addr) { unsigned int shift = ((unsigned long)(addr) & 0x3) << 3; - void *dword_addr = (void *)((unsigned long)(addr) & ~0x3); + void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3); u32 v; v = readl(dword_addr); @@ -137,20 +137,20 @@ static void hda_tegra_writew(u16 value, u16 *addr) writel(v, dword_addr); } -static u16 hda_tegra_readw(u16 *addr) +static u16 hda_tegra_readw(u16 __iomem *addr) { unsigned int shift = ((unsigned long)(addr) & 0x3) << 3; - void *dword_addr = (void *)((unsigned long)(addr) & ~0x3); + void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3); u32 v; v = readl(dword_addr); return (v >> shift) & 0xffff; } -static void hda_tegra_writeb(u8 value, u8 *addr) +static void hda_tegra_writeb(u8 value, u8 __iomem *addr) { unsigned int shift = ((unsigned long)(addr) & 0x3) << 3; - void *dword_addr = (void *)((unsigned long)(addr) & ~0x3); + void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3); u32 v; v = readl(dword_addr); @@ -159,10 +159,10 @@ static void hda_tegra_writeb(u8 value, u8 *addr) writel(v, dword_addr); } -static u8 hda_tegra_readb(u8 *addr) +static u8 hda_tegra_readb(u8 __iomem *addr) { unsigned int shift = ((unsigned long)(addr) & 0x3) << 3; - void *dword_addr = (void *)((unsigned long)(addr) & ~0x3); + void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3); u32 v; v = readl(dword_addr); From 5c492c3f5255bd34f7ff8867515ecf98dcba2a2e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 Jun 2016 10:06:12 +0100 Subject: [PATCH 0755/1050] arm64: smp: Add function to determine if cpus are stuck in the kernel kernel/smp.c has a fancy counter that keeps track of the number of CPUs it marked as not-present and left in cpu_park_loop(). If there are any CPUs spinning in here, features like kexec or hibernate may release them by overwriting this memory. This problem also occurs on machines using spin-tables to release secondary cores. After commit 44dbcc93ab67 ("arm64: Fix behavior of maxcpus=N") we bring all known cpus into the secondary holding pen, meaning this memory can't be re-used by kexec or hibernate. Add a function cpus_are_stuck_in_kernel() to determine if either of these cases have occurred. Signed-off-by: James Morse Acked-by: Mark Rutland Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/smp.h | 12 ++++++++++++ arch/arm64/kernel/smp.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 433e50405274..022644704a93 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void) cpu_park_loop(); } +/* + * If a secondary CPU enters the kernel but fails to come online, + * (e.g. due to mismatched features), and cannot exit the kernel, + * we increment cpus_stuck_in_kernel and leave the CPU in a + * quiesecent loop within the kernel text. The memory containing + * this loop must not be re-used for anything else as the 'stuck' + * core is executing it. + * + * This function is used to inhibit features like kexec and hibernate. + */ +bool cpus_are_stuck_in_kernel(void); + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 678e0842cb3b..62ff3c0622e2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -909,3 +909,21 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +static bool have_cpu_die(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int any_cpu = raw_smp_processor_id(); + + if (cpu_ops[any_cpu]->cpu_die) + return true; +#endif + return false; +} + +bool cpus_are_stuck_in_kernel(void) +{ + bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); + + return !!cpus_stuck_in_kernel || smp_spin_tables; +} From d74b4e4f1a6dbe27acce723e071c86a6ed154bf2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 Jun 2016 10:06:13 +0100 Subject: [PATCH 0756/1050] arm64: hibernate: Don't hibernate on systems with stuck CPUs Hibernate relies on cpu hotplug to prevent secondary cores executing the kernel text while it is being restored. Add a call to cpus_are_stuck_in_kernel() to determine if there are CPUs not counted by 'num_online_cpus()', and prevent hibernate in this case. Fixes: 82869ac57b5 ("arm64: kernel: Add support for hibernate/suspend-to-disk") Acked-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index f8df75d740f4..21ab5df9fa76 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -236,6 +237,11 @@ int swsusp_arch_suspend(void) unsigned long flags; struct sleep_stack_data state; + if (cpus_are_stuck_in_kernel()) { + pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n"); + return -EBUSY; + } + local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { From ba562d5e54fd3136bfea0457add3675850247774 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Wed, 1 Jun 2016 22:21:53 +0300 Subject: [PATCH 0757/1050] pinctrl: imx: Do not treat a PIN without MUX register as an error Some PINs do not have a MUX register, it is not an error. It is necessary to allow the continuation of the PINs configuration, otherwise the whole PIN-group will be configured incorrectly. Cc: stable@vger.kernel.org Signed-off-by: Alexander Shiyan Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-imx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 47ccfcc8a647..eccb47480e1d 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -209,9 +209,9 @@ static int imx_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, pin_reg = &info->pin_regs[pin_id]; if (pin_reg->mux_reg == -1) { - dev_err(ipctl->dev, "Pin(%s) does not support mux function\n", + dev_dbg(ipctl->dev, "Pin(%s) does not support mux function\n", info->pins[pin_id].name); - return -EINVAL; + continue; } if (info->flags & SHARE_MUX_CONF_REG) { From 0ac3c0a4025f41748a083bdd4970cb3ede802b15 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 14:17:06 -0700 Subject: [PATCH 0758/1050] pinctrl: single: Fix missing flush of posted write for a wakeirq With many repeated suspend resume cycles, the pin specific wakeirq may not always work on omaps. This is because the write to enable the pin interrupt may not have reached the device over the interconnect before suspend happens. Let's fix the issue with a flush of posted write with a readback. Cc: stable@vger.kernel.org Reported-by: Nishanth Menon Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-single.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index cf9bafa10acf..bfdf720db270 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1580,6 +1580,9 @@ static inline void pcs_irq_set(struct pcs_soc_data *pcs_soc, else mask &= ~soc_mask; pcs->write(mask, pcswi->reg); + + /* flush posted write */ + mask = pcs->read(pcswi->reg); raw_spin_unlock(&pcs->lock); } From 9ee8ff4867d07a6429991a0b748fa9c1586a7764 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 6 Jun 2016 18:56:27 +0200 Subject: [PATCH 0759/1050] gpio: tegra: Make lockdep class file-scoped Commit b546be0db955 ("gpio: tegra: Get rid of all file scoped global variables") moved all file scoped variables into the driver-private structure to allow potentially multiple instances of the driver. The change also included turning the lockdep class into a driver-private field, which doesn't work and produces error messages such as this: [ 0.142310] BUG: key ffff8000fb3f7ab0 not in .data! Make the lockdep class file-scoped again to fix this issue. Signed-off-by: Thierry Reding Signed-off-by: Linus Walleij --- drivers/gpio/gpio-tegra.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index ec891a27952f..661b0e34e067 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -98,7 +98,6 @@ struct tegra_gpio_info { const struct tegra_gpio_soc_config *soc; struct gpio_chip gc; struct irq_chip ic; - struct lock_class_key lock_class; u32 bank_count; }; @@ -547,6 +546,12 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(tegra_gpio_suspend, tegra_gpio_resume) }; +/* + * This lock class tells lockdep that GPIO irqs are in a different category + * than their parents, so it won't report false recursion. + */ +static struct lock_class_key gpio_lock_class; + static int tegra_gpio_probe(struct platform_device *pdev) { const struct tegra_gpio_soc_config *config; @@ -660,7 +665,7 @@ static int tegra_gpio_probe(struct platform_device *pdev) bank = &tgi->bank_info[GPIO_BANK(gpio)]; - irq_set_lockdep_class(irq, &tgi->lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class); irq_set_chip_data(irq, bank); irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq); } From 19b5a91764fddcc51b2f8e54a81d4ef231980181 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 4 Jun 2016 14:35:56 +0800 Subject: [PATCH 0760/1050] pinctrl: tegra: Fix build dependency I got below build error: ERROR: "tegra_xusb_padctl_legacy_probe" [drivers/phy/tegra/phy-tegra-xusb.ko] undefined! with below build configuration: CONFIG_ARCH_TEGRA=y CONFIG_PINCTRL_TEGRA_XUSB=y CONFIG_PHY_TEGRA_XUSB=y The problem is below line in drivers/pinctrl/Makefile obj-$(CONFIG_PINCTRL_TEGRA) += tegra/ So even CONFIG_PINCTRL_TEGRA_XUSB=y is set, kbuild still does not compile the code in drivers/pinctrl/tegra folder if !CONFIG_PINCTRL_TEGRA. phy-tegra-xusb.c does not use any symbol from pinctrl-tegra.c, so build pinctrl-tegra.c only when CONFIG_PINCTRL_TEGRA is set. Signed-off-by: Axel Lin Acked-by: Jon Hunter Signed-off-by: Linus Walleij --- drivers/pinctrl/Makefile | 2 +- drivers/pinctrl/tegra/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile index e4bc1151e04f..42a5c1dddfef 100644 --- a/drivers/pinctrl/Makefile +++ b/drivers/pinctrl/Makefile @@ -23,7 +23,7 @@ obj-$(CONFIG_PINCTRL_PISTACHIO) += pinctrl-pistachio.o obj-$(CONFIG_PINCTRL_ROCKCHIP) += pinctrl-rockchip.o obj-$(CONFIG_PINCTRL_SINGLE) += pinctrl-single.o obj-$(CONFIG_PINCTRL_SIRF) += sirf/ -obj-$(CONFIG_PINCTRL_TEGRA) += tegra/ +obj-$(CONFIG_ARCH_TEGRA) += tegra/ obj-$(CONFIG_PINCTRL_TZ1090) += pinctrl-tz1090.o obj-$(CONFIG_PINCTRL_TZ1090_PDC) += pinctrl-tz1090-pdc.o obj-$(CONFIG_PINCTRL_U300) += pinctrl-u300.o diff --git a/drivers/pinctrl/tegra/Makefile b/drivers/pinctrl/tegra/Makefile index a927379b6794..d9ea2be69cc4 100644 --- a/drivers/pinctrl/tegra/Makefile +++ b/drivers/pinctrl/tegra/Makefile @@ -1,4 +1,4 @@ -obj-y += pinctrl-tegra.o +obj-$(CONFIG_PINCTRL_TEGRA) += pinctrl-tegra.o obj-$(CONFIG_PINCTRL_TEGRA20) += pinctrl-tegra20.o obj-$(CONFIG_PINCTRL_TEGRA30) += pinctrl-tegra30.o obj-$(CONFIG_PINCTRL_TEGRA114) += pinctrl-tegra114.o From 942f64c4c2be19a1b4a0c4295182b42d153899bf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 20 Jun 2016 11:53:20 +0300 Subject: [PATCH 0761/1050] team: Fix possible deadlock during team enslave Both dev_uc_sync_multiple() and dev_mc_sync_multiple() require the source device to be locked by netif_addr_lock_bh(), but this is missing in team's enslave function, so add it. This fixes the following lockdep warning: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(_xmit_ETHER/1); local_irq_disable(); lock(&(&mc->mca_lock)->rlock); lock(&team_netdev_addr_lock_key); lock(&(&mc->mca_lock)->rlock); *** DEADLOCK *** Fixes: cb41c997d444 ("team: team should sync the port's uc/mc addrs when add a port") Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/team/team.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 2ace126533cd..fdee77207323 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1203,8 +1203,10 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_dev_open; } + netif_addr_lock_bh(dev); dev_uc_sync_multiple(port_dev, dev); dev_mc_sync_multiple(port_dev, dev); + netif_addr_unlock_bh(dev); err = vlan_vids_add_by_dev(port_dev, dev); if (err) { From d19af0a76444fde629667ecb823c0ee28f9f67d8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 20 Jun 2016 11:36:28 +0200 Subject: [PATCH 0762/1050] kcm: fix /proc memory leak Every open of /proc/net/kcm leaks 16 bytes of memory as is reported by kmemleak: unreferenced object 0xffff88059c0e3458 (size 192): comm "cat", pid 1401, jiffies 4294935742 (age 310.720s) hex dump (first 32 bytes): 28 45 71 96 05 88 ff ff 00 10 00 00 00 00 00 00 (Eq............. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmem_cache_alloc_trace+0x16e/0x230 [] seq_open+0x79/0x1d0 [] kcm_seq_open+0x0/0x30 [kcm] [] seq_open+0x79/0x1d0 [] __seq_open_private+0x2f/0xa0 [] seq_open_net+0x38/0xa0 ... It is caused by a missing free in the ->release path. So fix it by providing seq_release_net as the ->release method. Signed-off-by: Jiri Slaby Fixes: cd6e111bf5 (kcm: Add statistics and proc interfaces) Cc: "David S. Miller" Cc: Tom Herbert Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/kcm/kcmproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 738008726cc6..fda7f4715c58 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -241,6 +241,7 @@ static const struct file_operations kcm_seq_fops = { .open = kcm_seq_open, .read = seq_read, .llseek = seq_lseek, + .release = seq_release_net, }; static struct kcm_seq_muxinfo kcm_seq_muxinfo = { From 27777daa8b6df0c19aaf591d1536a586b3eb5e36 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 20 Jun 2016 09:20:46 -0400 Subject: [PATCH 0763/1050] tipc: unclone unbundled buffers before forwarding When extracting an individual message from a received "bundle" buffer, we just create a clone of the base buffer, and adjust it to point into the right position of the linearized data area of the latter. This works well for regular message reception, but during periods of extremely high load it may happen that an extracted buffer, e.g, a connection probe, is reversed and forwarded through an external interface while the preceding extracted message is still unhandled. When this happens, the header or data area of the preceding message will be partially overwritten by a MAC header, leading to unpredicatable consequences, such as a link reset. We now fix this by ensuring that the msg_reverse() function never returns a cloned buffer, and that the returned buffer always contains sufficient valid head and tail room to be forwarded. Reported-by: Erik Hugne Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 6 ++++++ net/tipc/msg.h | 11 ----------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8740930f0787..17201aa8423d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -41,6 +41,8 @@ #include "name_table.h" #define MAX_FORWARD_SIZE 1024 +#define BUF_HEADROOM (LL_MAX_HEADER + 48) +#define BUF_TAILROOM 16 static unsigned int align(unsigned int i) { @@ -505,6 +507,10 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) msg_set_hdr_sz(hdr, BASIC_H_SIZE); } + if (skb_cloned(_skb) && + pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL)) + goto exit; + /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); msg_set_origport(hdr, msg_destport(&ohdr)); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 024da8af91f0..7cf52fb39bee 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -94,17 +94,6 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 -/** - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). - * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM (LL_MAX_HEADER + 48) - struct tipc_skb_cb { void *handle; struct sk_buff *tail; From 93c098af09455ea7bdc6f0f6b08f6ac14fa06cf4 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 21 Jun 2016 14:20:02 +0300 Subject: [PATCH 0764/1050] net/mlx4_en: Fix the return value of a failure in VLAN VID add/kill Modify mlx4_en_vlan_rx_[add/kill]_vid to return error value in case of failure. Fixes: 8e586137e6b6 ('net: make vlan ndo_vlan_rx_[add/kill]_vid return error value') Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a2c25365a8a3..43f505e2d291 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -406,14 +406,18 @@ static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, mutex_lock(&mdev->state_lock); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); - if (err) + if (err) { en_err(priv, "Failed configuring VLAN filter\n"); + goto out; + } } - if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) - en_dbg(HW, priv, "failed adding vlan %d\n", vid); - mutex_unlock(&mdev->state_lock); + err = mlx4_register_vlan(mdev->dev, priv->port, vid, &idx); + if (err) + en_dbg(HW, priv, "Failed adding vlan %d\n", vid); - return 0; +out: + mutex_unlock(&mdev->state_lock); + return err; } static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, @@ -421,7 +425,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int err; + int err = 0; en_dbg(HW, priv, "Killing VID:%d\n", vid); @@ -438,7 +442,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, } mutex_unlock(&mdev->state_lock); - return 0; + return err; } static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac) From 9d76931180557270796f9631e2c79b9c7bb3c9fb Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 21 Jun 2016 14:20:03 +0300 Subject: [PATCH 0765/1050] net/mlx4_en: Avoid unregister_netdev at shutdown flow This allows a clean shutdown, even if some netdev clients do not release their reference from this netdev. It is enough to release the HW resources only as the kernel is shutting down. Fixes: 2ba5fbd62b25 ('net/mlx4_core: Handle AER flow properly') Signed-off-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 17 +++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 5 ++++- include/linux/mlx4/device.h | 1 + 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 43f505e2d291..0c0dfd6cdca6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2036,11 +2036,20 @@ err: return -ENOMEM; } +static void mlx4_en_shutdown(struct net_device *dev) +{ + rtnl_lock(); + netif_device_detach(dev); + mlx4_en_close(dev); + rtnl_unlock(); +} void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + bool shutdown = mdev->dev->persist->interface_state & + MLX4_INTERFACE_STATE_SHUTDOWN; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); @@ -2048,7 +2057,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev) if (priv->registered) { devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev, priv->port)); - unregister_netdev(dev); + if (shutdown) + mlx4_en_shutdown(dev); + else + unregister_netdev(dev); } if (priv->allocated) @@ -2073,7 +2085,8 @@ void mlx4_en_destroy_netdev(struct net_device *dev) kfree(priv->tx_ring); kfree(priv->tx_cq); - free_netdev(dev); + if (!shutdown) + free_netdev(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 372ebfa880f5..546fab0ecc3b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4135,8 +4135,11 @@ static void mlx4_shutdown(struct pci_dev *pdev) mlx4_info(persist->dev, "mlx4_shutdown was called\n"); mutex_lock(&persist->interface_state_mutex); - if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) { + /* Notify mlx4 clients that the kernel is being shut down */ + persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN; mlx4_unload_one(pdev); + } mutex_unlock(&persist->interface_state_mutex); } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 80dec87a94f8..d46a0e7f144d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -466,6 +466,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ From 0b305ccc1b545d3389068ddc3548cbc877513b97 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 13 Jun 2016 13:48:53 +0800 Subject: [PATCH 0766/1050] gpio: 104-idi-48: Fix missing spin_lock_init for ack_lock Fixes: 9ae482104cb9 ("gpio: 104-idi-48: Clear pending interrupt once in IRQ handler") Signed-off-by: Axel Lin Acked-by: William Breathitt Gray Signed-off-by: Linus Walleij --- drivers/gpio/gpio-104-idi-48.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c index 6c75c83baf5a..2d2763ea1a68 100644 --- a/drivers/gpio/gpio-104-idi-48.c +++ b/drivers/gpio/gpio-104-idi-48.c @@ -247,6 +247,7 @@ static int idi_48_probe(struct device *dev, unsigned int id) idi48gpio->irq = irq[id]; spin_lock_init(&idi48gpio->lock); + spin_lock_init(&idi48gpio->ack_lock); dev_set_drvdata(dev, idi48gpio); From 39243ee771666e02201ba89c1d76fdc28e9cf681 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 15 Jun 2016 22:57:38 +0200 Subject: [PATCH 0767/1050] gpio: make sure gpiod_to_irq() returns negative on NULL desc commit 54d77198fdfbc4f0fe11b4252c1d9c97d51a3264 ("gpio: bail out silently on NULL descriptors") doesn't work for gpiod_to_irq(): drivers assume that NULL descriptors will give negative IRQ numbers in return. It has been pointed out that returning 0 is NO_IRQ and that drivers should be amended to treat this as an error, but that is for the longer term: now let us repair the semantics. Cc: Maxime Ripard Reported-by: Hans de Goede Tested-by: Hans de Goede Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 58d822d7e8da..f39bf05993e7 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2056,7 +2056,14 @@ int gpiod_to_irq(const struct gpio_desc *desc) struct gpio_chip *chip; int offset; - VALIDATE_DESC(desc); + /* + * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics + * requires this function to not return zero on an invalid descriptor + * but rather a negative error number. + */ + if (!desc || !desc->gdev || !desc->gdev->chip) + return -EINVAL; + chip = desc->gdev->chip; offset = gpio_chip_hwgpio(desc); if (chip->to_irq) { From c0a1ecb9f4e208f4b75d88fa9669748e3fd705ab Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 16 Jun 2016 11:55:55 +0200 Subject: [PATCH 0768/1050] gpio: make library immune to error pointers Most functions that take a GPIO descriptor in need to check the descriptor for IS_ERR(). We do this mostly in the VALIDATE_DESC() macro except for the gpiod_to_irq() function which needs special handling. Cc: stable@vger.kernel.org Reported-by: Grygorii Strashko Reviewed-by: Grygorii Strashko Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f39bf05993e7..570771ed19e6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1373,8 +1373,12 @@ done: #define VALIDATE_DESC(desc) do { \ if (!desc) \ return 0; \ + if (IS_ERR(desc)) { \ + pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \ + return PTR_ERR(desc); \ + } \ if (!desc->gdev) { \ - pr_warn("%s: invalid GPIO\n", __func__); \ + pr_warn("%s: invalid GPIO (no device)\n", __func__); \ return -EINVAL; \ } \ if ( !desc->gdev->chip ) { \ @@ -1386,8 +1390,12 @@ done: #define VALIDATE_DESC_VOID(desc) do { \ if (!desc) \ return; \ + if (IS_ERR(desc)) { \ + pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \ + return; \ + } \ if (!desc->gdev) { \ - pr_warn("%s: invalid GPIO\n", __func__); \ + pr_warn("%s: invalid GPIO (no device)\n", __func__); \ return; \ } \ if (!desc->gdev->chip) { \ @@ -2061,7 +2069,7 @@ int gpiod_to_irq(const struct gpio_desc *desc) * requires this function to not return zero on an invalid descriptor * but rather a negative error number. */ - if (!desc || !desc->gdev || !desc->gdev->chip) + if (!desc || IS_ERR(desc) || !desc->gdev || !desc->gdev->chip) return -EINVAL; chip = desc->gdev->chip; From 972228d87445dc46c0a01f5f3de673ac017626f7 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 21 Jun 2016 00:31:50 +0200 Subject: [PATCH 0769/1050] ubi: Make recover_peb power cut aware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit recover_peb() was never power cut aware, if a power cut happened right after writing the VID header upon next attach UBI would blindly use the new partial written PEB and all data from the old PEB is lost. In order to make recover_peb() power cut aware, write the new VID with a proper crc and copy_flag set such that the UBI attach process will detect whether the new PEB is completely written or not. We cannot directly use ubi_eba_atomic_leb_change() since we'd have to unlock the LEB which is facing a write error. Cc: stable@vger.kernel.org Reported-by: Jörg Pfähler Reviewed-by: Jörg Pfähler Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/eba.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 5780dd1ba79d..ebf517271d29 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -575,6 +575,7 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0; struct ubi_volume *vol = ubi->volumes[idx]; struct ubi_vid_hdr *vid_hdr; + uint32_t crc; vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); if (!vid_hdr) @@ -599,14 +600,8 @@ retry: goto out_put; } - vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); - err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); - if (err) { - up_read(&ubi->fm_eba_sem); - goto write_error; - } + ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC); - data_size = offset + len; mutex_lock(&ubi->buf_mutex); memset(ubi->peb_buf + offset, 0xFF, len); @@ -621,6 +616,19 @@ retry: memcpy(ubi->peb_buf + offset, buf, len); + data_size = offset + len; + crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size); + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + vid_hdr->copy_flag = 1; + vid_hdr->data_size = cpu_to_be32(data_size); + vid_hdr->data_crc = cpu_to_be32(crc); + err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); + if (err) { + mutex_unlock(&ubi->buf_mutex); + up_read(&ubi->fm_eba_sem); + goto write_error; + } + err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size); if (err) { mutex_unlock(&ubi->buf_mutex); From 1118dce773d84f39ebd51a9fe7261f9169cb056e Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 16 Jun 2016 23:26:14 +0200 Subject: [PATCH 0770/1050] mm: Export migrate_page_move_mapping and migrate_page_copy Export these symbols such that UBIFS can implement ->migratepage. Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger Acked-by: Christoph Hellwig --- mm/migrate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/migrate.c b/mm/migrate.c index 9baf41c877ff..bd3fdc202e8b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -431,6 +431,7 @@ int migrate_page_move_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } +EXPORT_SYMBOL(migrate_page_move_mapping); /* * The expected number of remaining references is the same as that @@ -586,6 +587,7 @@ void migrate_page_copy(struct page *newpage, struct page *page) mem_cgroup_migrate(page, newpage); } +EXPORT_SYMBOL(migrate_page_copy); /************************************************************ * Migration functions From 4ac1c17b2044a1b4b2fbed74451947e905fc2992 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 16 Jun 2016 23:26:15 +0200 Subject: [PATCH 0771/1050] UBIFS: Implement ->migratepage() During page migrations UBIFS might get confused and the following assert triggers: [ 213.480000] UBIFS assert failed in ubifs_set_page_dirty at 1451 (pid 436) [ 213.490000] CPU: 0 PID: 436 Comm: drm-stress-test Not tainted 4.4.4-00176-geaa802524636-dirty #1008 [ 213.490000] Hardware name: Allwinner sun4i/sun5i Families [ 213.490000] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 213.490000] [] (show_stack) from [] (dump_stack+0x8c/0xa0) [ 213.490000] [] (dump_stack) from [] (ubifs_set_page_dirty+0x44/0x50) [ 213.490000] [] (ubifs_set_page_dirty) from [] (try_to_unmap_one+0x10c/0x3a8) [ 213.490000] [] (try_to_unmap_one) from [] (rmap_walk+0xb4/0x290) [ 213.490000] [] (rmap_walk) from [] (try_to_unmap+0x64/0x80) [ 213.490000] [] (try_to_unmap) from [] (migrate_pages+0x328/0x7a0) [ 213.490000] [] (migrate_pages) from [] (alloc_contig_range+0x168/0x2f4) [ 213.490000] [] (alloc_contig_range) from [] (cma_alloc+0x170/0x2c0) [ 213.490000] [] (cma_alloc) from [] (__alloc_from_contiguous+0x38/0xd8) [ 213.490000] [] (__alloc_from_contiguous) from [] (__dma_alloc+0x23c/0x274) [ 213.490000] [] (__dma_alloc) from [] (arm_dma_alloc+0x54/0x5c) [ 213.490000] [] (arm_dma_alloc) from [] (drm_gem_cma_create+0xb8/0xf0) [ 213.490000] [] (drm_gem_cma_create) from [] (drm_gem_cma_create_with_handle+0x1c/0xe8) [ 213.490000] [] (drm_gem_cma_create_with_handle) from [] (drm_gem_cma_dumb_create+0x3c/0x48) [ 213.490000] [] (drm_gem_cma_dumb_create) from [] (drm_ioctl+0x12c/0x444) [ 213.490000] [] (drm_ioctl) from [] (do_vfs_ioctl+0x3f4/0x614) [ 213.490000] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [ 213.490000] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x34) UBIFS is using PagePrivate() which can have different meanings across filesystems. Therefore the generic page migration code cannot handle this case correctly. We have to implement our own migration function which basically does a plain copy but also duplicates the page private flag. UBIFS is not a block device filesystem and cannot use buffer_migrate_page(). Cc: stable@vger.kernel.org Signed-off-by: Kirill A. Shutemov [rw: Massaged changelog, build fixes, etc...] Signed-off-by: Richard Weinberger Acked-by: Christoph Hellwig --- fs/ubifs/file.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 08316972ff93..7bbf420d1289 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -52,6 +52,7 @@ #include "ubifs.h" #include #include +#include static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) @@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page) return ret; } +#ifdef CONFIG_MIGRATION +static int ubifs_migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode) +{ + int rc; + + rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); + if (rc != MIGRATEPAGE_SUCCESS) + return rc; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + SetPagePrivate(newpage); + } + + migrate_page_copy(newpage, page); + return MIGRATEPAGE_SUCCESS; +} +#endif + static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) { /* @@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = { .write_end = ubifs_write_end, .invalidatepage = ubifs_invalidatepage, .set_page_dirty = ubifs_set_page_dirty, +#ifdef CONFIG_MIGRATION + .migratepage = ubifs_migrate_page, +#endif .releasepage = ubifs_releasepage, }; From 6e914ee629c411d9c6d160399ce7d3472d2c0ec7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 20 Jun 2016 19:23:43 +1000 Subject: [PATCH 0772/1050] powerpc: Fix faults caused by radix patching of SLB miss handler As part of the Radix MMU support we added some feature sections in the SLB miss handler. These are intended to catch the case that we incorrectly take an SLB miss when Radix is enabled, and instead of crashing weirdly they bail out to a well defined exit path and trigger an oops. However the way they were written meant the bailout case was enabled by default until we did CPU feature patching. On powermacs the early debug prints in setup_system() can cause an SLB miss, which happens before code patching, and so the SLB miss handler would incorrectly bailout and crash during boot. Fix it by inverting the sense of the feature section, so that the code which is in place at boot is correct for the hash case. Once we determine we are using Radix - which will never happen on a powermac - only then do we patch in the bailout case which unconditionally jumps. Fixes: caca285e5ab4 ("powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code") Reported-by: Denis Kirjanov Tested-by: Denis Kirjanov Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4c9440629128..8bcc1b457115 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1399,11 +1399,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ mtlr r10 -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) andi. r10,r12,MSR_RI /* check for unrecoverable exception */ +BEGIN_MMU_FTR_SECTION beq- 2f +FTR_SECTION_ELSE + b 2f +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) .machine push .machine "power4" From 844e3be47693f92a108cb1fb3b0606bf25e9c7a6 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Jun 2016 21:55:01 +0530 Subject: [PATCH 0773/1050] powerpc/bpf/jit: Disable classic BPF JIT on ppc64le Classic BPF JIT was never ported completely to work on little endian powerpc. However, it can be enabled and will crash the system when used. As such, disable use of BPF JIT on ppc64le. Fixes: 7c105b63bd98 ("powerpc: Add CONFIG_CPU_LITTLE_ENDIAN kernel config option.") Reported-by: Thadeu Lima de Souza Cascardo Signed-off-by: Naveen N. Rao Acked-by: Thadeu Lima de Souza Cascardo Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 01f7464d9fea..0a9d439bcda6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -128,7 +128,7 @@ config PPC select IRQ_FORCED_THREADING select HAVE_RCU_TABLE_FREE if SMP select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT + select HAVE_CBPF_JIT if CPU_BIG_ENDIAN select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAS_GCOV_PROFILE_ALL From 31b9655f439a26856edca0f3f8daa368a61f16d5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2016 17:37:40 -0400 Subject: [PATCH 0774/1050] Btrfs: track transid for delayed ref flushing Using the offwakecputime bpf script I noticed most of our time was spent waiting on the delayed ref throttling. This is what is supposed to happen, but sometimes the transaction can commit and then we're waiting for throttling that doesn't matter anymore. So change this stuff to be a little smarter by tracking the transid we were in when we initiated the throttling. If the transaction we get is different then we can just bail out. This resulted in a 50% speedup in my fs_mark test, and reduced the amount of time spent throttling by 60 seconds over the entire run (which is about 30 minutes). Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 15 ++++++++++++--- fs/btrfs/inode.c | 1 + fs/btrfs/transaction.c | 3 ++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 101c3cfd3f7c..4274a7bfdaed 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2518,7 +2518,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait); + unsigned long count, u64 transid, int wait); int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 29e5d000bbee..ecfa52002363 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2835,6 +2835,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, struct async_delayed_refs { struct btrfs_root *root; + u64 transid; int count; int error; int sync; @@ -2850,9 +2851,16 @@ static void delayed_ref_async_start(struct btrfs_work *work) async = container_of(work, struct async_delayed_refs, work); - trans = btrfs_join_transaction(async->root); + trans = btrfs_attach_transaction(async->root); if (IS_ERR(trans)) { - async->error = PTR_ERR(trans); + if (PTR_ERR(trans) != -ENOENT) + async->error = PTR_ERR(trans); + goto done; + } + + /* Don't bother flushing if we got into a different transaction */ + if (trans->transid != async->transid) { + btrfs_end_transaction(trans, async->root); goto done; } @@ -2876,7 +2884,7 @@ done: } int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait) + unsigned long count, u64 transid, int wait) { struct async_delayed_refs *async; int ret; @@ -2888,6 +2896,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, async->root = root->fs_info->tree_root; async->count = count; async->error = 0; + async->transid = transid; if (wait) async->sync = 1; else diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb62418b8023..78582e339f33 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4558,6 +4558,7 @@ delete: BUG_ON(ret); if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, + trans->transid, trans->delayed_ref_updates * 2, 0); if (be_nice) { if (truncate_space_check(trans, root, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 765845742fde..948aa186b353 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -818,6 +818,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *info = root->fs_info; + u64 transid = trans->transid; unsigned long cur = trans->delayed_ref_updates; int lock = (trans->type != TRANS_JOIN_NOLOCK); int err = 0; @@ -905,7 +906,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (must_run_delayed_refs) { - btrfs_async_run_delayed_refs(root, cur, + btrfs_async_run_delayed_refs(root, cur, transid, must_run_delayed_refs == 1); } return err; From 0f873eca82a0bee45f38862e0ea2ac7b1c2a31bd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 27 Apr 2016 09:59:38 -0400 Subject: [PATCH 0775/1050] btrfs: fix deadlock in delayed_ref_async_start "Btrfs: track transid for delayed ref flushing" was deadlocking on btrfs_attach_transaction because its not safe to call from the async delayed ref start code. This commit brings back btrfs_join_transaction instead and checks for a blocked commit. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ecfa52002363..82b912a293ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2851,16 +2851,13 @@ static void delayed_ref_async_start(struct btrfs_work *work) async = container_of(work, struct async_delayed_refs, work); - trans = btrfs_attach_transaction(async->root); - if (IS_ERR(trans)) { - if (PTR_ERR(trans) != -ENOENT) - async->error = PTR_ERR(trans); + /* if the commit is already started, we don't need to wait here */ + if (btrfs_transaction_blocked(async->root->fs_info)) goto done; - } - /* Don't bother flushing if we got into a different transaction */ - if (trans->transid != async->transid) { - btrfs_end_transaction(trans, async->root); + trans = btrfs_join_transaction(async->root); + if (IS_ERR(trans)) { + async->error = PTR_ERR(trans); goto done; } @@ -2869,10 +2866,15 @@ static void delayed_ref_async_start(struct btrfs_work *work) * wait on delayed refs */ trans->sync = true; + + /* Don't bother flushing if we got into a different transaction */ + if (trans->transid > async->transid) + goto end; + ret = btrfs_run_delayed_refs(trans, async->root, async->count); if (ret) async->error = ret; - +end: ret = btrfs_end_transaction(trans, async->root); if (ret && !async->error) async->error = ret; From c6887cd11149d7325328749f06719071e6c725c6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 25 Mar 2016 13:26:00 -0400 Subject: [PATCH 0776/1050] Btrfs: don't do nocow check unless we have to Before we write into prealloc/nocow space we have to make sure that there are no references to the extents we are writing into, which means checking the extent tree and csum tree in the case of nocow. So we don't want to do the nocow dance unless we can't reserve data space, since it's a serious drag on performance. With the following sequence fallocate -l10737418240 /mnt/btrfs-test/file cp --reflink /mnt/btrfs-test/file /mnt/btrfs-test/link fio --name=randwrite --rw=randwrite --bs=4k --filename=/mnt/btrfs-test/file \ --end_fsync=1 we get the worst case scenario where we have to fall back on to doing the check anyway. Without this patch lat (usec): min=5, max=111598, avg=27.65, stdev=124.51 write: io=10240MB, bw=126876KB/s, iops=31718, runt= 82646msec With this patch lat (usec): min=3, max=91210, avg=14.09, stdev=110.62 write: io=10240MB, bw=212753KB/s, iops=53188, runt= 49286msec We get twice the throughput, half of the runtime, and half of the average latency. Thanks, Signed-off-by: Josef Bacik [ PAGE_CACHE_ removal related fixups ] Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/file.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 159a93450e26..8a538abb597a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1534,30 +1534,30 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, reserve_bytes = round_up(write_bytes + sector_offset, root->sectorsize); - if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | - BTRFS_INODE_PREALLOC)) && - check_can_nocow(inode, pos, &write_bytes) > 0) { - /* - * For nodata cow case, no need to reserve - * data space. - */ - only_release_metadata = true; - /* - * our prealloc extent may be smaller than - * write_bytes, so scale down. - */ - num_pages = DIV_ROUND_UP(write_bytes + offset, - PAGE_SIZE); - reserve_bytes = round_up(write_bytes + sector_offset, - root->sectorsize); - goto reserve_metadata; + ret = btrfs_check_data_free_space(inode, pos, write_bytes); + if (ret < 0) { + if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | + BTRFS_INODE_PREALLOC)) && + check_can_nocow(inode, pos, &write_bytes) > 0) { + /* + * For nodata cow case, no need to reserve + * data space. + */ + only_release_metadata = true; + /* + * our prealloc extent may be smaller than + * write_bytes, so scale down. + */ + num_pages = DIV_ROUND_UP(write_bytes + offset, + PAGE_SIZE); + reserve_bytes = round_up(write_bytes + + sector_offset, + root->sectorsize); + } else { + break; + } } - ret = btrfs_check_data_free_space(inode, pos, write_bytes); - if (ret < 0) - break; - -reserve_metadata: ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); if (ret) { if (!only_release_metadata) From fefb62455f6becc87d2d8f25ba2df961add5d06c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 21 Jun 2016 01:52:36 +0300 Subject: [PATCH 0777/1050] MAINTAINERS: belong Documentation/pinctrl.txt properly I'm pretty sure that Documentation/pinctrl.txt would be better maintained by pinctrl subsystem. Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e1b090f86e0d..c447bdcc179a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8959,6 +8959,7 @@ L: linux-gpio@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git S: Maintained F: Documentation/devicetree/bindings/pinctrl/ +F: Documentation/pinctrl.txt F: drivers/pinctrl/ F: include/linux/pinctrl/ From bce271f255dae8335dc4d2ee2c4531e09cc67f5a Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 21 Jun 2016 12:14:07 +0200 Subject: [PATCH 0778/1050] can: fix handling of unmodifiable configuration options fix With upstream commit bb208f144cf3f59 (can: fix handling of unmodifiable configuration options) a new can_validate() function was introduced. When invoking 'ip link set can0 type can' without any configuration data can_validate() tries to validate the content without taking into account that there's totally no content. This patch adds a check for missing content. Reported-by: ajneu Signed-off-by: Oliver Hartkopp Cc: Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 910c12e2638e..348dd5001fa4 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -798,6 +798,9 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[]) * - control mode with CAN_CTRLMODE_FD set */ + if (!data) + return 0; + if (data[IFLA_CAN_CTRLMODE]) { struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]); From 25e1ed6e64f52a692ba3191c4fde650aab3ecc07 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 21 Jun 2016 15:45:47 +0200 Subject: [PATCH 0779/1050] can: fix oops caused by wrong rtnl dellink usage For 'real' hardware CAN devices the netlink interface is used to set CAN specific communication parameters. Real CAN hardware can not be created nor removed with the ip tool ... This patch adds a private dellink function for the CAN device driver interface that does just nothing. It's a follow up to commit 993e6f2fd ("can: fix oops caused by wrong rtnl newlink usage") but for dellink. Reported-by: ajneu Signed-off-by: Oliver Hartkopp Cc: Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 348dd5001fa4..ad535a854e5c 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -1011,6 +1011,11 @@ static int can_newlink(struct net *src_net, struct net_device *dev, return -EOPNOTSUPP; } +static void can_dellink(struct net_device *dev, struct list_head *head) +{ + return; +} + static struct rtnl_link_ops can_link_ops __read_mostly = { .kind = "can", .maxtype = IFLA_CAN_MAX, @@ -1019,6 +1024,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = { .validate = can_validate, .newlink = can_newlink, .changelink = can_changelink, + .dellink = can_dellink, .get_size = can_get_size, .fill_info = can_fill_info, .get_xstats_size = can_get_xstats_size, From b41aa4f8476545e2b663b1549759a8c3a66f47b0 Mon Sep 17 00:00:00 2001 From: Cristina Ciocan Date: Wed, 22 Jun 2016 14:17:19 +0300 Subject: [PATCH 0780/1050] pinctrl: baytrail: Fix mingled clock pins Fix plt clock 3, 4 and 5 pins, which were not in the proper order. Signed-off-by: Cristina Ciocan Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-baytrail.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 677a811b3a6f..7abfd42e8ffd 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -401,9 +401,9 @@ static const struct byt_simple_func_mux byt_score_sata_mux[] = { static const unsigned int byt_score_plt_clk0_pins[] = { 96 }; static const unsigned int byt_score_plt_clk1_pins[] = { 97 }; static const unsigned int byt_score_plt_clk2_pins[] = { 98 }; -static const unsigned int byt_score_plt_clk4_pins[] = { 99 }; -static const unsigned int byt_score_plt_clk5_pins[] = { 100 }; -static const unsigned int byt_score_plt_clk3_pins[] = { 101 }; +static const unsigned int byt_score_plt_clk3_pins[] = { 99 }; +static const unsigned int byt_score_plt_clk4_pins[] = { 100 }; +static const unsigned int byt_score_plt_clk5_pins[] = { 101 }; static const struct byt_simple_func_mux byt_score_plt_clk_mux[] = { SIMPLE_FUNC("plt_clk", 1), }; From 71873a9b38d1cc6c93e2962149a7bb7272a7cb66 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Thu, 23 Jun 2016 07:57:21 +0200 Subject: [PATCH 0781/1050] can: kvaser_usb: Add support for more Kvaser Leaf v2 devices This patch adds support for Kvaser Leaf Light HS v2 OEM, Mini PCI Express 2xHS and USBcan Light 2xHS. Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/Kconfig | 2 ++ drivers/net/can/usb/kvaser_usb.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index 2ff0df32b3d1..8483a40e7e9e 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -47,6 +47,8 @@ config CAN_KVASER_USB - Kvaser USBcan R - Kvaser Leaf Light v2 - Kvaser Mini PCI Express HS + - Kvaser Mini PCI Express 2xHS + - Kvaser USBcan Light 2xHS - Kvaser USBcan II HS/HS - Kvaser USBcan II HS/LS - Kvaser USBcan Rugged ("USBcan Rev B") diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 022bfa13ebfa..6f1f3b675ff5 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -59,11 +59,14 @@ #define USB_CAN_R_PRODUCT_ID 39 #define USB_LEAF_LITE_V2_PRODUCT_ID 288 #define USB_MINI_PCIE_HS_PRODUCT_ID 289 +#define USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID 290 +#define USB_USBCAN_LIGHT_2HS_PRODUCT_ID 291 +#define USB_MINI_PCIE_2HS_PRODUCT_ID 292 static inline bool kvaser_is_leaf(const struct usb_device_id *id) { return id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID && - id->idProduct <= USB_MINI_PCIE_HS_PRODUCT_ID; + id->idProduct <= USB_MINI_PCIE_2HS_PRODUCT_ID; } /* Kvaser USBCan-II devices */ @@ -537,6 +540,9 @@ static const struct usb_device_id kvaser_usb_table[] = { .driver_info = KVASER_HAS_TXRX_ERRORS }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) }, /* USBCANII family IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID), From 055ddaace03580455a7b7dbea8e93d62acee61fc Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 22 Jun 2016 20:29:37 +0200 Subject: [PATCH 0782/1050] crypto: user - re-add size check for CRYPTO_MSG_GETALG Commit 9aa867e46565 ("crypto: user - Add CRYPTO_MSG_DELRNG") accidentally removed the minimum size check for CRYPTO_MSG_GETALG netlink messages. This allows userland to send a truncated CRYPTO_MSG_GETALG message as short as a netlink header only making crypto_report() operate on uninitialized memory by accessing data beyond the end of the netlink message. Fix this be re-adding the minimum required size of CRYPTO_MSG_GETALG messages to the crypto_msg_min[] array. Fixes: 9aa867e46565 ("crypto: user - Add CRYPTO_MSG_DELRNG") Cc: stable@vger.kernel.org # v4.2 Signed-off-by: Mathias Krause Cc: Steffen Klassert Signed-off-by: Herbert Xu --- crypto/crypto_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 43fe85f20d57..7097a3395b25 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0, }; From 842775f1509054ea969f1787f38d6a0ec2ccfaba Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 10 May 2016 10:27:54 +0100 Subject: [PATCH 0783/1050] xen/balloon: Fix declared-but-not-defined warning Fix a declared-but-not-defined warning when building with XEN_BALLOON_MEMORY_HOTPLUG=n. This fixes a regression introduced by commit dfd74a1edfab ("xen/balloon: Fix crash when ballooning on x86 32 bit PAE"). Signed-off-by: Ross Lagerwall Acked-by: Juergen Gross Signed-off-by: David Vrabel --- drivers/xen/balloon.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index d46839f51e73..e4db19e88ab1 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -151,8 +151,6 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); static void balloon_process(struct work_struct *work); static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); -static void release_memory_resource(struct resource *resource); - /* When ballooning out (allocating memory to return to Xen) we don't really want the kernel to try too hard since that can trigger the oom killer. */ #define GFP_BALLOON \ @@ -248,6 +246,19 @@ static enum bp_state update_schedule(enum bp_state state) } #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +static void release_memory_resource(struct resource *resource) +{ + if (!resource) + return; + + /* + * No need to reset region to identity mapped since we now + * know that no I/O can be in this region + */ + release_resource(resource); + kfree(resource); +} + static struct resource *additional_memory_resource(phys_addr_t size) { struct resource *res; @@ -286,19 +297,6 @@ static struct resource *additional_memory_resource(phys_addr_t size) return res; } -static void release_memory_resource(struct resource *resource) -{ - if (!resource) - return; - - /* - * No need to reset region to identity mapped since we now - * know that no I/O can be in this region - */ - release_resource(resource); - kfree(resource); -} - static enum bp_state reserve_additional_memory(void) { long credit; From 1cf38741308c64d08553602b3374fb39224eeb5a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 23 Jun 2016 07:12:27 +0200 Subject: [PATCH 0784/1050] x86/xen: fix upper bound of pmd loop in xen_cleanhighmap() xen_cleanhighmap() is operating on level2_kernel_pgt only. The upper bound of the loop setting non-kernel-image entries to zero should not exceed the size of level2_kernel_pgt. Reported-by: Linus Torvalds Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 478a2de543a5..2693b7ed5a6f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1113,7 +1113,7 @@ static void __init xen_cleanhighmap(unsigned long vaddr, /* NOTE: The loop is more greedy than the cleanup_highmap variant. * We include the PMD passed in on _both_ boundaries. */ - for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD)); pmd++, vaddr += PMD_SIZE) { if (pmd_none(*pmd)) continue; From 02ef871ecac290919ea0c783d05da7eedeffc10e Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 21 Jun 2016 14:26:36 -0400 Subject: [PATCH 0785/1050] xen/pciback: Fix conf_space read/write overlap check. Current overlap check is evaluating to false a case where a filter field is fully contained (proper subset) of a r/w request. This change applies classical overlap check instead to include all the scenarios. More specifically, for (Hilscher GmbH CIFX 50E-DP(M/S)) device driver the logic is such that the entire confspace is read and written in 4 byte chunks. In this case as an example, CACHE_LINE_SIZE, LATENCY_TIMER and PCI_BIST are arriving together in one call to xen_pcibk_config_write() with offset == 0xc and size == 4. With the exsisting overlap check the LATENCY_TIMER field (offset == 0xd, length == 1) is fully contained in the write request and hence is excluded from write, which is incorrect. Signed-off-by: Andrey Grodzovsky Reviewed-by: Boris Ostrovsky Reviewed-by: Jan Beulich Cc: Signed-off-by: David Vrabel --- drivers/xen/xen-pciback/conf_space.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 8e67336f8ddd..6a25533da237 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -183,8 +183,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, field_start = OFFSET(cfg_entry); field_end = OFFSET(cfg_entry) + field->size; - if ((req_start >= field_start && req_start < field_end) - || (req_end > field_start && req_end <= field_end)) { + if (req_end > field_start && field_end > req_start) { err = conf_space_read(dev, cfg_entry, field_start, &tmp_val); if (err) @@ -230,8 +229,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) field_start = OFFSET(cfg_entry); field_end = OFFSET(cfg_entry) + field->size; - if ((req_start >= field_start && req_start < field_end) - || (req_end > field_start && req_end <= field_end)) { + if (req_end > field_start && field_end > req_start) { tmp_val = 0; err = xen_pcibk_config_read(dev, field_start, From 5ce91714b0d8c0a3ff9b858966721f508351cf4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 18 Jun 2016 00:54:47 +0200 Subject: [PATCH 0786/1050] hwmon: (dell-smm) Cache fan_type() calls and change fan detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On more Dell machines (e.g. Dell Precision M3800) fan_type() call is too expensive (CPU is too long in SMM mode) and cause kernel to hang. This is bug in Dell SMM or BIOS. This patch caches type for each fan (as it should not change) and changes the way how fan presense is detected. First it try function fan_status() as was before commit f989e55452c7 ("i8k: Add support for fan labels"). And if that fails fallback to fan_type(). *_status() functions can fail in case fan is not currently accessible (e.g. present on GPU which is currently turned off). Reported-by: Tolga Cakir Signed-off-by: Pali Rohár Link: https://bugzilla.kernel.org/show_bug.cgi?id=112021 Cc: stable@vger.kernel.org # v4.0+, will need backport Tested-by: Tolga Cakir Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 4bbc58714868..2ac87d553e22 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -238,7 +238,7 @@ static int i8k_get_fan_speed(int fan) /* * Read the fan type. */ -static int i8k_get_fan_type(int fan) +static int _i8k_get_fan_type(int fan) { struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, }; @@ -249,6 +249,17 @@ static int i8k_get_fan_type(int fan) return i8k_smm(®s) ? : regs.eax & 0xff; } +static int i8k_get_fan_type(int fan) +{ + /* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */ + static int types[2] = { INT_MIN, INT_MIN }; + + if (types[fan] == INT_MIN) + types[fan] = _i8k_get_fan_type(fan); + + return types[fan]; +} + /* * Read the fan nominal rpm for specific fan speed. */ @@ -782,13 +793,17 @@ static int __init i8k_init_hwmon(void) if (err >= 0) i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP4; - /* First fan attributes, if fan type is OK */ - err = i8k_get_fan_type(0); + /* First fan attributes, if fan status or type is OK */ + err = i8k_get_fan_status(0); + if (err < 0) + err = i8k_get_fan_type(0); if (err >= 0) i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN1; - /* Second fan attributes, if fan type is OK */ - err = i8k_get_fan_type(1); + /* Second fan attributes, if fan status or type is OK */ + err = i8k_get_fan_status(1); + if (err < 0) + err = i8k_get_fan_type(1); if (err >= 0) i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2; From d6b186c1e2d852a92c43f090d0d8fad4704d51ef Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 17 May 2016 15:54:50 +0100 Subject: [PATCH 0787/1050] x86/xen: avoid m2p lookup when setting early page table entries When page tables entries are set using xen_set_pte_init() during early boot there is no page fault handler that could handle a fault when performing an M2P lookup. In 64 bit guests (usually dom0) early_ioremap() would fault in xen_set_pte_init() because an M2P lookup faults because the MFN is in MMIO space and not mapped in the M2P. This lookup is done to see if the PFN in in the range used for the initial page table pages, so that the PTE may be set as read-only. The M2P lookup can be avoided by moving the check (and clear of RW) earlier when the PFN is still available. Reported-by: Kevin Moraga Signed-off-by: David Vrabel Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross --- arch/x86/xen/mmu.c | 72 ++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2693b7ed5a6f..67433714b791 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -#ifdef CONFIG_X86_32 -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} -#else /* CONFIG_X86_64 */ -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - unsigned long pfn; - - if (xen_feature(XENFEAT_writable_page_tables) || - xen_feature(XENFEAT_auto_translated_physmap) || - xen_start_info->mfn_list >= __START_KERNEL_map) - return pte; - - /* - * Pages belonging to the initial p2m list mapped outside the default - * address range must be mapped read-only. This region contains the - * page tables for mapping the p2m list, too, and page tables MUST be - * mapped read-only. - */ - pfn = pte_pfn(pte); - if (pfn >= xen_start_info->first_p2m_pfn && - pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) - pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW); - - return pte; -} -#endif /* CONFIG_X86_64 */ - /* * Init-time set_pte while constructing initial pagetables, which * doesn't allow RO page table pages to be remapped RW. @@ -1600,13 +1565,37 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) * so always write the PTE directly and rely on Xen trapping and * emulating any updates as necessary. */ +__visible pte_t xen_make_pte_init(pteval_t pte) +{ +#ifdef CONFIG_X86_64 + unsigned long pfn; + + /* + * Pages belonging to the initial p2m list mapped outside the default + * address range must be mapped read-only. This region contains the + * page tables for mapping the p2m list, too, and page tables MUST be + * mapped read-only. + */ + pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT; + if (xen_start_info->mfn_list < __START_KERNEL_map && + pfn >= xen_start_info->first_p2m_pfn && + pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) + pte &= ~_PAGE_RW; +#endif + pte = pte_pfn_to_mfn(pte); + return native_make_pte(pte); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); + static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) { - if (pte_mfn(pte) != INVALID_P2M_ENTRY) - pte = mask_rw_pte(ptep, pte); - else - pte = __pte_ma(0); - +#ifdef CONFIG_X86_32 + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_mfn(pte) != INVALID_P2M_ENTRY + && pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); +#endif native_set_pte(ptep, pte); } @@ -2407,6 +2396,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_mmu_ops.release_pud = xen_release_pud; #endif + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte); #ifdef CONFIG_X86_64 pv_mmu_ops.write_cr3 = &xen_write_cr3; @@ -2455,7 +2445,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .pte_val = PV_CALLEE_SAVE(xen_pte_val), .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - .make_pte = PV_CALLEE_SAVE(xen_make_pte), + .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), #ifdef CONFIG_X86_PAE From f336ae03149725bb5844166c7b04f7f65f17eec9 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Wed, 22 Jun 2016 17:27:22 +0300 Subject: [PATCH 0788/1050] IB/core: Fix no default GIDs when netdevice reregisters Currently, when the netdevice returned by get_netdev is unregistered, we delete all GIDs (including the default GIDs) and reset their attributes. Therefore, when we re-register it, no default GIDs will be assigned (as their "default GID") attribute will be reset. Fixing this by keeping "default GID" attribute. Fixes: 03db3a2d81e6 ('IB/core: Add RoCE GID table management') Signed-off-by: Talat Batheesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 040966775f40..1a2984c28b95 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -411,7 +411,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, for (ix = 0; ix < table->sz; ix++) if (table->data_vec[ix].attr.ndev == ndev) - if (!del_gid(ib_dev, port, table, ix, false)) + if (!del_gid(ib_dev, port, table, ix, + !!(table->data_vec[ix].props & + GID_TABLE_ENTRY_DEFAULT))) deleted = true; write_unlock_irq(&table->rwlock); From c65f6c5a3650876a69d1041a9d3c90986e9ca233 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Wed, 22 Jun 2016 17:27:23 +0300 Subject: [PATCH 0789/1050] IB/core: Fix RoCE v1 multicast join logic issue During multicast join of RoCEv1, IGMP join state and max hop limit were updated incorrectly. IGMP join should be sent and marked as joined only on RoCEv2 after a successful join. Max hops should be updated to the hop limit on RoCEv2 regardless of the join state. Fixes: bee3c3c91865 ('IB/cma: Join and leave multicast groups...') Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f0c91ba3178a..dcde8706f123 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3878,12 +3878,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; if (addr->sa_family == AF_INET) { - if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, true); - if (!err) { - mc->igmp_joined = true; - mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; + if (!err) + mc->igmp_joined = true; } } else { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) From b3556005c5f319285610e3eae88b4078e1a4d3bc Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 22 Jun 2016 17:27:24 +0300 Subject: [PATCH 0790/1050] IB/core: Fix false search of the IB_SA_WELL_KNOWN_GUID When virtualziation is supported, VFs may send SA MADs to a GID formed by the concatenation of the subnet prefix with the IB_SA_WELL_KNOWN_GUID. When a response is required, the current code will search the local HCA's port for the received GID to figure out the GID index of the entry containing this GID. However, since this is not a real GID it will not be found and error will be printed. We change the logic to check if the destination GID is this special GID and avoid lookup in this case and use GID index 0. Fixes: a0c1b2a35087 ('IB/core: Support accessing SA in virtualized environment') Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 1d7d4cf442e3..6298f54b4137 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -511,12 +511,16 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, ah_attr->grh.dgid = sgid; if (!rdma_cap_eth_ah(device, port_num)) { - ret = ib_find_cached_gid_by_port(device, &dgid, - IB_GID_TYPE_IB, - port_num, NULL, - &gid_index); - if (ret) - return ret; + if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { + ret = ib_find_cached_gid_by_port(device, &dgid, + IB_GID_TYPE_IB, + port_num, NULL, + &gid_index); + if (ret) + return ret; + } else { + gid_index = 0; + } } ah_attr->grh.sgid_index = (u8) gid_index; From b57141c1abe41e26c09b1b1b7ece463464ba6de2 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 22 Jun 2016 17:27:25 +0300 Subject: [PATCH 0791/1050] IB/uverbs: Initialize ib_qp_init_attr with zeros Initialize ib_qp_init_attr with zeros in order to avoid from garbage in fields that won't be set with user values. Fixes: a060b5629ab06 ('IB/core: generic RDMA READ/WRITE API') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1a8babb8ee3c..825021d1008b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1747,7 +1747,7 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_srq *srq = NULL; struct ib_qp *qp; char *buf; - struct ib_qp_init_attr attr; + struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; From c9b254955b9f8814966f5dabd34c39d0e0a2b437 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 22 Jun 2016 17:27:26 +0300 Subject: [PATCH 0792/1050] IB/mlx5: Fix post send fence logic If the caller specified IB_SEND_FENCE in the send flags of the work request and no previous work request stated that the successive one should be fenced, the work request would be executed without a fence. This could result in RDMA read or atomic operations failure due to a MR being invalidated. Fix this by adding the mlx5 enumeration for fencing RDMA/atomic operations and fix the logic to apply this. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 7 ++++--- include/linux/mlx5/qp.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ce434228a5ea..ce0a7ab35a22 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3332,10 +3332,11 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) return MLX5_FENCE_MODE_SMALL_AND_FENCE; else return fence; - - } else { - return 0; + } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { + return MLX5_FENCE_MODE_FENCE; } + + return 0; } static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index e4e29882fdfd..630f66a186b7 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -172,6 +172,7 @@ enum { enum { MLX5_FENCE_MODE_NONE = 0 << 5, MLX5_FENCE_MODE_INITIATOR_SMALL = 1 << 5, + MLX5_FENCE_MODE_FENCE = 2 << 5, MLX5_FENCE_MODE_STRONG_ORDERING = 3 << 5, MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, }; From 00bf534fce23048aa0e6fd8dbceedf097ee65508 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Wed, 22 Jun 2016 17:27:27 +0300 Subject: [PATCH 0793/1050] IB/mlx5: Fix wrong naming of port_rcv_data counter port_xmit_data is written instead of port_rcv_data. Fixes: 3efd9a11212d ('IB/mlx5: Modify MAD reading counters method to use counter registers') Signed-off-by: Talat Batheesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 1534af113058..364aab9f3c9e 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -121,7 +121,7 @@ static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, pma_cnt_ext->port_xmit_data = cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets, transmitted_ib_multicast.octets) >> 2); - pma_cnt_ext->port_xmit_data = + pma_cnt_ext->port_rcv_data = cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets, received_ib_multicast.octets) >> 2); pma_cnt_ext->port_xmit_packets = From f2940e2c76bb554a7fbdd28ca5b90904117a9e96 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 22 Jun 2016 17:27:28 +0300 Subject: [PATCH 0794/1050] IB/mlx4: Fix the SQ size of an RC QP When calculating the required size of an RC QP send queue, leave enough space for masked atomic operations, which require more space than "regular" atomic operation. Fixes: 6fa8f719844b ("IB/mlx4: Add support for masked atomic operations") Signed-off-by: Yishai Hadas Reviewed-by: Jack Morgenstein Reviewed-by: Eran Ben Elisha Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 81b0e1fbec1d..519611793f7c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -362,7 +362,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + - sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_masked_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: From a6100603a4a87fc436199362bdb81cb849faaf6e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 22 Jun 2016 17:27:29 +0300 Subject: [PATCH 0795/1050] IB/mlx4: Fix error flow when sending mads under SRIOV Fix mad send error flow to prevent double freeing address handles, and leaking tx_ring entries when SRIOV is active. If ib_mad_post_send fails, the address handle pointer in the tx_ring entry must be set to NULL (or there will be a double-free) and tx_tail must be incremented (or there will be a leak of tx_ring entries). The tx_ring is handled the same way in the send-completion handler. Fixes: 37bfc7c1e83f ("IB/mlx4: SR-IOV multiplex and demultiplex MADs") Signed-off-by: Yishai Hadas Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index d68f506c1922..9c2e53d28f98 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -527,7 +527,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); spin_unlock(&tun_qp->tx_lock); if (ret) - goto out; + goto end; tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); if (tun_qp->tx_ring[tun_tx_ix].ah) @@ -596,9 +596,15 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, wr.wr.send_flags = IB_SEND_SIGNALED; ret = ib_post_send(src_qp, &wr.wr, &bad_wr); -out: - if (ret) - ib_destroy_ah(ah); + if (!ret) + return 0; + out: + spin_lock(&tun_qp->tx_lock); + tun_qp->tx_ix_tail++; + spin_unlock(&tun_qp->tx_lock); + tun_qp->tx_ring[tun_tx_ix].ah = NULL; +end: + ib_destroy_ah(ah); return ret; } @@ -1326,9 +1332,15 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, ret = ib_post_send(send_qp, &wr.wr, &bad_wr); + if (!ret) + return 0; + + spin_lock(&sqp->tx_lock); + sqp->tx_ix_tail++; + spin_unlock(&sqp->tx_lock); + sqp->tx_ring[wire_tx_ix].ah = NULL; out: - if (ret) - ib_destroy_ah(ah); + ib_destroy_ah(ah); return ret; } From 5533c18ab02b17a7f2ac11908e2d97d4b421617d Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 22 Jun 2016 17:27:30 +0300 Subject: [PATCH 0796/1050] IB/mlx4: Verify port number in flow steering create flow In procedure mlx4_ib_create_flow, passing an invalid port number will cause an out-of-bounds array access. Data passed to this procedure can come from user-space. Therefore, need to validate port number before proceeding onwards. Note that we check against the number of physical ports declared at the verbs (ib core) level; When bonding is active, the verbs level sees one physical port, even though the low-level driver sees two ports. Fixes: f77c0162a339 ("IB/mlx4: Add receive flow steering support") Signed-off-by: Yishai Hadas Reviewed-by: Jack Morgenstein Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0eb09e104542..42a46078d7d5 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1704,6 +1704,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); + if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt) + return ERR_PTR(-EINVAL); + if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && (flow_attr->type != IB_FLOW_ATTR_NORMAL)) return ERR_PTR(-EOPNOTSUPP); From 5b420d9cf7382c6e1512e96e02d18842d272049c Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Wed, 22 Jun 2016 17:27:31 +0300 Subject: [PATCH 0797/1050] IB/mlx4: Fix memory leak if QP creation failed When RC, UC, or RAW QPs are created, a qp object is allocated (kzalloc). If at a later point (in procedure create_qp_common) the qp creation fails, this qp object must be freed. Fixes: 1ffeb2eb8be99 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support") Signed-off-by: Dotan Barak Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 519611793f7c..8db8405c1e99 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1191,8 +1191,10 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, { err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, &qp, gfp); - if (err) + if (err) { + kfree(qp); return ERR_PTR(err); + } qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; From cbc9355a939b90263c58beb855f8151b56634c42 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 22 Jun 2016 17:27:32 +0300 Subject: [PATCH 0798/1050] IB/mlx4: Prevent cross page boundary allocation Prevent cross page boundary allocation by allocating new page, this is required to be aligned with ConnectX-3 HW requirements. Not doing that might cause to "RDMA read local protection" error. Fixes: 1b2cd0fc673c ('IB/mlx4: Support the new memory registration API') Suggested-by: Christoph Hellwig Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx4/mr.c | 34 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 6c5ac5d8f32f..29acda249612 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -139,7 +139,7 @@ struct mlx4_ib_mr { u32 max_pages; struct mlx4_mr mmr; struct ib_umem *umem; - void *pages_alloc; + size_t page_map_size; }; struct mlx4_ib_mw { diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 631272172a0b..5d73989d9771 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -277,20 +277,23 @@ mlx4_alloc_priv_pages(struct ib_device *device, struct mlx4_ib_mr *mr, int max_pages) { - int size = max_pages * sizeof(u64); - int add_size; int ret; - add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + /* Ensure that size is aligned to DMA cacheline + * requirements. + * max_pages is limited to MLX4_MAX_FAST_REG_PAGES + * so page_map_size will never cross PAGE_SIZE. + */ + mr->page_map_size = roundup(max_pages * sizeof(u64), + MLX4_MR_PAGES_ALIGN); - mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL); - if (!mr->pages_alloc) + /* Prevent cross page boundary allocation. */ + mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL); + if (!mr->pages) return -ENOMEM; - mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN); - mr->page_map = dma_map_single(device->dma_device, mr->pages, - size, DMA_TO_DEVICE); + mr->page_map_size, DMA_TO_DEVICE); if (dma_mapping_error(device->dma_device, mr->page_map)) { ret = -ENOMEM; @@ -298,9 +301,9 @@ mlx4_alloc_priv_pages(struct ib_device *device, } return 0; -err: - kfree(mr->pages_alloc); +err: + free_page((unsigned long)mr->pages); return ret; } @@ -309,11 +312,10 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr) { if (mr->pages) { struct ib_device *device = mr->ibmr.device; - int size = mr->max_pages * sizeof(u64); dma_unmap_single(device->dma_device, mr->page_map, - size, DMA_TO_DEVICE); - kfree(mr->pages_alloc); + mr->page_map_size, DMA_TO_DEVICE); + free_page((unsigned long)mr->pages); mr->pages = NULL; } } @@ -537,14 +539,12 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, mr->npages = 0; ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map, - sizeof(u64) * mr->max_pages, - DMA_TO_DEVICE); + mr->page_map_size, DMA_TO_DEVICE); rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page); ib_dma_sync_single_for_device(ibmr->device, mr->page_map, - sizeof(u64) * mr->max_pages, - DMA_TO_DEVICE); + mr->page_map_size, DMA_TO_DEVICE); return rc; } From 2aee309d3e01447c55fdf89cef05a0e2be372655 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 17 Jun 2016 19:17:49 -0700 Subject: [PATCH 0799/1050] IB/hfi1: Fix deadlock with txreq allocation slow path A failure in the get_txreq() inline will result in a slow path retry using __get_txreq(). __get_txreq() attempts to procure the qp s_lock, which is already held in all callers. Fix by deleting the s_lock maintenance in __get_txreq() and add sparse syntax hooks to future proof the code. Cc: Stable # 4.6+ Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/verbs_txreq.c | 4 +--- drivers/infiniband/hw/hfi1/verbs_txreq.h | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index bc95c4112c61..d8fb056526f8 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx) struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) + __must_hold(&qp->s_lock) { struct verbs_txreq *tx = ERR_PTR(-EBUSY); - unsigned long flags; - spin_lock_irqsave(&qp->s_lock, flags); write_seqlock(&dev->iowait_lock); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; @@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, } out: write_sequnlock(&dev->iowait_lock); - spin_unlock_irqrestore(&qp->s_lock, flags); return tx; } diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 1cf69b2fe4a5..a1d6e0807f97 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) + __must_hold(&qp->slock) { struct verbs_txreq *tx; struct hfi1_qp_priv *priv = qp->priv; From 8ae84f7c56044ee17ef6b700cb34d11ad9428a2e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 17 Jun 2016 19:17:54 -0700 Subject: [PATCH 0800/1050] IB/hfi1: Don't zero out qp->s_ack_queue in rvt_reset_qp Since rvt_reset_qp already zero's out qp->s_ack_queue head and tail pointers, there is no need to zero out qp->s_ack_queue itself. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Ashutosh Dixit Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index c3e0d614f68b..d04f4fe522b5 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -576,12 +576,6 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_ssn = 1; qp->s_lsn = 0; qp->s_mig_state = IB_MIG_MIGRATED; - if (qp->s_ack_queue) - memset( - qp->s_ack_queue, - 0, - rvt_max_atomic(rdi) * - sizeof(*qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; qp->s_num_rd_atomic = 0; From c755f4afa66ad3ed98870bd3254f37c47fb2c800 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 22 Jun 2016 13:29:33 -0700 Subject: [PATCH 0801/1050] IB/rdmavt: Correct qp_priv_alloc() return value test The current drivers return errors from this calldown wrapped in an ERR_PTR(). The rdmavt code incorrectly tests for NULL. The code is fixed to use IS_ERR() and change ret according to the driver return value. Cc: Stable # 4.6+ Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 4 +++- include/rdma/rdma_vt.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d04f4fe522b5..41ba7e9cadaa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -699,8 +699,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * initialization that is needed. */ priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); - if (!priv) + if (IS_ERR(priv)) { + ret = priv; goto bail_qp; + } qp->priv = priv; qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 16274e2133cd..9c9a27d42aaa 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -203,7 +203,9 @@ struct rvt_driver_provided { /* * Allocate a private queue pair data structure for driver specific - * information which is opaque to rdmavt. + * information which is opaque to rdmavt. Errors are returned via + * ERR_PTR(err). The driver is free to return NULL or a valid + * pointer. */ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); From 747f1c6d9be749a29612fc78c321b97099906008 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 14 Jun 2016 16:54:16 -0500 Subject: [PATCH 0802/1050] i40iw: Correct CQ arming CQ is armed for solicited events only, ignoring other notification flags. Correct this by arming for next and arming for solicited event if IB_CQ_SOLICITED is set. Also protect CQ shadow area update with spinlock. Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 02a735b64208..c6e75acbbbcd 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2327,13 +2327,16 @@ static int i40iw_req_notify_cq(struct ib_cq *ibcq, { struct i40iw_cq *iwcq; struct i40iw_cq_uk *ukcq; - enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED; + unsigned long flags; + enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT; iwcq = (struct i40iw_cq *)ibcq; ukcq = &iwcq->sc_cq.cq_uk; - if (notify_flags == IB_CQ_NEXT_COMP) - cq_notify = IW_CQ_COMPL_EVENT; + if (notify_flags == IB_CQ_SOLICITED) + cq_notify = IW_CQ_COMPL_SOLICITED; + spin_lock_irqsave(&iwcq->lock, flags); ukcq->ops.iw_cq_request_notification(ukcq, cq_notify); + spin_unlock_irqrestore(&iwcq->lock, flags); return 0; } From ee23abd75c5076e51061c275e8f659d754a63c9d Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Tue, 14 Jun 2016 16:54:17 -0500 Subject: [PATCH 0803/1050] i40iw: Correct status check on i40iw_get_pble i40iw_get_pble returns 0 on success. Correct the check on return code. Signed-off-by: Faisal Latif Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index c6e75acbbbcd..65bea9c8df13 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1527,7 +1527,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, mutex_lock(&iwdev->pbl_mutex); status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt); mutex_unlock(&iwdev->pbl_mutex); - if (!status) + if (status) goto err1; if (palloc->level != I40IW_LEVEL_1) From 0477e18145c565f9ca74c6df4112f818f673fcaa Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Tue, 14 Jun 2016 16:54:18 -0500 Subject: [PATCH 0804/1050] i40iw: Return correct max_fast_reg_page_list_len Return correct value for max_fast_reg_page_list_len from i40iw_query_device(). Signed-off-by: Faisal Latif Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 1 + drivers/infiniband/hw/i40iw/i40iw_verbs.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 8b9532034558..14f16a2dcbed 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -113,6 +113,7 @@ #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types) #define IW_CFG_FPM_QP_COUNT 32768 +#define I40IW_MAX_PAGES_PER_FMR 512 #define I40IW_MTU_TO_MSS 40 #define I40IW_DEFAULT_MSS 1460 diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 65bea9c8df13..31eda323fcbf 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -79,6 +79,7 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_qp_init_rd_atom = props->max_qp_rd_atom; props->atomic_cap = IB_ATOMIC_NONE; props->max_map_per_fmr = 1; + props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR; return 0; } From 7748e4990de42ea796543c0ffd34118c3a5e6a98 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 14 Jun 2016 16:54:19 -0500 Subject: [PATCH 0805/1050] i40iw: Enable level-1 PBL for fast memory registration Set the chunk_size to enable level-1 PBL support when the fast memory page count is more than one. Signed-off-by: Shiraz Saleem Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 1 + drivers/infiniband/hw/i40iw/i40iw_verbs.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 14f16a2dcbed..b738acdb9b02 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -114,6 +114,7 @@ #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types) #define IW_CFG_FPM_QP_COUNT 32768 #define I40IW_MAX_PAGES_PER_FMR 512 +#define I40IW_MIN_PAGES_PER_FMR 1 #define I40IW_MTU_TO_MSS 40 #define I40IW_DEFAULT_MSS 1460 diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 31eda323fcbf..33959ed14563 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2150,6 +2150,7 @@ static int i40iw_post_send(struct ib_qp *ibqp, struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev; struct i40iw_fast_reg_stag_info info; + memset(&info, 0, sizeof(info)); info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD; info.access_rights |= i40iw_get_user_access(flags); info.stag_key = reg_wr(ib_wr)->key & 0xff; @@ -2159,10 +2160,14 @@ static int i40iw_post_send(struct ib_qp *ibqp, info.addr_type = I40IW_ADDR_TYPE_VA_BASED; info.va = (void *)(uintptr_t)iwmr->ibmr.iova; info.total_len = iwmr->ibmr.length; + info.reg_addr_pa = *(u64 *)palloc->level1.addr; info.first_pm_pbl_index = palloc->level1.idx; info.local_fence = ib_wr->send_flags & IB_SEND_FENCE; info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED; + if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR) + info.chunk_size = 1; + if (page_shift == 21) info.page_size = 1; /* 2M page */ From 962fcef33b03395051367181a0549d29d109d9a4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2016 13:03:36 +0800 Subject: [PATCH 0806/1050] esp: Fix ESN generation under UDP encapsulation Blair Steven noticed that ESN in conjunction with UDP encapsulation is broken because we set the temporary ESP header to the wrong spot. This patch fixes this by first of all using the right spot, i.e., 4 bytes off the real ESP header, and then saving this information so that after encryption we can restore it properly. Fixes: 7021b2e1cddd ("esp4: Switch to new AEAD interface") Reported-by: Blair Steven Signed-off-by: Herbert Xu Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 52 ++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 477937465a20..d95631d09248 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -23,6 +23,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); @@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen) { unsigned int len; - len = seqhilen; + len = extralen; len += crypto_aead_ivsize(aead); @@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen) { return crypto_aead_ivsize(aead) ? - PTR_ALIGN((u8 *)tmp + seqhilen, - crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; + PTR_ALIGN((u8 *)tmp + extralen, + crypto_aead_alignmask(aead) + 1) : tmp + extralen; } static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) @@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static void esp_output_done_esn(struct crypto_async_request *base, int err) @@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + struct esp_output_extra *extra; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; @@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int tfclen; int nfrags; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; __be64 seqno; /* skb is pure payload to encrypt */ @@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); - assoclen += seqhilen; + extralen += sizeof(*extra); + assoclen += sizeof(__be32); } - tmp = esp_alloc_tmp(aead, nfrags, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, extralen); if (!tmp) { err = -ENOMEM; goto error; } - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); aead_request_set_callback(req, 0, esp_output_done_esn, skb); } @@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); From 067a7cd06f7bf860f2e3415394b065b9a0983802 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 20 Jun 2016 13:37:18 -0700 Subject: [PATCH 0807/1050] act_ife: only acquire tcf_lock for existing actions Alexey reported that we have GFP_KERNEL allocation when holding the spinlock tcf_lock. Actually we don't have to take that spinlock for all the cases, especially for the new one we just create. To modify the existing actions, we still need this spinlock to make sure the whole update is atomic. For net-next, we can get rid of this spinlock because we already hold the RTNL lock on slow path, and on fast path we can use RCU to protect the metalist. Joint work with Jamal. Reported-by: Alexey Khoroshilov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_ife.h | 6 ++-- net/sched/act_ife.c | 55 +++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index dc9a09aefb33..c55facd17b7e 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -36,7 +36,7 @@ struct tcf_meta_ops { int (*encode)(struct sk_buff *, void *, struct tcf_meta_info *); int (*decode)(struct sk_buff *, void *, u16 len); int (*get)(struct sk_buff *skb, struct tcf_meta_info *mi); - int (*alloc)(struct tcf_meta_info *, void *); + int (*alloc)(struct tcf_meta_info *, void *, gfp_t); void (*release)(struct tcf_meta_info *); int (*validate)(void *val, int len); struct module *owner; @@ -48,8 +48,8 @@ int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval); +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi); int ife_validate_meta_u32(void *val, int len); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 658046dfe02d..e4076598f214 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -106,9 +106,9 @@ int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_get_meta_u16); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u32), gfp); if (!mi->metaval) return -ENOMEM; @@ -116,9 +116,9 @@ int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) } EXPORT_SYMBOL_GPL(ife_alloc_meta_u32); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u16), gfp); if (!mi->metaval) return -ENOMEM; @@ -240,10 +240,10 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len) + void *val, int len, bool exists) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -251,11 +251,13 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); request_module("ifemeta%u", metaid); rtnl_lock(); - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -272,10 +274,10 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len) + int len, bool exists) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -284,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, if (!ops) return -ENOENT; - mi = kzalloc(sizeof(*mi), GFP_KERNEL); + mi = kzalloc(sizeof(*mi), exists ? GFP_ATOMIC : GFP_KERNEL); if (!mi) { /*put back what find_ife_oplist took */ module_put(ops->owner); @@ -294,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, mi->metaid = metaid; mi->ops = ops; if (len > 0) { - ret = ops->alloc(mi, metaval); + ret = ops->alloc(mi, metaval, exists ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); module_put(ops->owner); @@ -307,14 +309,14 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, return ret; } -static int use_all_metadata(struct tcf_ife_info *ife) +static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; int rc = 0; int installed = 0; list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0); + rc = add_metainfo(ife, o->metaid, NULL, 0, exists); if (rc == 0) installed += 1; } @@ -385,8 +387,9 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind) spin_unlock_bh(&ife->tcf_lock); } -/* under ife->tcf_lock */ -static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) +/* under ife->tcf_lock for existing action */ +static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, + bool exists) { int len = 0; int rc = 0; @@ -398,11 +401,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len); + rc = load_metaops_and_vet(ife, i, val, len, exists); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } @@ -474,7 +477,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, saddr = nla_data(tb[TCA_IFE_SMAC]); } - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; if (parm->flags & IFE_ENCODE) { @@ -504,11 +508,12 @@ metadata_parse_err: if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } - err = populate_metalist(ife, tb2); + err = populate_metalist(ife, tb2, exists); if (err) goto metadata_parse_err; @@ -518,17 +523,19 @@ metadata_parse_err: * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife); + err = use_all_metadata(ife, exists); if (err) { if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } } - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, a); From 817e9f2c5c262b2716f5d77020d118ad53315f3e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 20 Jun 2016 13:37:19 -0700 Subject: [PATCH 0808/1050] act_ife: acquire ife_mod_lock before reading ifeoplist Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_ife.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e4076598f214..ea4a2fef1b71 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -277,7 +277,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool exists) + int len, bool atomic) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -286,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, if (!ops) return -ENOENT; - mi = kzalloc(sizeof(*mi), exists ? GFP_ATOMIC : GFP_KERNEL); + mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); if (!mi) { /*put back what find_ife_oplist took */ module_put(ops->owner); @@ -296,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, mi->metaid = metaid; mi->ops = ops; if (len > 0) { - ret = ops->alloc(mi, metaval, exists ? GFP_ATOMIC : GFP_KERNEL); + ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); module_put(ops->owner); @@ -309,17 +309,19 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, return ret; } -static int use_all_metadata(struct tcf_ife_info *ife, bool exists) +static int use_all_metadata(struct tcf_ife_info *ife) { struct tcf_meta_ops *o; int rc = 0; int installed = 0; + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, exists); + rc = add_metainfo(ife, o->metaid, NULL, 0, true); if (rc == 0) installed += 1; } + read_unlock(&ife_mod_lock); if (installed) return 0; @@ -523,7 +525,7 @@ metadata_parse_err: * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife, exists); + err = use_all_metadata(ife); if (err) { if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); From c0cf4512a31eb3cec70b066bc36ed55f7d05b8c0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 23 Jun 2016 09:35:48 +0200 Subject: [PATCH 0809/1050] IB/srpt: Reduce QP buffer size The memory needed for the send and receive queues associated with a QP is proportional to the max_sge parameter. The current value of that parameter is such that with an mlx4 HCA the QP buffer size is 8 MB. Since DMA is used for communication between HCA and CPU that buffer either has to be allocated coherently or map_single() must succeed for that buffer. Since large contiguous allocations are fragile and since the maximum segment size for e.g. swiotlb is 256 KB, reduce the max_sge parameter. This patch avoids that the following text appears on the console after SRP logout and relogin on a system equipped with multiple IB HCAs: mlx4_core 0000:05:00.0: swiotlb buffer is full (sz: 8388608 bytes) swiotlb: coherent allocation failed for device 0000:05:00.0 size=8388608 CPU: 11 PID: 148 Comm: kworker/11:1 Not tainted 4.7.0-rc4-dbg+ #1 Call Trace: [] dump_stack+0x67/0x92 [] swiotlb_alloc_coherent+0x141/0x150 [] x86_swiotlb_alloc_coherent+0x3e/0x50 [] mlx4_buf_direct_alloc.isra.5+0x9a/0x120 [mlx4_core] [] mlx4_buf_alloc+0x165/0x1a0 [mlx4_core] [] create_qp_common.isra.29+0x57d/0xff0 [mlx4_ib] [] mlx4_ib_create_qp+0x12a/0x3f0 [mlx4_ib] [] ib_create_qp+0x3a/0x250 [ib_core] [] srpt_cm_handler+0x4bb/0xcad [ib_srpt] [] cm_process_work+0x20/0xf0 [ib_cm] [] cm_work_handler+0x1ac0/0x2059 [ib_cm] [] process_one_work+0x19d/0x490 [] worker_thread+0x49/0x490 [] kthread+0xea/0x100 [] ret_from_fork+0x1f/0x40 Fixes: b99f8e4d7bcd ("IB/srpt: convert to the generic RDMA READ/WRITE API") Signed-off-by: Bart Van Assche Cc: Laurence Oberman Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- drivers/infiniband/ulp/srpt/ib_srpt.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index e68b20cba70b..4a4155640d51 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1638,8 +1638,7 @@ retry: */ qp_init->cap.max_send_wr = srp_sq_size / 2; qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; - qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd, - sdev->device->attrs.max_sge); + qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; qp_init->port_num = ch->sport->port; ch->qp = ib_create_qp(sdev->pd, qp_init); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index fee6bfd7ca21..389030487da7 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -106,6 +106,7 @@ enum { SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, SRPT_DEF_SG_TABLESIZE = 128, + SRPT_DEF_SG_PER_WQE = 16, MIN_SRPT_SQ_SIZE = 16, DEF_SRPT_SQ_SIZE = 4096, From 33cdcee04be3b4482be97393167e7561b2584e1e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 22 Jun 2016 09:25:14 +0200 Subject: [PATCH 0810/1050] pwm: Fix pwm_apply_args() Commit 5ec803edcb70 ("pwm: Add core infrastructure to allow atomic updates"), implemented pwm_disable() as a wrapper around pwm_apply_state(), and then, commit ef2bf4997f7d ("pwm: Improve args checking in pwm_apply_state()") added missing checks on the ->period value in pwm_apply_state() to ensure we were not passing inappropriate values to the ->config() or ->apply() methods. The conjunction of these 2 commits led to a case where pwm_disable() was no longer succeeding, thus preventing the polarity setting done in pwm_apply_args(). Set a valid period in pwm_apply_args() to ensure polarity setting won't be rejected. Signed-off-by: Boris Brezillon Reported-by: Geert Uytterhoeven Suggested-by: Brian Norris Fixes: 5ec803edcb70 ("pwm: Add core infrastructure to allow atomic updates") Tested-by: Geert Uytterhoeven Reviewed-by: Brian Norris Signed-off-by: Thierry Reding --- include/linux/pwm.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 908b67c847cd..c038ae36b10e 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -464,6 +464,8 @@ static inline bool pwm_can_sleep(struct pwm_device *pwm) static inline void pwm_apply_args(struct pwm_device *pwm) { + struct pwm_state state = { }; + /* * PWM users calling pwm_apply_args() expect to have a fresh config * where the polarity and period are set according to pwm_args info. @@ -476,18 +478,20 @@ static inline void pwm_apply_args(struct pwm_device *pwm) * at startup (even if they are actually enabled), thus authorizing * polarity setting. * - * Instead of setting ->enabled to false, we call pwm_disable() - * before pwm_set_polarity() to ensure that everything is configured - * as expected, and the PWM is really disabled when the user request - * it. + * To fulfill this requirement, we apply a new state which disables + * the PWM device and set the reference period and polarity config. * * Note that PWM users requiring a smooth handover between the * bootloader and the kernel (like critical regulators controlled by * PWM devices) will have to switch to the atomic API and avoid calling * pwm_apply_args(). */ - pwm_disable(pwm); - pwm_set_polarity(pwm, pwm->args.polarity); + + state.enabled = false; + state.polarity = pwm->args.polarity; + state.period = pwm->args.period; + + pwm_apply_state(pwm, &state); } struct pwm_lookup { From c7f1429389ec1aa25e042bb13451385fbb596f8c Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Thu, 23 Jun 2016 10:24:42 -0700 Subject: [PATCH 0811/1050] Input: xpad - fix oops when attaching an unknown Xbox One gamepad Xbox One controllers have multiple interfaces which all have the same class, subclass, and protocol. One of the these interfaces has only a single endpoint. When Xpad attempts to bind to this interface, it causes an oops when trying initialize the output URB by trying to access the second endpoint's descriptor. This situation was avoided for known Xbox One devices by checking the XTYPE constant associated with the VID and PID tuple. However, this breaks when new or previously unknown Xbox One controllers are attached to the system. This change addresses the problem by deriving the XTYPE for Xbox One controllers based on the interface protocol before checking the interface number. Fixes: 1a48ff81b391 ("Input: xpad - add support for Xbox One controllers") Signed-off-by: Cameron Gutman Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 923c57236c51..3438e98c145a 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1437,16 +1437,6 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id break; } - if (xpad_device[i].xtype == XTYPE_XBOXONE && - intf->cur_altsetting->desc.bInterfaceNumber != 0) { - /* - * The Xbox One controller lists three interfaces all with the - * same interface class, subclass and protocol. Differentiate by - * interface number. - */ - return -ENODEV; - } - xpad = kzalloc(sizeof(struct usb_xpad), GFP_KERNEL); if (!xpad) return -ENOMEM; @@ -1478,6 +1468,8 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) { if (intf->cur_altsetting->desc.bInterfaceProtocol == 129) xpad->xtype = XTYPE_XBOX360W; + else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208) + xpad->xtype = XTYPE_XBOXONE; else xpad->xtype = XTYPE_XBOX360; } else { @@ -1492,6 +1484,17 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id xpad->mapping |= MAP_STICKS_TO_NULL; } + if (xpad->xtype == XTYPE_XBOXONE && + intf->cur_altsetting->desc.bInterfaceNumber != 0) { + /* + * The Xbox One controller lists three interfaces all with the + * same interface class, subclass and protocol. Differentiate by + * interface number. + */ + error = -ENODEV; + goto err_free_in_urb; + } + error = xpad_init_output(intf, xpad); if (error) goto err_free_in_urb; From 04e1b65af2085d4102b2b5d2fd1e050f8ee63092 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 17 Jun 2016 17:20:40 +0000 Subject: [PATCH 0812/1050] Btrfs: fix error return code in btrfs_init_test_fs() Fix to return a negative error code from the kern_mount() error handling case instead of 0(ret is set to 0 by register_filesystem), as done elsewhere in this function. Signed-off-by: Wei Yongjun Reviewed-by: Omar Sandoval Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/tests/btrfs-tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 10eb249ef891..02223f3f78f4 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -68,7 +68,7 @@ int btrfs_init_test_fs(void) if (IS_ERR(test_mnt)) { printk(KERN_ERR "btrfs: cannot mount test file system\n"); unregister_filesystem(&test_type); - return ret; + return PTR_ERR(test_mnt); } return 0; } From 415b35a55b57a701afe7391d32a6bb0193b7d3da Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 17 Jun 2016 19:16:21 -0700 Subject: [PATCH 0813/1050] Btrfs: fix error handling in map_private_extent_buffer map_private_extent_buffer() can return -EINVAL in two different cases, 1. when the requested contents span two pages if nodesize is larger than pagesize, 2. when it detects something insane. The 2nd one used to be only a WARN_ON(1), and we decided to return a error to callers, but we didn't fix up all its callers, which will be addressed by this patch. Without this, btrfs may end up with 'general protection', ie. reading invalid memory. Reported-by: Vegard Nossum Signed-off-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 6 +++++- fs/btrfs/extent_io.c | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6276add8538a..a85cf7d23309 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1786,10 +1786,12 @@ static noinline int generic_bin_search(struct extent_buffer *eb, if (!err) { tmp = (struct btrfs_disk_key *)(kaddr + offset - map_start); - } else { + } else if (err == 1) { read_extent_buffer(eb, &unaligned, offset, sizeof(unaligned)); tmp = &unaligned; + } else { + return err; } } else { @@ -2830,6 +2832,8 @@ cow_done: } ret = key_search(b, key, level, &prev_cmp, &slot); + if (ret < 0) + goto done; if (level != 0) { int dec = 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aaee3ef55ed8..75533adef998 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5342,6 +5342,11 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, return ret; } +/* + * return 0 if the item is found within a page. + * return 1 if the item spans two pages. + * return -EINVAL otherwise. + */ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **map, unsigned long *map_start, @@ -5356,7 +5361,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, PAGE_SHIFT; if (i != end_i) - return -EINVAL; + return 1; if (i == 0) { offset = start_offset; From c0d2f6104e8ab2eb75e58e72494ad4b69c5227f8 Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Wed, 22 Jun 2016 09:57:01 +0800 Subject: [PATCH 0814/1050] btrfs: fix disk_i_size update bug when fallocate() fails When doing truncate operation, btrfs_setsize() will first call truncate_setsize() to set new inode->i_size, but if later btrfs_truncate() fails, btrfs_setsize() will call "i_size_write(inode, BTRFS_I(inode)->disk_i_size)" to reset the inmemory inode size, now bug occurs. It's because for truncate case btrfs_ordered_update_i_size() directly uses inode->i_size to update BTRFS_I(inode)->disk_i_size, indeed we should use the "offset" argument to update disk_i_size. Here is the call graph: ==>btrfs_truncate() ====>btrfs_truncate_inode_items() ======>btrfs_ordered_update_i_size(inode, last_size, NULL); Here btrfs_ordered_update_i_size()'s offset argument is last_size. And below test case can reveal this bug: dd if=/dev/zero of=fs.img bs=$((1024*1024)) count=100 dev=$(losetup --show -f fs.img) mkdir -p /mnt/mntpoint mkfs.btrfs -f $dev mount $dev /mnt/mntpoint cd /mnt/mntpoint echo "workdir is: /mnt/mntpoint" blocksize=$((128 * 1024)) dd if=/dev/zero of=testfile bs=$blocksize count=1 sync count=$((17*1024*1024*1024/blocksize)) echo "file size is:" $((count*blocksize)) for ((i = 1; i <= $count; i++)); do i=$((i + 1)) dst_offset=$((blocksize * i)) xfs_io -f -c "reflink testfile 0 $dst_offset $blocksize"\ testfile > /dev/null done sync truncate --size 0 testfile ls -l testfile du -sh testfile exit In this case, truncate operation will fail for enospc reason and "du -sh testfile" returns value greater than 0, but testfile's size is 0, we need to reflect correct inode->i_size. Signed-off-by: Wang Xiaoguang Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ordered-data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e96634a725c3..aca8264f4a49 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -968,6 +968,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct rb_node *prev = NULL; struct btrfs_ordered_extent *test; int ret = 1; + u64 orig_offset = offset; spin_lock_irq(&tree->lock); if (ordered) { @@ -983,7 +984,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, /* truncate file */ if (disk_i_size > i_size) { - BTRFS_I(inode)->disk_i_size = i_size; + BTRFS_I(inode)->disk_i_size = orig_offset; ret = 0; goto out; } From b7f67055d2df9b8f68f02e49d256ee3973999bd2 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 23 Jun 2016 15:16:44 +0530 Subject: [PATCH 0815/1050] Btrfs: Force stripesize to the value of sectorsize Btrfs code currently assumes stripesize to be same as sectorsize. However Btrfs-progs (until commit df05c7ed455f519e6e15e46196392e4757257305) has been setting btrfs_super_block->stripesize to a value of 4096. This commit makes sure that the value of btrfs_super_block->stripesize is a power of 2. Later, it unconditionally sets btrfs_root->stripesize to sectorsize. Signed-off-by: Chandan Rajendra Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 ++---- fs/btrfs/volumes.c | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 54cca7a1572b..60ce1190307b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2806,7 +2806,7 @@ int open_ctree(struct super_block *sb, nodesize = btrfs_super_nodesize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); - stripesize = btrfs_super_stripesize(disk_super); + stripesize = sectorsize; fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); @@ -4133,9 +4133,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, btrfs_super_bytes_used(sb)); ret = -EINVAL; } - if (!is_power_of_2(btrfs_super_stripesize(sb)) || - ((btrfs_super_stripesize(sb) != sectorsize) && - (btrfs_super_stripesize(sb) != 4096))) { + if (!is_power_of_2(btrfs_super_stripesize(sb))) { btrfs_err(fs_info, "invalid stripesize %u", btrfs_super_stripesize(sb)); ret = -EINVAL; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c3a2900c6030..64eec2cec1d3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4694,12 +4694,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_RAID5) { raid_stripe_len = find_raid56_stripe_len(ndevs - 1, - btrfs_super_stripesize(info->super_copy)); + extent_root->stripesize); data_stripes = num_stripes - 1; } if (type & BTRFS_BLOCK_GROUP_RAID6) { raid_stripe_len = find_raid56_stripe_len(ndevs - 2, - btrfs_super_stripesize(info->super_copy)); + extent_root->stripesize); data_stripes = num_stripes - 2; } From 02bae045f3b3bc5681f075be0a8b1313147d1df2 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 23 Jun 2016 10:58:26 +0800 Subject: [PATCH 0816/1050] drm/amd/powerplay: add some definition for FFC feature on polaris. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h | 2 ++ drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h index d41d37ab5b7c..b8f4b73c322e 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h @@ -392,6 +392,8 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_SetGpuPllDfsForSclk ((uint16_t) 0x300) #define PPSMC_MSG_Didt_Block_Function ((uint16_t) 0x301) +#define PPSMC_MSG_SetVBITimeout ((uint16_t) 0x306) + #define PPSMC_MSG_SecureSRBMWrite ((uint16_t) 0x600) #define PPSMC_MSG_SecureSRBMRead ((uint16_t) 0x601) #define PPSMC_MSG_SetAddress ((uint16_t) 0x800) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h index b85ff5400e57..899d6d8108c2 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h @@ -270,7 +270,8 @@ struct SMU74_Discrete_DpmTable { uint8_t BootPhases; uint8_t VRHotLevel; - uint8_t Reserved1[3]; + uint8_t LdoRefSel; + uint8_t Reserved1[2]; uint16_t FanStartTemperature; uint16_t FanStopTemperature; uint16_t MaxVoltage; From 83a7af6dcfd2d84066c6d19bf2bd837f7be4a5ca Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 23 Jun 2016 11:05:00 +0800 Subject: [PATCH 0817/1050] drm/amd/powerplay: disable FFC. SMC need use VBI signal for MCLK switching Send 2 x frame time as vbi timeout Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 26 +++++++++++++++---- .../drm/amd/powerplay/hwmgr/polaris10_hwmgr.h | 1 + drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 2 ++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 64ee78f7d41e..20ccbc73a8f7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -1805,6 +1805,7 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; } + data->smc_state_table.LdoRefSel = (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 6; /* Populate CKS Lookup Table */ if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) stretch_amount2 = 0; @@ -2487,6 +2488,8 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to enable VR hot GPIO interrupt!", result = tmp_result); + smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay); + tmp_result = polaris10_enable_sclk_control(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to enable SCLK control!", result = tmp_result); @@ -4359,6 +4362,15 @@ static int polaris10_notify_link_speed_change_after_state_change( return 0; } +static int polaris10_notify_smc_display(struct pp_hwmgr *hwmgr) +{ + struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + (PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2); + return (smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ? 0 : -EINVAL; +} + static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) { int tmp_result, result = 0; @@ -4407,6 +4419,11 @@ static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *i "Failed to program memory timing parameters!", result = tmp_result); + tmp_result = polaris10_notify_smc_display(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to notify smc display settings!", + result = tmp_result); + tmp_result = polaris10_unfreeze_sclk_mclk_dpm(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to unfreeze SCLK MCLK DPM!", @@ -4441,6 +4458,7 @@ static int polaris10_set_max_fan_pwm_output(struct pp_hwmgr *hwmgr, uint16_t us_ PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm); } + int polaris10_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display) { PPSMC_Msg msg = has_display ? (PPSMC_Msg)PPSMC_HasDisplay : (PPSMC_Msg)PPSMC_NoDisplay; @@ -4460,8 +4478,6 @@ int polaris10_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwm if (num_active_displays > 1) /* to do && (pHwMgr->pPECI->displayConfiguration.bMultiMonitorInSync != TRUE)) */ polaris10_notify_smc_display_change(hwmgr, false); - else - polaris10_notify_smc_display_change(hwmgr, true); return 0; } @@ -4502,6 +4518,8 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr) frame_time_in_us = 1000000 / refresh_rate; pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us; + data->frame_time_x2 = frame_time_in_us * 2 / 100; + display_gap2 = pre_vbi_time_in_us * (ref_clock / 100); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL2, display_gap2); @@ -4510,8 +4528,6 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr) cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start + offsetof(SMU74_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us)); - polaris10_notify_smc_display_change(hwmgr, num_active_displays != 0); - return 0; } @@ -4623,7 +4639,7 @@ int polaris10_upload_mc_firmware(struct pp_hwmgr *hwmgr) return 0; } - data->need_long_memory_training = true; + data->need_long_memory_training = false; /* * PPMCME_FirmwareDescriptorEntry *pfd = NULL; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h index d717789441f5..afc3434822d1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h @@ -315,6 +315,7 @@ struct polaris10_hwmgr { uint32_t avfs_vdroop_override_setting; bool apply_avfs_cks_off_voltage; + uint32_t frame_time_x2; }; /* To convert to Q8.8 format for firmware */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 28f571449495..77e8e33d5870 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -411,6 +411,8 @@ struct phm_cac_tdp_table { uint8_t ucVr_I2C_Line; uint8_t ucPlx_I2C_address; uint8_t ucPlx_I2C_Line; + uint32_t usBoostPowerLimit; + uint8_t ucCKS_LDO_REFSEL; }; struct phm_ppm_table { From 0812a945fbb814e7946fbe6ddcc81d054c8b6c91 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 22 Jun 2016 21:00:09 +0800 Subject: [PATCH 0818/1050] drm/amd/powerplay: Update CKS on/ CKS off voltage offset calculation CKS on/off voltage offset calculation algorithm takes in a few coefficients. We need to update them for polaris to latest coefficients to align with BB. Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 20ccbc73a8f7..d23dcce6c03c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -98,6 +98,7 @@ #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) +#define CEILING_UCHAR(double) ((double-(uint8_t)(double)) > 0 ? (uint8_t)(double+1) : (uint8_t)(double)) static const uint16_t polaris10_clock_stretcher_lookup_table[2][4] = { {600, 1050, 3, 0}, {600, 1050, 6, 1} }; @@ -1787,20 +1788,27 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) ro = efuse * (max -min)/255 + min; - /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ + /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset + * there is a little difference in calculating + * volt_with_cks with windows */ for (i = 0; i < sclk_table->count; i++) { data->smc_state_table.Sclk_CKS_masterEn0_7 |= sclk_table->entries[i].cks_enable << i; - - volt_without_cks = (uint32_t)(((ro - 40) * 1000 - 2753594 - sclk_table->entries[i].clk/100 * 136418 /1000) / \ - (sclk_table->entries[i].clk/100 * 1132925 /10000 - 242418)/100); - - volt_with_cks = (uint32_t)((ro * 1000 -2396351 - sclk_table->entries[i].clk/100 * 329021/1000) / \ - (sclk_table->entries[i].clk/10000 * 649434 /1000 - 18005)/10); + if (hwmgr->chip_id == CHIP_POLARIS10) { + volt_without_cks = (uint32_t)((2753594000 + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \ + (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000)); + volt_with_cks = (uint32_t)((279720200 + sclk_table->entries[i].clk * 3232 - (ro - 65) * 100000000) / \ + (252248000 - sclk_table->entries[i].clk/100 * 115764)); + } else { + volt_without_cks = (uint32_t)((2416794800 + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \ + (2625416 - (sclk_table->entries[i].clk/100) * 12586807/10000)); + volt_with_cks = (uint32_t)((2999656000 + sclk_table->entries[i].clk * 392803/100 - (ro - 44) * 1000000) / \ + (3422454 - sclk_table->entries[i].clk/100 * 18886376/10000)); + } if (volt_without_cks >= volt_with_cks) - volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + - sclk_table->entries[i].cks_voffset) * 100 / 625) + 1); + volt_offset = (uint8_t)CEILING_UCHAR((volt_without_cks - volt_with_cks + + sclk_table->entries[i].cks_voffset) * 100 / 625); data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; } From 12afb34400eb2b301f06b2aa3535497d14faee59 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Thu, 23 Jun 2016 10:54:17 -0700 Subject: [PATCH 0819/1050] Input: wacom_w8001 - w8001_MAX_LENGTH should be 13 Somehow the patch that added two-finger touch support forgot to update W8001_MAX_LENGTH from 11 to 13. Signed-off-by: Ping Cheng Reviewed-by: Peter Hutterer Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wacom_w8001.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c index bab3c6acf6a2..b1b412766da8 100644 --- a/drivers/input/touchscreen/wacom_w8001.c +++ b/drivers/input/touchscreen/wacom_w8001.c @@ -27,7 +27,7 @@ MODULE_AUTHOR("Jaya Kumar "); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -#define W8001_MAX_LENGTH 11 +#define W8001_MAX_LENGTH 13 #define W8001_LEAD_MASK 0x80 #define W8001_LEAD_BYTE 0x80 #define W8001_TAB_MASK 0x40 From 21de12ee5568fd1aec47890c72967abf791ac80a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Jun 2016 15:00:43 -0700 Subject: [PATCH 0820/1050] netem: fix a use after free If the packet was dropped by lower qdisc, then we must not access it later. Save qdisc_pkt_len(skb) in a temp variable. Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Signed-off-by: Eric Dumazet Cc: WANG Cong Cc: Jamal Hadi Salim Cc: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 205bed00dd34..178f1630a036 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -650,14 +650,14 @@ deliver: #endif if (q->qdisc) { + unsigned int pkt_len = qdisc_pkt_len(skb); int err = qdisc_enqueue(skb, q->qdisc); - if (unlikely(err != NET_XMIT_SUCCESS)) { - if (net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, - qdisc_pkt_len(skb)); - } + if (err != NET_XMIT_SUCCESS && + net_xmit_drop_count(err)) { + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, + pkt_len); } goto tfifo_dequeue; } From 6720a305df74ca30bcc10fc316881641b6ff0c80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 23 Jun 2016 12:11:17 -0700 Subject: [PATCH 0821/1050] locking: avoid passing around 'thread_info' in mutex debugging code None of the code actually wants a thread_info, it all wants a task_struct, and it's just converting back and forth between the two ("ti->task" to get the task_struct from the thread_info, and "task_thread_info(task)" to go the other way). No semantic change. Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/locking/mutex-debug.c | 12 ++++++------ kernel/locking/mutex-debug.h | 4 ++-- kernel/locking/mutex.c | 6 +++--- kernel/locking/mutex.h | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 3ef3736002d8..9c951fade415 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -49,21 +49,21 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter) } void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti) + struct task_struct *task) { SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); /* Mark the current thread as blocked on the lock: */ - ti->task->blocked_on = waiter; + task->blocked_on = waiter; } void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti) + struct task_struct *task) { DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); - DEBUG_LOCKS_WARN_ON(waiter->task != ti->task); - DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter); - ti->task->blocked_on = NULL; + DEBUG_LOCKS_WARN_ON(waiter->task != task); + DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter); + task->blocked_on = NULL; list_del_init(&waiter->list); waiter->task = NULL; diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 0799fd3e4cfa..d06ae3bb46c5 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -20,9 +20,9 @@ extern void debug_mutex_wake_waiter(struct mutex *lock, extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); extern void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti); + struct task_struct *task); extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti); + struct task_struct *task); extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 79d2d765a75f..a70b90db3909 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -537,7 +537,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, goto skip_wait; debug_mutex_lock_common(lock, &waiter); - debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); + debug_mutex_add_waiter(lock, &waiter, task); /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); @@ -584,7 +584,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } __set_task_state(task, TASK_RUNNING); - mutex_remove_waiter(lock, &waiter, current_thread_info()); + mutex_remove_waiter(lock, &waiter, task); /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) atomic_set(&lock->count, 0); @@ -605,7 +605,7 @@ skip_wait: return 0; err: - mutex_remove_waiter(lock, &waiter, task_thread_info(task)); + mutex_remove_waiter(lock, &waiter, task); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, 1, ip); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 5cda397607f2..a68bae5e852a 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -13,7 +13,7 @@ do { spin_lock(lock); (void)(flags); } while (0) #define spin_unlock_mutex(lock, flags) \ do { spin_unlock(lock); (void)(flags); } while (0) -#define mutex_remove_waiter(lock, waiter, ti) \ +#define mutex_remove_waiter(lock, waiter, task) \ __list_del((waiter)->list.prev, (waiter)->list.next) #ifdef CONFIG_MUTEX_SPIN_ON_OWNER From 52fe705b493d40b472b9e7220a71bdca8537c6b2 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Tue, 21 Jun 2016 15:39:43 +1200 Subject: [PATCH 0822/1050] net: vrf: replace hard tab with space in assignment The assignment of rth->dst.output in vrf_rt6_create() and vrf_rtable_create() used a hard tab before the '='. The neighboring assignments did not. Make the assignment of rth->dst.output consistent with the surrounding code. Signed-off-by: Chris Packham Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index dff08842f26d..8bd8c7e1ee87 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -304,7 +304,7 @@ static int vrf_rt6_create(struct net_device *dev) dst_hold(&rt6->dst); rt6->rt6i_table = rt6i_table; - rt6->dst.output = vrf_output6; + rt6->dst.output = vrf_output6; rcu_assign_pointer(vrf->rt6, rt6); rc = 0; @@ -403,7 +403,7 @@ static int vrf_rtable_create(struct net_device *dev) if (!rth) return -ENOMEM; - rth->dst.output = vrf_output; + rth->dst.output = vrf_output; rth->rt_table_id = vrf->tb_id; rcu_assign_pointer(vrf->rth, rth); From efeb2267bba8aa893afdadfc9bae4790777c600c Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Tue, 21 Jun 2016 16:26:49 +0800 Subject: [PATCH 0823/1050] geneve: fix tx_errors statistics Tx errors present summation of errors encountered while transmitting packets. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- drivers/net/geneve.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 305a04e45a13..cc39cefeae45 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -958,8 +958,8 @@ tx_error: dev->stats.collisions++; else if (err == -ENETUNREACH) dev->stats.tx_carrier_errors++; - else - dev->stats.tx_errors++; + + dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -1048,8 +1048,8 @@ tx_error: dev->stats.collisions++; else if (err == -ENETUNREACH) dev->stats.tx_carrier_errors++; - else - dev->stats.tx_errors++; + + dev->stats.tx_errors++; return NETDEV_TX_OK; } #endif From da01e18a37a57f360222d3a123b8f6994aa1ad14 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 23 Jun 2016 12:20:01 -0700 Subject: [PATCH 0824/1050] x86: avoid avoid passing around 'thread_info' in stack dumping code None of the code actually wants a thread_info, it all wants a task_struct, and it's just converting to a thread_info pointer much too early. No semantic change. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/stacktrace.h | 6 +++--- arch/x86/kernel/dumpstack.c | 22 ++++++++++------------ arch/x86/kernel/dumpstack_32.c | 4 +--- arch/x86/kernel/dumpstack_64.c | 8 +++----- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 7c247e7404be..0944218af9e2 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -14,7 +14,7 @@ extern int kstack_depth_to_print; struct thread_info; struct stacktrace_ops; -typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, +typedef unsigned long (*walk_stack_t)(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, @@ -23,13 +23,13 @@ typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, int *graph); extern unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); extern unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2bb25c3fe2e8..d6209f3a69cb 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -42,16 +42,14 @@ void printk_address(unsigned long address) static void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { - struct task_struct *task; unsigned long ret_addr; int index; if (addr != (unsigned long)return_to_handler) return; - task = tinfo->task; index = task->curr_ret_stack; if (!task->ret_stack || index < *graph) @@ -68,7 +66,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { } #endif @@ -79,10 +77,10 @@ print_ftrace_graph_addr(unsigned long addr, void *data, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -static inline int valid_stack_ptr(struct thread_info *tinfo, +static inline int valid_stack_ptr(struct task_struct *task, void *p, unsigned int size, void *end) { - void *t = tinfo; + void *t = task_thread_info(task); if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; @@ -93,14 +91,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, } unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) { struct stack_frame *frame = (struct stack_frame *)bp; - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { unsigned long addr; addr = *stack; @@ -112,7 +110,7 @@ print_context_stack(struct thread_info *tinfo, } else { ops->address(data, addr, 0); } - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } stack++; } @@ -121,7 +119,7 @@ print_context_stack(struct thread_info *tinfo, EXPORT_SYMBOL_GPL(print_context_stack); unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) @@ -129,7 +127,7 @@ print_context_stack_bp(struct thread_info *tinfo, struct stack_frame *frame = (struct stack_frame *)bp; unsigned long *ret_addr = &frame->return_address; - while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { + while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) { unsigned long addr = *ret_addr; if (!__kernel_text_address(addr)) @@ -139,7 +137,7 @@ print_context_stack_bp(struct thread_info *tinfo, break; frame = frame->next_frame; ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } return (unsigned long)frame; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 464ffd69b92e..fef917e79b9d 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -61,15 +61,13 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, bp = stack_frame(task, regs); for (;;) { - struct thread_info *context; void *end_stack; end_stack = is_hardirq_stack(stack, cpu); if (!end_stack) end_stack = is_softirq_stack(stack, cpu); - context = task_thread_info(task); - bp = ops->walk_stack(context, stack, bp, ops, data, + bp = ops->walk_stack(task, stack, bp, ops, data, end_stack, &graph); /* Stop if not on irq stack */ diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5f1c6266eb30..d558a8a49016 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -153,7 +153,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - struct thread_info *tinfo; unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned long dummy; unsigned used = 0; @@ -179,7 +178,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * current stack address. If the stacks consist of nested * exceptions */ - tinfo = task_thread_info(task); while (!done) { unsigned long *stack_end; enum stack_type stype; @@ -202,7 +200,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, ops, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); ops->stack(data, ""); /* @@ -218,7 +216,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, "IRQ") < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); /* * We link to the next stack (which would be @@ -240,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); put_cpu(); } EXPORT_SYMBOL(dump_trace); From c40e4096bf0a12a76c349cbc4d1b2aa40a3d68cd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 21 Jun 2016 16:36:06 +0300 Subject: [PATCH 0825/1050] MAINTAINERS: Update Mellanox's mlx4 Eth NIC driver entry Tariq Toukan is replacing Eugenia (Jenny) Emantayev as the mlx4 Ethernet driver maintainer, thanks to Jenny and good luck to him. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2ebe195951af..8cffceb61a2d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7406,7 +7406,7 @@ F: drivers/scsi/megaraid.* F: drivers/scsi/megaraid/ MELLANOX ETHERNET DRIVER (mlx4_en) -M: Eugenia Emantayev +M: Tariq Toukan L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com From 9e72ac7492149a229ce9039c680849cb682d7092 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Thu, 23 Jun 2016 10:55:11 -0700 Subject: [PATCH 0826/1050] Input: wacom_w8001 - ignore invalid pen data packets ThinkPad X60 Tablet PC (pen only device) sometime posts packets that are larger than W8001_PKTLEN_TPCPEN. Reported-by: Chris J Arges Tested-by: Chris J Arges Signed-off-by: Ping Cheng Reviewed-by: Peter Hutterer Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wacom_w8001.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c index b1b412766da8..0c9191cf324d 100644 --- a/drivers/input/touchscreen/wacom_w8001.c +++ b/drivers/input/touchscreen/wacom_w8001.c @@ -339,6 +339,15 @@ static irqreturn_t w8001_interrupt(struct serio *serio, w8001->idx = 0; parse_multi_touch(w8001); break; + + default: + /* + * ThinkPad X60 Tablet PC (pen only device) sometimes + * sends invalid data packets that are larger than + * W8001_PKTLEN_TPCPEN. Let's start over again. + */ + if (!w8001->touch_dev && w8001->idx > W8001_PKTLEN_TPCPEN - 1) + w8001->idx = 0; } return IRQ_HANDLED; From 226ba707744a51acb4244724e09caacb1d96aed9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 21 Jun 2016 16:09:00 -0700 Subject: [PATCH 0827/1050] Input: elantech - add more IC body types to the list The touchpad in HP Pavilion 14-ab057ca reports it's version as 12 and according to Elan both 11 and 12 are valid IC types and should be identified as hw_version 4. Reported-by: Patrick Lessard Tested-by: Patrick Lessard Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 78f93cf68840..be5b399da5d3 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1568,13 +1568,7 @@ static int elantech_set_properties(struct elantech_data *etd) case 5: etd->hw_version = 3; break; - case 6: - case 7: - case 8: - case 9: - case 10: - case 13: - case 14: + case 6 ... 14: etd->hw_version = 4; break; default: From 3c67a829bd45f99b5c03580bb898c99fcc023356 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 23 Jun 2016 08:45:42 +0200 Subject: [PATCH 0828/1050] cpufreq: pcc-cpufreq: Fix doorbell.access_width Commit 920de6ebfab8 (ACPICA: Hardware: Enhance acpi_hw_validate_register() with access_width/bit_offset awareness) apparently exposed a latent bug, doorbell.access_width is initialized to 64, but per Lv Zheng, it should be 4, and indeed, making that change does bring pcc-cpufreq back to life. Fixes: 920de6ebfab8 (ACPICA: Hardware: Enhance acpi_hw_validate_register() with access_width/bit_offset awareness) Suggested-by: Lv Zheng Signed-off-by: Mike Galbraith Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/pcc-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 808a320e9d5d..a7ecb9a84c15 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -487,7 +487,7 @@ static int __init pcc_cpufreq_probe(void) doorbell.space_id = reg_resource->space_id; doorbell.bit_width = reg_resource->bit_width; doorbell.bit_offset = reg_resource->bit_offset; - doorbell.access_width = 64; + doorbell.access_width = 4; doorbell.address = reg_resource->address; pr_debug("probe: doorbell: space_id is %d, bit_width is %d, " From 0d37189e80163c653771916afe28fae1a8d14daa Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 23 Jun 2016 11:18:43 +0900 Subject: [PATCH 0829/1050] PM / devfreq: Send the DEVFREQ_POSTCHANGE notification when target() is failed This patch sends the DEVFREQ_POSTCHANGE notification when devfreq->profile->targer() is failed. The PRECHANGE/POSTCHANGE should be paired. Fixes: 0fe3a66410a3 (PM / devfreq: Add new DEVFREQ_TRANSITION_NOTIFIER notifier) Reported-by: Lin Huang Signed-off-by: Chanwoo Choi Signed-off-by: Rafael J. Wysocki --- drivers/devfreq/devfreq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index c7f47e34807b..e92418facc92 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -268,8 +268,11 @@ int update_devfreq(struct devfreq *devfreq) devfreq_notify_transition(devfreq, &freqs, DEVFREQ_PRECHANGE); err = devfreq->profile->target(devfreq->dev.parent, &freq, flags); - if (err) + if (err) { + freqs.new = cur_freq; + devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE); return err; + } freqs.new = freq; devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE); From 82d6489d0fed2ec8a8c48c19e8d8a04ac8e5bb26 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 22 Jun 2016 17:28:41 -0300 Subject: [PATCH 0830/1050] cgroup: Disable IRQs while holding css_set_lock While testing the deadline scheduler + cgroup setup I hit this warning. [ 132.612935] ------------[ cut here ]------------ [ 132.612951] WARNING: CPU: 5 PID: 0 at kernel/softirq.c:150 __local_bh_enable_ip+0x6b/0x80 [ 132.612952] Modules linked in: (a ton of modules...) [ 132.612981] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.7.0-rc2 #2 [ 132.612981] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.2-20150714_191134- 04/01/2014 [ 132.612982] 0000000000000086 45c8bb5effdd088b ffff88013fd43da0 ffffffff813d229e [ 132.612984] 0000000000000000 0000000000000000 ffff88013fd43de0 ffffffff810a652b [ 132.612985] 00000096811387b5 0000000000000200 ffff8800bab29d80 ffff880034c54c00 [ 132.612986] Call Trace: [ 132.612987] [] dump_stack+0x63/0x85 [ 132.612994] [] __warn+0xcb/0xf0 [ 132.612997] [] ? push_dl_task.part.32+0x170/0x170 [ 132.612999] [] warn_slowpath_null+0x1d/0x20 [ 132.613000] [] __local_bh_enable_ip+0x6b/0x80 [ 132.613008] [] _raw_write_unlock_bh+0x1a/0x20 [ 132.613010] [] _raw_spin_unlock_bh+0xe/0x10 [ 132.613015] [] put_css_set+0x5c/0x60 [ 132.613016] [] cgroup_free+0x7f/0xa0 [ 132.613017] [] __put_task_struct+0x42/0x140 [ 132.613018] [] dl_task_timer+0xca/0x250 [ 132.613027] [] ? push_dl_task.part.32+0x170/0x170 [ 132.613030] [] __hrtimer_run_queues+0xee/0x270 [ 132.613031] [] hrtimer_interrupt+0xa8/0x190 [ 132.613034] [] local_apic_timer_interrupt+0x38/0x60 [ 132.613035] [] smp_apic_timer_interrupt+0x3d/0x50 [ 132.613037] [] apic_timer_interrupt+0x8c/0xa0 [ 132.613038] [] ? native_safe_halt+0x6/0x10 [ 132.613043] [] default_idle+0x1e/0xd0 [ 132.613044] [] arch_cpu_idle+0xf/0x20 [ 132.613046] [] default_idle_call+0x2a/0x40 [ 132.613047] [] cpu_startup_entry+0x2e7/0x340 [ 132.613048] [] start_secondary+0x155/0x190 [ 132.613049] ---[ end trace f91934d162ce9977 ]--- The warn is the spin_(lock|unlock)_bh(&css_set_lock) in the interrupt context. Converting the spin_lock_bh to spin_lock_irq(save) to avoid this problem - and other problems of sharing a spinlock with an interrupt. Cc: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: Juri Lelli Cc: Steven Rostedt Cc: cgroups@vger.kernel.org Cc: stable@vger.kernel.org # 4.5+ Cc: linux-kernel@vger.kernel.org Reviewed-by: Rik van Riel Reviewed-by: "Luis Claudio R. Goncalves" Signed-off-by: Daniel Bristot de Oliveira Acked-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup.c | 142 +++++++++++++++++++++++++----------------------- 1 file changed, 74 insertions(+), 68 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 688eb0cd1851..75c0ff00aca6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -837,6 +837,8 @@ static void put_css_set_locked(struct css_set *cset) static void put_css_set(struct css_set *cset) { + unsigned long flags; + /* * Ensure that the refcount doesn't hit zero while any readers * can see it. Similar to atomic_dec_and_lock(), but for an @@ -845,9 +847,9 @@ static void put_css_set(struct css_set *cset) if (atomic_add_unless(&cset->refcount, -1, 1)) return; - spin_lock_bh(&css_set_lock); + spin_lock_irqsave(&css_set_lock, flags); put_css_set_locked(cset); - spin_unlock_bh(&css_set_lock); + spin_unlock_irqrestore(&css_set_lock, flags); } /* @@ -1070,11 +1072,11 @@ static struct css_set *find_css_set(struct css_set *old_cset, /* First see if we already have a cgroup group that matches * the desired set */ - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); cset = find_existing_css_set(old_cset, cgrp, template); if (cset) get_css_set(cset); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); if (cset) return cset; @@ -1102,7 +1104,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, * find_existing_css_set() */ memcpy(cset->subsys, template, sizeof(cset->subsys)); - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); /* Add reference counts and links from the new css_set. */ list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; @@ -1128,7 +1130,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, css_get(css); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); return cset; } @@ -1192,7 +1194,7 @@ static void cgroup_destroy_root(struct cgroup_root *root) * Release all the links from cset_links to this hierarchy's * root cgroup */ - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) { list_del(&link->cset_link); @@ -1200,7 +1202,7 @@ static void cgroup_destroy_root(struct cgroup_root *root) kfree(link); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); if (!list_empty(&root->root_list)) { list_del(&root->root_list); @@ -1600,11 +1602,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) ss->root = dst_root; css->cgroup = dcgrp; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); hash_for_each(css_set_table, i, cset, hlist) list_move_tail(&cset->e_cset_node[ss->id], &dcgrp->e_csets[ss->id]); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; @@ -1640,10 +1642,10 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, if (!buf) return -ENOMEM; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot); len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); if (len >= PATH_MAX) len = -ERANGE; @@ -1897,7 +1899,7 @@ static void cgroup_enable_task_cg_lists(void) { struct task_struct *p, *g; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); if (use_task_css_set_links) goto out_unlock; @@ -1922,8 +1924,12 @@ static void cgroup_enable_task_cg_lists(void) * entry won't be deleted though the process has exited. * Do it while holding siglock so that we don't end up * racing against cgroup_exit(). + * + * Interrupts were already disabled while acquiring + * the css_set_lock, so we do not need to disable it + * again when acquiring the sighand->siglock here. */ - spin_lock_irq(&p->sighand->siglock); + spin_lock(&p->sighand->siglock); if (!(p->flags & PF_EXITING)) { struct css_set *cset = task_css_set(p); @@ -1932,11 +1938,11 @@ static void cgroup_enable_task_cg_lists(void) list_add_tail(&p->cg_list, &cset->tasks); get_css_set(cset); } - spin_unlock_irq(&p->sighand->siglock); + spin_unlock(&p->sighand->siglock); } while_each_thread(g, p); read_unlock(&tasklist_lock); out_unlock: - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); } static void init_cgroup_housekeeping(struct cgroup *cgrp) @@ -2043,13 +2049,13 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) * Link the root cgroup in this hierarchy into all the css_set * objects. */ - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); hash_for_each(css_set_table, i, cset, hlist) { link_css_set(&tmp_links, cset, root_cgrp); if (css_set_populated(cset)) cgroup_update_populated(root_cgrp, true); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); BUG_ON(!list_empty(&root_cgrp->self.children)); BUG_ON(atomic_read(&root->nr_cgrps) != 1); @@ -2256,11 +2262,11 @@ out_mount: struct cgroup *cgrp; mutex_lock(&cgroup_mutex); - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); cgrp = cset_cgroup_from_root(ns->root_cset, root); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb); @@ -2337,11 +2343,11 @@ char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, char *ret; mutex_lock(&cgroup_mutex); - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); return ret; @@ -2369,7 +2375,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) char *path = NULL; mutex_lock(&cgroup_mutex); - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); @@ -2382,7 +2388,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) path = buf; } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); return path; } @@ -2557,7 +2563,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, * the new cgroup. There are no failure cases after here, so this * is the commit point. */ - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_for_each_entry(cset, &tset->src_csets, mg_node) { list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) { struct css_set *from_cset = task_css_set(task); @@ -2568,7 +2574,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, put_css_set_locked(from_cset); } } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); /* * Migration is committed, all target tasks are now on dst_csets. @@ -2597,13 +2603,13 @@ out_cancel_attach: } } while_each_subsys_mask(); out_release_tset: - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_splice_init(&tset->dst_csets, &tset->src_csets); list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) { list_splice_tail_init(&cset->mg_tasks, &cset->tasks); list_del_init(&cset->mg_node); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); return ret; } @@ -2634,7 +2640,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) lockdep_assert_held(&cgroup_mutex); - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cgrp = NULL; @@ -2642,7 +2648,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) list_del_init(&cset->mg_preload_node); put_css_set_locked(cset); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); } /** @@ -2783,7 +2789,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup, * already PF_EXITING could be freed from underneath us unless we * take an rcu_read_lock. */ - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); rcu_read_lock(); task = leader; do { @@ -2792,7 +2798,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup, break; } while_each_thread(leader, task); rcu_read_unlock(); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); return cgroup_taskset_migrate(&tset, root); } @@ -2816,7 +2822,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, return -EBUSY; /* look up all src csets */ - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); rcu_read_lock(); task = leader; do { @@ -2826,7 +2832,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, break; } while_each_thread(leader, task); rcu_read_unlock(); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); /* prepare dst csets and commit */ ret = cgroup_migrate_prepare_dst(&preloaded_csets); @@ -2859,9 +2865,9 @@ static int cgroup_procs_write_permission(struct task_struct *task, struct cgroup *cgrp; struct inode *inode; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); while (!cgroup_is_descendant(dst_cgrp, cgrp)) cgrp = cgroup_parent(cgrp); @@ -2962,9 +2968,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) if (root == &cgrp_dfl_root) continue; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); from_cgrp = task_cgroup_from_root(from, root); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); retval = cgroup_attach_task(from_cgrp, tsk, false); if (retval) @@ -3080,7 +3086,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) percpu_down_write(&cgroup_threadgroup_rwsem); /* look up all csses currently attached to @cgrp's subtree */ - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { struct cgrp_cset_link *link; @@ -3088,14 +3094,14 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) cgroup_migrate_add_src(link->cset, dsct, &preloaded_csets); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); /* NULL dst indicates self on default hierarchy */ ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (ret) goto out_finish; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) { struct task_struct *task, *ntask; @@ -3107,7 +3113,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list) cgroup_taskset_add(task, &tset); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); ret = cgroup_taskset_migrate(&tset, cgrp->root); out_finish: @@ -3908,10 +3914,10 @@ static int cgroup_task_count(const struct cgroup *cgrp) int count = 0; struct cgrp_cset_link *link; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_for_each_entry(link, &cgrp->cset_links, cset_link) count += atomic_read(&link->cset->refcount); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); return count; } @@ -4249,7 +4255,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, memset(it, 0, sizeof(*it)); - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); it->ss = css->ss; @@ -4262,7 +4268,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, css_task_iter_advance_css_set(it); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); } /** @@ -4280,7 +4286,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) it->cur_task = NULL; } - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); if (it->task_pos) { it->cur_task = list_entry(it->task_pos, struct task_struct, @@ -4289,7 +4295,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) css_task_iter_advance(it); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); return it->cur_task; } @@ -4303,10 +4309,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) void css_task_iter_end(struct css_task_iter *it) { if (it->cur_cset) { - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_del(&it->iters_node); put_css_set_locked(it->cur_cset); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); } if (it->cur_task) @@ -4338,10 +4344,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) mutex_lock(&cgroup_mutex); /* all tasks in @from are being moved, all csets are source */ - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_for_each_entry(link, &from->cset_links, cset_link) cgroup_migrate_add_src(link->cset, to, &preloaded_csets); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (ret) @@ -5449,10 +5455,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) */ cgrp->self.flags &= ~CSS_ONLINE; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_for_each_entry(link, &cgrp->cset_links, cset_link) link->cset->dead = true; - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) @@ -5723,7 +5729,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, goto out; mutex_lock(&cgroup_mutex); - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); for_each_root(root) { struct cgroup_subsys *ss; @@ -5776,7 +5782,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, retval = 0; out_unlock: - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); kfree(buf); out: @@ -5921,13 +5927,13 @@ void cgroup_post_fork(struct task_struct *child) if (use_task_css_set_links) { struct css_set *cset; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); cset = task_css_set(current); if (list_empty(&child->cg_list)) { get_css_set(cset); css_set_move_task(child, NULL, cset, false); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); } /* @@ -5972,9 +5978,9 @@ void cgroup_exit(struct task_struct *tsk) cset = task_css_set(tsk); if (!list_empty(&tsk->cg_list)) { - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); css_set_move_task(tsk, cset, NULL, false); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); } else { get_css_set(cset); } @@ -6042,9 +6048,9 @@ static void cgroup_release_agent(struct work_struct *work) if (!pathbuf || !agentbuf) goto out; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); if (!path) goto out; @@ -6304,12 +6310,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, return ERR_PTR(-EPERM); mutex_lock(&cgroup_mutex); - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); cset = task_css_set(current); get_css_set(cset); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); new_ns = alloc_cgroup_ns(); @@ -6433,7 +6439,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v) if (!name_buf) return -ENOMEM; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); rcu_read_lock(); cset = rcu_dereference(current->cgroups); list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { @@ -6444,7 +6450,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v) c->root->hierarchy_id, name_buf); } rcu_read_unlock(); - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); kfree(name_buf); return 0; } @@ -6455,7 +6461,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) struct cgroup_subsys_state *css = seq_css(seq); struct cgrp_cset_link *link; - spin_lock_bh(&css_set_lock); + spin_lock_irq(&css_set_lock); list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { struct css_set *cset = link->cset; struct task_struct *task; @@ -6478,7 +6484,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) overflow: seq_puts(seq, " ...\n"); } - spin_unlock_bh(&css_set_lock); + spin_unlock_irq(&css_set_lock); return 0; } From 52dfcc5ccfbb6697ac3cac7f7ff1e712760e1216 Mon Sep 17 00:00:00 2001 From: Dmitrii Tcvetkov Date: Mon, 20 Jun 2016 13:52:14 +0300 Subject: [PATCH 0831/1050] drm/nouveau: fix for disabled fbdev emulation Hello, after this commit: commit f045f459d925138fe7d6193a8c86406bda7e49da Author: Ben Skeggs Date: Thu Jun 2 12:23:31 2016 +1000 drm/nouveau/fbcon: fix out-of-bounds memory accesses kernel started to oops when loading nouveau module when using GTX 780 Ti video adapter. This patch fixes the problem. Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=120591 Signed-off-by: Dmitrii Tcvetkov Suggested-by: Ilia Mirkin Fixes: f045f459d925 ("nouveau_fbcon_init()") Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 300ea03be8f0..d1f248fd3506 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -552,7 +552,8 @@ nouveau_fbcon_init(struct drm_device *dev) if (ret) goto fini; - fbcon->helper.fbdev->pixmap.buf_align = 4; + if (fbcon->helper.fbdev) + fbcon->helper.fbdev->pixmap.buf_align = 4; return 0; fini: From 60842ef8128e7bf58c024814cd0dc14319232b6c Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Thu, 23 Jun 2016 17:37:34 -0700 Subject: [PATCH 0832/1050] Input: vmmouse - remove port reservation The VMWare EFI BIOS will expose port 0x5658 as an ACPI resource. This causes the port to be reserved by the APCI module as the system comes up, making it unavailable to be reserved again by other drivers, thus preserving this VMWare port for special use in a VMWare guest. This port is designed to be shared among multiple VMWare services, such as the VMMOUSE. Because of this, VMMOUSE should not try to reserve this port on its own. The VMWare non-EFI BIOS does not do this to preserve compatibility with existing/legacy VMs. It is known that there is small chance a VM may be configured such that these ports get reserved by other non-VMWare devices, and if this ever happens, the result is undefined. Signed-off-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom Cc: # 4.1- Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/vmmouse.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c index a3f0f5a47490..0f586780ceb4 100644 --- a/drivers/input/mouse/vmmouse.c +++ b/drivers/input/mouse/vmmouse.c @@ -355,18 +355,11 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties) return -ENXIO; } - if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) { - psmouse_dbg(psmouse, "VMMouse port in use.\n"); - return -EBUSY; - } - /* Check if the device is present */ response = ~VMMOUSE_PROTO_MAGIC; VMMOUSE_CMD(GETVERSION, 0, version, response, dummy1, dummy2); - if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU) { - release_region(VMMOUSE_PROTO_PORT, 4); + if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU) return -ENXIO; - } if (set_properties) { psmouse->vendor = VMMOUSE_VENDOR; @@ -374,8 +367,6 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties) psmouse->model = version; } - release_region(VMMOUSE_PROTO_PORT, 4); - return 0; } @@ -394,7 +385,6 @@ static void vmmouse_disconnect(struct psmouse *psmouse) psmouse_reset(psmouse); input_unregister_device(priv->abs_dev); kfree(priv); - release_region(VMMOUSE_PROTO_PORT, 4); } /** @@ -438,15 +428,10 @@ int vmmouse_init(struct psmouse *psmouse) struct input_dev *rel_dev = psmouse->dev, *abs_dev; int error; - if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) { - psmouse_dbg(psmouse, "VMMouse port in use.\n"); - return -EBUSY; - } - psmouse_reset(psmouse); error = vmmouse_enable(psmouse); if (error) - goto release_region; + return error; priv = kzalloc(sizeof(*priv), GFP_KERNEL); abs_dev = input_allocate_device(); @@ -502,8 +487,5 @@ init_fail: kfree(priv); psmouse->private = NULL; -release_region: - release_region(VMMOUSE_PROTO_PORT, 4); - return error; } From 1ee6667cd8d183b2fed12f97285f184431d2caf9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 23 Jun 2016 17:50:39 -0700 Subject: [PATCH 0833/1050] libnvdimm, pfn, dax: fix initialization vs autodetect for mode + alignment The updated ndctl unit tests discovered that if a pfn configuration with a 4K alignment is read from the namespace, that alignment will be ignored in favor of the default 2M alignment. The result is that the configuration will fail initialization with a message like: dax6.1: bad offset: 0x22000 dax disabled align: 0x200000 Fix this by allowing the alignment read from the info block to override the default which is 2M not 0 in the autodetect path. This also fixes a similar problem with the mode and alignment settings silently being overwritten by the kernel when userspace has changed it. We now will either overwrite the info block if userspace changes the uuid or fail and warn if a live setting disagrees with the info block. Cc: Cc: Micah Parrish Cc: Toshi Kani Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 51 ++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index f7718ec685fa..cea8350fbc7e 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -344,6 +344,8 @@ struct device *nd_pfn_create(struct nd_region *nd_region) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; + unsigned long align; + enum nd_pfn_mode mode; struct nd_namespace_io *nsio; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; @@ -386,22 +388,50 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -ENXIO; } + align = le32_to_cpu(pfn_sb->align); + offset = le64_to_cpu(pfn_sb->dataoff); + if (align == 0) + align = 1UL << ilog2(offset); + mode = le32_to_cpu(pfn_sb->mode); + if (!nd_pfn->uuid) { - /* from probe we allocate */ + /* + * When probing a namepace via nd_pfn_probe() the uuid + * is NULL (see: nd_pfn_devinit()) we init settings from + * pfn_sb + */ nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); if (!nd_pfn->uuid) return -ENOMEM; + nd_pfn->align = align; + nd_pfn->mode = mode; } else { - /* from init we validate */ + /* + * When probing a pfn / dax instance we validate the + * live settings against the pfn_sb + */ if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) return -ENODEV; + + /* + * If the uuid validates, but other settings mismatch + * return EINVAL because userspace has managed to change + * the configuration without specifying new + * identification. + */ + if (nd_pfn->align != align || nd_pfn->mode != mode) { + dev_err(&nd_pfn->dev, + "init failed, settings mismatch\n"); + dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n", + nd_pfn->align, align, nd_pfn->mode, + mode); + return -EINVAL; + } } - if (nd_pfn->align == 0) - nd_pfn->align = le32_to_cpu(pfn_sb->align); - if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) { + if (align > nvdimm_namespace_capacity(ndns)) { dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", - nd_pfn->align, nvdimm_namespace_capacity(ndns)); + align, nvdimm_namespace_capacity(ndns)); return -EINVAL; } @@ -411,7 +441,6 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) * namespace has changed since the pfn superblock was * established. */ - offset = le64_to_cpu(pfn_sb->dataoff); nsio = to_nd_namespace_io(&ndns->dev); if (offset >= resource_size(&nsio->res)) { dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", @@ -419,10 +448,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EBUSY; } - if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align)) + if ((align && !IS_ALIGNED(offset, align)) || !IS_ALIGNED(offset, PAGE_SIZE)) { - dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n", - offset); + dev_err(&nd_pfn->dev, + "bad offset: %#llx dax disabled align: %#lx\n", + offset, align); return -ENXIO; } @@ -502,7 +532,6 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, res->start += start_pad; res->end -= end_trunc; - nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { if (offset < SZ_8K) return ERR_PTR(-EINVAL); From 81e257e964268d050f8e9188becd44d50f241d72 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 23 Jun 2016 13:45:06 +0200 Subject: [PATCH 0834/1050] drm/atomic: Make drm_atomic_legacy_backoff reset crtc->acquire_ctx Atomic updates may acquire more state than initially locked through drm_modeset_lock_crtc, running with heavy stress can cause a WARN_ON(crtc->acquire_ctx) in drm_modeset_lock_crtc: [ 601.491296] ------------[ cut here ]------------ [ 601.491366] WARNING: CPU: 0 PID: 2411 at drivers/gpu/drm/drm_modeset_lock.c:191 drm_modeset_lock_crtc+0xeb/0xf0 [drm] [ 601.491369] Modules linked in: drm i915 drm_kms_helper [ 601.491414] CPU: 0 PID: 2411 Comm: kms_cursor_lega Tainted: G U 4.7.0-rc4-patser+ #4798 [ 601.491417] Hardware name: Intel Corporation Skylake Client [ 601.491420] 0000000000000000 ffff88044d153c98 ffffffff812ead28 0000000000000000 [ 601.491425] 0000000000000000 ffff88044d153cd8 ffffffff810868e6 000000bf58058030 [ 601.491431] ffff880088b415e8 ffff880458058030 ffff88008a271548 ffff88008a271568 [ 601.491436] Call Trace: [ 601.491443] [] dump_stack+0x4d/0x65 [ 601.491447] [] __warn+0xc6/0xe0 [ 601.491452] [] warn_slowpath_null+0x18/0x20 [ 601.491472] [] drm_modeset_lock_crtc+0xeb/0xf0 [drm] [ 601.491491] [] drm_mode_cursor_common+0x66/0x180 [drm] [ 601.491509] [] drm_mode_cursor_ioctl+0x3c/0x40 [drm] [ 601.491524] [] drm_ioctl+0x14d/0x530 [drm] [ 601.491540] [] ? drm_mode_setcrtc+0x520/0x520 [drm] [ 601.491545] [] ? handle_mm_fault+0x106b/0x1430 [ 601.491550] [] ? stop_one_cpu+0x61/0x70 [ 601.491556] [] do_vfs_ioctl+0x8d/0x570 [ 601.491560] [] ? security_file_ioctl+0x3e/0x60 [ 601.491565] [] SyS_ioctl+0x74/0x80 [ 601.491571] [] ? posix_get_monotonic_raw+0xc/0x10 [ 601.491576] [] entry_SYSCALL_64_fastpath+0x13/0x8f [ 601.491581] ---[ end trace 56f3d3d85f000d00 ]--- For good measure, test mode_config.acquire_ctx too, although this should never happen. Testcase: kms_cursor_legacy Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_atomic.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index c204ef32df16..9bb99e274d23 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1296,14 +1296,39 @@ EXPORT_SYMBOL(drm_atomic_add_affected_planes); */ void drm_atomic_legacy_backoff(struct drm_atomic_state *state) { + struct drm_device *dev = state->dev; + unsigned crtc_mask = 0; + struct drm_crtc *crtc; int ret; + bool global = false; + + drm_for_each_crtc(crtc, dev) { + if (crtc->acquire_ctx != state->acquire_ctx) + continue; + + crtc_mask |= drm_crtc_mask(crtc); + crtc->acquire_ctx = NULL; + } + + if (WARN_ON(dev->mode_config.acquire_ctx == state->acquire_ctx)) { + global = true; + + dev->mode_config.acquire_ctx = NULL; + } retry: drm_modeset_backoff(state->acquire_ctx); - ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx); + ret = drm_modeset_lock_all_ctx(dev, state->acquire_ctx); if (ret) goto retry; + + drm_for_each_crtc(crtc, dev) + if (drm_crtc_mask(crtc) & crtc_mask) + crtc->acquire_ctx = state->acquire_ctx; + + if (global) + dev->mode_config.acquire_ctx = state->acquire_ctx; } EXPORT_SYMBOL(drm_atomic_legacy_backoff); From 3d22462ae915743f3be5bf1ab3d4a6b72c2bb6c9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 24 May 2016 06:27:44 -0400 Subject: [PATCH 0835/1050] cifs: stuff the fl_owner into "pid" field in the lock request Right now, we send the tgid cross the wire. What we really want to send though is a hashed fl_owner_t since samba treats this field as a generic lockowner. It turns out that because we enforce and release locks locally before they are ever sent to the server, this patch makes no difference in behavior. Still, setting OFD locks on the server using the process pid seems wrong, so I think this patch still makes sense. Signed-off-by: Jeff Layton Signed-off-by: Steve French Acked-by: Pavel Shilovsky Acked-by: Sachin Prabhu --- fs/cifs/cifsfs.c | 3 +++ fs/cifs/cifsglob.h | 1 + fs/cifs/file.c | 14 +++++++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5d8b7edf8a8f..5d841f39c4b7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; +__u32 cifs_lock_secret; /* * Bumps refcount for cifs super block. @@ -1266,6 +1267,8 @@ init_cifs(void) spin_lock_init(&cifs_file_list_lock); spin_lock_init(&GlobalMid_Lock); + get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret)); + if (cifs_max_pending < 2) { cifs_max_pending = 2; cifs_dbg(FYI, "cifs_max_pending set to min of 2\n"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index bba106cdc43c..8f1d8c1e72be 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1619,6 +1619,7 @@ void cifs_oplock_break(struct work_struct *work); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; +extern __u32 cifs_lock_secret; extern mempool_t *cifs_mid_poolp; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9793ae0bcaa2..d4890b6dc22d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1112,6 +1112,12 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return rc; } +static __u32 +hash_lockowner(fl_owner_t owner) +{ + return cifs_lock_secret ^ hash32_ptr((const void *)owner); +} + struct lock_to_push { struct list_head llist; __u64 offset; @@ -1178,7 +1184,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) else type = CIFS_WRLCK; lck = list_entry(el, struct lock_to_push, llist); - lck->pid = flock->fl_pid; + lck->pid = hash_lockowner(flock->fl_owner); lck->netfid = cfile->fid.netfid; lck->length = length; lck->type = type; @@ -1305,7 +1311,8 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, posix_lock_type = CIFS_RDLCK; else posix_lock_type = CIFS_WRLCK; - rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, + rc = CIFSSMBPosixLock(xid, tcon, netfid, + hash_lockowner(flock->fl_owner), flock->fl_start, length, flock, posix_lock_type, wait_flag); return rc; @@ -1505,7 +1512,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, posix_lock_type = CIFS_UNLCK; rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, - current->tgid, flock->fl_start, length, + hash_lockowner(flock->fl_owner), + flock->fl_start, length, NULL, posix_lock_type, wait_flag); goto out; } From 202d772ba02b1deb8835a631cd8255943d1906a0 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Thu, 26 May 2016 11:52:24 +0200 Subject: [PATCH 0836/1050] cifs: use CIFS_MAX_DOMAINNAME_LEN when converting the domain name Currently in build_ntlmssp_auth_blob(), when converting the domain name to UTF16, CIFS_MAX_USERNAME_LEN limit is used. It should be CIFS_MAX_DOMAINNAME_LEN. This patch fixes this. Signed-off-by: Jerome Marchand Signed-off-by: Steve French --- fs/cifs/sess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index af0ec2d5ad0e..c3d086e2bd56 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -430,7 +430,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, } else { int len; len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, - CIFS_MAX_USERNAME_LEN, nls_cp); + CIFS_MAX_DOMAINNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); From b8da344b74c822e966c6d19d6b2321efe82c5d97 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Thu, 26 May 2016 11:52:25 +0200 Subject: [PATCH 0837/1050] cifs: dynamic allocation of ntlmssp blob In sess_auth_rawntlmssp_authenticate(), the ntlmssp blob is allocated statically and its size is an "empirical" 5*sizeof(struct _AUTHENTICATE_MESSAGE) (320B on x86_64). I don't know where this value comes from or if it was ever appropriate, but it is currently insufficient: the user and domain name in UTF16 could take 1kB by themselves. Because of that, build_ntlmssp_auth_blob() might corrupt memory (out-of-bounds write). The size of ntlmssp_blob in SMB2_sess_setup() is too small too (sizeof(struct _NEGOTIATE_MESSAGE) + 500). This patch allocates the blob dynamically in build_ntlmssp_auth_blob(). Signed-off-by: Jerome Marchand Signed-off-by: Steve French CC: Stable --- fs/cifs/ntlmssp.h | 2 +- fs/cifs/sess.c | 76 ++++++++++++++++++++++++++--------------------- fs/cifs/smb2pdu.c | 10 ++----- 3 files changed, 45 insertions(+), 43 deletions(-) diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 848249fa120f..3079b38f0afb 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE { int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses); void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses); -int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen, +int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, const struct nls_table *nls_cp); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index c3d086e2bd56..a42e99c8e00e 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, sec_blob->DomainName.MaximumLength = 0; } -/* We do not malloc the blob, it is passed in pbuffer, because its - maximum possible size is fixed and small, making this approach cleaner. - This function returns the length of the data in the blob */ -int build_ntlmssp_auth_blob(unsigned char *pbuffer, +static int size_of_ntlmssp_blob(struct cifs_ses *ses) +{ + int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len + - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2; + + if (ses->domainName) + sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + else + sz += 2; + + if (ses->user_name) + sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN); + else + sz += 2; + + return sz; +} + +int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, const struct nls_table *nls_cp) { int rc; - AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; + AUTHENTICATE_MESSAGE *sec_blob; __u32 flags; unsigned char *tmp; + rc = setup_ntlmv2_rsp(ses, nls_cp); + if (rc) { + cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc); + *buflen = 0; + goto setup_ntlmv2_ret; + } + *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL); + sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer; + memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); sec_blob->MessageType = NtLmAuthenticate; @@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, flags |= NTLMSSP_NEGOTIATE_KEY_XCH; } - tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); + tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE); sec_blob->NegotiateFlags = cpu_to_le32(flags); sec_blob->LmChallengeResponse.BufferOffset = @@ -399,13 +423,9 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->LmChallengeResponse.Length = 0; sec_blob->LmChallengeResponse.MaximumLength = 0; - sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->NtChallengeResponse.BufferOffset = + cpu_to_le32(tmp - *pbuffer); if (ses->user_name != NULL) { - rc = setup_ntlmv2_rsp(ses, nls_cp); - if (rc) { - cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc); - goto setup_ntlmv2_ret; - } memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE, ses->auth_key.len - CIFS_SESS_KEY_SIZE); tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE; @@ -423,7 +443,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, } if (ses->domainName == NULL) { - sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->DomainName.Length = 0; sec_blob->DomainName.MaximumLength = 0; tmp += 2; @@ -432,14 +452,14 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, CIFS_MAX_DOMAINNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ - sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); sec_blob->DomainName.MaximumLength = cpu_to_le16(len); tmp += len; } if (ses->user_name == NULL) { - sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->UserName.Length = 0; sec_blob->UserName.MaximumLength = 0; tmp += 2; @@ -448,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name, CIFS_MAX_USERNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ - sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); sec_blob->UserName.MaximumLength = cpu_to_le16(len); tmp += len; } - sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->WorkstationName.Length = 0; sec_blob->WorkstationName.MaximumLength = 0; tmp += 2; @@ -463,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) && !calc_seckey(ses)) { memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); sec_blob->SessionKey.MaximumLength = cpu_to_le16(CIFS_CPHTXT_SIZE); tmp += CIFS_CPHTXT_SIZE; } else { - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->SessionKey.Length = 0; sec_blob->SessionKey.MaximumLength = 0; } + *buflen = tmp - *pbuffer; setup_ntlmv2_ret: - *buflen = tmp - pbuffer; return rc; } @@ -1266,7 +1286,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) struct cifs_ses *ses = sess_data->ses; __u16 bytes_remaining; char *bcc_ptr; - char *ntlmsspblob = NULL; + unsigned char *ntlmsspblob = NULL; u16 blob_len; cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n"); @@ -1279,19 +1299,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) /* Build security blob before we assemble the request */ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; smb_buf = (struct smb_hdr *)pSMB; - /* - * 5 is an empirical value, large enough to hold - * authenticate message plus max 10 of av paris, - * domain, user, workstation names, flags, etc. - */ - ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE), - GFP_KERNEL); - if (!ntlmsspblob) { - rc = -ENOMEM; - goto out; - } - - rc = build_ntlmssp_auth_blob(ntlmsspblob, + rc = build_ntlmssp_auth_blob(&ntlmsspblob, &blob_len, ses, sess_data->nls_cp); if (rc) goto out_free_ntlmsspblob; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8f38e33d365b..c3e61a7a7c7c 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -588,7 +588,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, u16 blob_length = 0; struct key *spnego_key = NULL; char *security_blob = NULL; - char *ntlmssp_blob = NULL; + unsigned char *ntlmssp_blob = NULL; bool use_spnego = false; /* else use raw ntlmssp */ cifs_dbg(FYI, "Session Setup\n"); @@ -713,13 +713,7 @@ ssetup_ntlmssp_authenticate: iov[1].iov_len = blob_length; } else if (phase == NtLmAuthenticate) { req->hdr.SessionId = ses->Suid; - ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500, - GFP_KERNEL); - if (ntlmssp_blob == NULL) { - rc = -ENOMEM; - goto ssetup_exit; - } - rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses, + rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses, nls_cp); if (rc) { cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", From a6b6befbb2806697461962edb044e3376a771ebb Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 8 Jun 2016 17:02:32 +0100 Subject: [PATCH 0838/1050] cifs: check hash calculating succeeded calc_lanman_hash() could return -ENOMEM or other errors, we should check that everything went fine before using the calculated key. Signed-off-by: Luis de Bethencourt Signed-off-by: Steve French --- fs/cifs/sess.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index a42e99c8e00e..538d9b55699a 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -710,6 +710,8 @@ sess_auth_lanman(struct sess_data *sess_data) rc = calc_lanman_hash(ses->password, ses->server->cryptkey, ses->server->sec_mode & SECMODE_PW_ENCRYPT ? true : false, lnm_session_key); + if (rc) + goto out; memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); bcc_ptr += CIFS_AUTH_RESP_SIZE; From 4c5ea0a9cd02d6aa8adc86e100b2a4cff8d614ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 21 Jun 2016 18:52:17 +0200 Subject: [PATCH 0839/1050] locking/static_key: Fix concurrent static_key_slow_inc() The following scenario is possible: CPU 1 CPU 2 static_key_slow_inc() atomic_inc_not_zero() -> key.enabled == 0, no increment jump_label_lock() atomic_inc_return() -> key.enabled == 1 now static_key_slow_inc() atomic_inc_not_zero() -> key.enabled == 1, inc to 2 return ** static key is wrong! jump_label_update() jump_label_unlock() Testing the static key at the point marked by (**) will follow the wrong path for jumps that have not been patched yet. This can actually happen when creating many KVM virtual machines with userspace LAPIC emulation; just run several copies of the following program: #include #include #include #include int main(void) { for (;;) { int kvmfd = open("/dev/kvm", O_RDONLY); int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0); close(ioctl(vmfd, KVM_CREATE_VCPU, 1)); close(vmfd); close(kvmfd); } return 0; } Every KVM_CREATE_VCPU ioctl will attempt a static_key_slow_inc() call. The static key's purpose is to skip NULL pointer checks and indeed one of the processes eventually dereferences NULL. As explained in the commit that introduced the bug: 706249c222f6 ("locking/static_keys: Rework update logic") jump_label_update() needs key.enabled to be true. The solution adopted here is to temporarily make key.enabled == -1, and use go down the slow path when key.enabled <= 0. Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Peter Zijlstra (Intel) Cc: # v4.3+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 706249c222f6 ("locking/static_keys: Rework update logic") Link: http://lkml.kernel.org/r/1466527937-69798-1-git-send-email-pbonzini@redhat.com [ Small stylistic edits to the changelog and the code. ] Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 16 +++++++++++++--- kernel/jump_label.c | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0536524bb9eb..68904469fba1 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -117,13 +117,18 @@ struct module; #include +#ifdef HAVE_JUMP_LABEL + static inline int static_key_count(struct static_key *key) { - return atomic_read(&key->enabled); + /* + * -1 means the first static_key_slow_inc() is in progress. + * static_key_enabled() must return true, so return 1 here. + */ + int n = atomic_read(&key->enabled); + return n >= 0 ? n : 1; } -#ifdef HAVE_JUMP_LABEL - #define JUMP_TYPE_FALSE 0UL #define JUMP_TYPE_TRUE 1UL #define JUMP_TYPE_MASK 1UL @@ -162,6 +167,11 @@ extern void jump_label_apply_nops(struct module *mod); #else /* !HAVE_JUMP_LABEL */ +static inline int static_key_count(struct static_key *key) +{ + return atomic_read(&key->enabled); +} + static __always_inline void jump_label_init(void) { static_key_initialized = true; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 05254eeb4b4e..4b353e0be121 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -58,13 +58,36 @@ static void jump_label_update(struct static_key *key); void static_key_slow_inc(struct static_key *key) { + int v, v1; + STATIC_KEY_CHECK_USE(); - if (atomic_inc_not_zero(&key->enabled)) - return; + + /* + * Careful if we get concurrent static_key_slow_inc() calls; + * later calls must wait for the first one to _finish_ the + * jump_label_update() process. At the same time, however, + * the jump_label_update() call below wants to see + * static_key_enabled(&key) for jumps to be updated properly. + * + * So give a special meaning to negative key->enabled: it sends + * static_key_slow_inc() down the slow path, and it is non-zero + * so it counts as "enabled" in jump_label_update(). Note that + * atomic_inc_unless_negative() checks >= 0, so roll our own. + */ + for (v = atomic_read(&key->enabled); v > 0; v = v1) { + v1 = atomic_cmpxchg(&key->enabled, v, v + 1); + if (likely(v1 == v)) + return; + } jump_label_lock(); - if (atomic_inc_return(&key->enabled) == 1) + if (atomic_read(&key->enabled) == 0) { + atomic_set(&key->enabled, -1); jump_label_update(key); + atomic_set(&key->enabled, 1); + } else { + atomic_inc(&key->enabled); + } jump_label_unlock(); } EXPORT_SYMBOL_GPL(static_key_slow_inc); @@ -72,6 +95,13 @@ EXPORT_SYMBOL_GPL(static_key_slow_inc); static void __static_key_slow_dec(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { + /* + * The negative count check is valid even when a negative + * key->enabled is in use by static_key_slow_inc(); a + * __static_key_slow_dec() before the first static_key_slow_inc() + * returns is unbalanced, because all other static_key_slow_inc() + * instances block while the update is in progress. + */ if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { WARN(atomic_read(&key->enabled) < 0, "jump label: negative count!\n"); From 094f469172e00d6ab0a3130b0e01c83b3cf3a98d Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 16 Jun 2016 15:57:01 +0300 Subject: [PATCH 0840/1050] sched/fair: Initialize throttle_count for new task-groups lazily Cgroup created inside throttled group must inherit current throttle_count. Broken throttle_count allows to nominate throttled entries as a next buddy, later this leads to null pointer dereference in pick_next_task_fair(). This patch initialize cfs_rq->throttle_count at first enqueue: laziness allows to skip locking all rq at group creation. Lazy approach also allows to skip full sub-tree scan at throttling hierarchy (not in this patch). Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bsegall@google.com Link: http://lkml.kernel.org/r/146608182119.21870.8439834428248129633.stgit@buzz Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 20 ++++++++++++++++++++ kernel/sched/sched.h | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2ae68f0e3bf5..8c5d8c0c8827 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4202,6 +4202,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq) if (!cfs_bandwidth_used()) return; + /* Synchronize hierarchical throttle counter: */ + if (unlikely(!cfs_rq->throttle_uptodate)) { + struct rq *rq = rq_of(cfs_rq); + struct cfs_rq *pcfs_rq; + struct task_group *tg; + + cfs_rq->throttle_uptodate = 1; + + /* Get closest up-to-date node, because leaves go first: */ + for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) { + pcfs_rq = tg->cfs_rq[cpu_of(rq)]; + if (pcfs_rq->throttle_uptodate) + break; + } + if (tg) { + cfs_rq->throttle_count = pcfs_rq->throttle_count; + cfs_rq->throttled_clock_task = rq_clock_task(rq); + } + } + /* an active group must be handled by the update_curr()->put() path */ if (!cfs_rq->runtime_enabled || cfs_rq->curr) return; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 72f1f3087b04..7cbeb92a1cb9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -437,7 +437,7 @@ struct cfs_rq { u64 throttled_clock, throttled_clock_task; u64 throttled_clock_task_time; - int throttled, throttle_count; + int throttled, throttle_count, throttle_uptodate; struct list_head throttled_list; #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ From 754bd598be9bbc953bc709a9e8ed7f3188bfb9d7 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 16 Jun 2016 15:57:15 +0300 Subject: [PATCH 0841/1050] sched/fair: Do not announce throttled next buddy in dequeue_task_fair() Hierarchy could be already throttled at this point. Throttled next buddy could trigger a NULL pointer dereference in pick_next_task_fair(). Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/146608183552.21905.15924473394414832071.stgit@buzz Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8c5d8c0c8827..bdcbeea90c95 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4537,15 +4537,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) { + /* Avoid re-evaluating load for this entity: */ + se = parent_entity(se); /* * Bias pick_next to pick a task from this cfs_rq, as * p is sleeping when it is within its sched_slice. */ - if (task_sleep && parent_entity(se)) - set_next_buddy(parent_entity(se)); - - /* avoid re-evaluating load for this entity */ - se = parent_entity(se); + if (task_sleep && se && !throttled_hierarchy(cfs_rq)) + set_next_buddy(se); break; } flags |= DEQUEUE_SLEEP; From feb245e304f343cf5e4f9123db36354144dce8a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Jun 2016 15:35:04 -0400 Subject: [PATCH 0842/1050] sched/core: Allow kthreads to fall back to online && !active cpus During CPU hotplug, CPU_ONLINE callbacks are run while the CPU is online but not active. A CPU_ONLINE callback may create or bind a kthread so that its cpus_allowed mask only allows the CPU which is being brought online. The kthread may start executing before the CPU is made active and can end up in select_fallback_rq(). In such cases, the expected behavior is selecting the CPU which is coming online; however, because select_fallback_rq() only chooses from active CPUs, it determines that the task doesn't have any viable CPU in its allowed mask and ends up overriding it to cpu_possible_mask. CPU_ONLINE callbacks should be able to put kthreads on the CPU which is coming online. Update select_fallback_rq() so that it follows cpu_online() rather than cpu_active() for kthreads. Reported-by: Gautham R Shenoy Tested-by: Gautham R. Shenoy Signed-off-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Abdul Haleem Cc: Aneesh Kumar Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@fb.com Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20160616193504.GB3262@mtj.duckdns.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4135ac83cb65..51d7105f529a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1536,7 +1536,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for (;;) { /* Any allowed, online CPU? */ for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { - if (!cpu_active(dest_cpu)) + if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) + continue; + if (!cpu_online(dest_cpu)) continue; goto out; } From 93a2001bdfd5376c3dc2158653034c20392d15c5 Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Thu, 23 Jun 2016 08:59:47 -0600 Subject: [PATCH 0843/1050] HID: hiddev: validate num_values for HIDIOCGUSAGES, HIDIOCSUSAGES commands This patch validates the num_values parameter from userland during the HIDIOCGUSAGES and HIDIOCSUSAGES commands. Previously, if the report id was set to HID_REPORT_ID_UNKNOWN, we would fail to validate the num_values parameter leading to a heap overflow. Cc: stable@vger.kernel.org Signed-off-by: Scott Bauer Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hiddev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 2f1ddca6f2e0..700145b15088 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -516,13 +516,13 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, goto inval; } else if (uref->usage_index >= field->report_count) goto inval; - - else if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && - (uref_multi->num_values > HID_MAX_MULTI_USAGES || - uref->usage_index + uref_multi->num_values > field->report_count)) - goto inval; } + if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && + (uref_multi->num_values > HID_MAX_MULTI_USAGES || + uref->usage_index + uref_multi->num_values > field->report_count)) + goto inval; + switch (cmd) { case HIDIOCGUSAGE: uref->value = field->value[uref->usage_index]; From f83c32925d45926cd0e0f18bf28e6039116c4486 Mon Sep 17 00:00:00 2001 From: Woodrow Shen Date: Fri, 24 Jun 2016 15:58:34 +0800 Subject: [PATCH 0844/1050] ALSA: hda - Fix the headset mic jack detection on Dell machine The new Dell laptop with codec 3246 can't detect headset mic when headset was inserted on the machine. So adding pin configurations into quirk table makes headset mic work correctly. Codec: Realtek ALC3246 Vendor Id: 0x10ec0256 Subsystem Id: 0x10280781 Signed-off-by: Woodrow Shen Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0fe18ede3e85..f28363e8e84c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5831,6 +5831,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60160}, {0x14, 0x90170120}, {0x21, 0x02211030}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60170}, + {0x14, 0x90170120}, + {0x21, 0x02211030}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, From d2bd05d88d245c13b64c3bf9c8927a1c56453d8c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 24 Jun 2016 03:13:34 -0600 Subject: [PATCH 0845/1050] xen-pciback: return proper values during BAR sizing Reads following writes with all address bits set to 1 should return all changeable address bits as one, not the BAR size (nor, as was the case for the upper half of 64-bit BARs, the high half of the region's end address). Presumably this didn't cause any problems so far because consumers use the value to calculate the size (usually via val & -val), and do nothing else with it. But also consider the exception here: Unimplemented BARs should always return all zeroes. And finally, the check for whether to return the sizing address on read for the ROM BAR should ignore all non-address bits, not just the ROM Enable one. Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Signed-off-by: David Vrabel --- drivers/xen/xen-pciback/conf_space_header.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index ad3d17d29c81..9ead1c2ff1dd 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -145,7 +145,7 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data) /* A write to obtain the length must happen as a 32-bit write. * This does not (yet) support writing individual bytes */ - if (value == ~PCI_ROM_ADDRESS_ENABLE) + if ((value | ~PCI_ROM_ADDRESS_MASK) == ~0U) bar->which = 1; else { u32 tmpval; @@ -225,38 +225,42 @@ static inline void read_dev_bar(struct pci_dev *dev, (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64))) { bar_info->val = res[pos - 1].start >> 32; - bar_info->len_val = res[pos - 1].end >> 32; + bar_info->len_val = -resource_size(&res[pos - 1]) >> 32; return; } } + if (!res[pos].flags || + (res[pos].flags & (IORESOURCE_DISABLED | IORESOURCE_UNSET | + IORESOURCE_BUSY))) + return; + bar_info->val = res[pos].start | (res[pos].flags & PCI_REGION_FLAG_MASK); - bar_info->len_val = resource_size(&res[pos]); + bar_info->len_val = -resource_size(&res[pos]) | + (res[pos].flags & PCI_REGION_FLAG_MASK); } static void *bar_init(struct pci_dev *dev, int offset) { - struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); + struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL); if (!bar) return ERR_PTR(-ENOMEM); read_dev_bar(dev, bar, offset, ~0); - bar->which = 0; return bar; } static void *rom_init(struct pci_dev *dev, int offset) { - struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); + struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL); if (!bar) return ERR_PTR(-ENOMEM); read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE); - bar->which = 0; return bar; } From 0f087ee3f3b86a4507db4ff1d2d5a3880e4cfd16 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 24 Jun 2016 15:13:16 +0200 Subject: [PATCH 0846/1050] ALSA: hda / realtek - add two more Thinkpad IDs (5050,5053) for tpt460 fixup See: https://bugzilla.redhat.com/show_bug.cgi?id=1349539 See: https://bugzilla.kernel.org/show_bug.cgi?id=120961 Signed-off-by: Jaroslav Kysela Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f28363e8e84c..900bfbc3368c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5650,6 +5650,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), + SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460), + SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), From d5dbbe6569481bf12dcbe3e12cff72c5f78d272c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Jun 2016 15:15:26 +0200 Subject: [PATCH 0847/1050] ALSA: dummy: Fix a use-after-free at closing syzkaller fuzzer spotted a potential use-after-free case in snd-dummy driver when hrtimer is used as backend: > ================================================================== > BUG: KASAN: use-after-free in rb_erase+0x1b17/0x2010 at addr ffff88005e5b6f68 > Read of size 8 by task syz-executor/8984 > ============================================================================= > BUG kmalloc-192 (Not tainted): kasan: bad access detected > ----------------------------------------------------------------------------- > > Disabling lock debugging due to kernel taint > INFO: Allocated in 0xbbbbbbbbbbbbbbbb age=18446705582212484632 > .... > [< none >] dummy_hrtimer_create+0x49/0x1a0 sound/drivers/dummy.c:464 > .... > INFO: Freed in 0xfffd8e09 age=18446705496313138713 cpu=2164287125 pid=-1 > [< none >] dummy_hrtimer_free+0x68/0x80 sound/drivers/dummy.c:481 > .... > Call Trace: > [] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:333 > [< inline >] rb_set_parent include/linux/rbtree_augmented.h:111 > [< inline >] __rb_erase_augmented include/linux/rbtree_augmented.h:218 > [] rb_erase+0x1b17/0x2010 lib/rbtree.c:427 > [] timerqueue_del+0x78/0x170 lib/timerqueue.c:86 > [] __remove_hrtimer+0x90/0x220 kernel/time/hrtimer.c:903 > [< inline >] remove_hrtimer kernel/time/hrtimer.c:945 > [] hrtimer_try_to_cancel+0x22a/0x570 kernel/time/hrtimer.c:1046 > [] hrtimer_cancel+0x22/0x40 kernel/time/hrtimer.c:1066 > [] dummy_hrtimer_stop+0x91/0xb0 sound/drivers/dummy.c:417 > [] dummy_pcm_trigger+0x17f/0x1e0 sound/drivers/dummy.c:507 > [] snd_pcm_do_stop+0x160/0x1b0 sound/core/pcm_native.c:1106 > [] snd_pcm_action_single+0x76/0x120 sound/core/pcm_native.c:956 > [] snd_pcm_action+0x231/0x290 sound/core/pcm_native.c:974 > [< inline >] snd_pcm_stop sound/core/pcm_native.c:1139 > [] snd_pcm_drop+0x12d/0x1d0 sound/core/pcm_native.c:1784 > [] snd_pcm_common_ioctl1+0xfae/0x2150 sound/core/pcm_native.c:2805 > [] snd_pcm_capture_ioctl1+0x2a1/0x5e0 sound/core/pcm_native.c:2976 > [] snd_pcm_kernel_ioctl+0x11c/0x160 sound/core/pcm_native.c:3020 > [] snd_pcm_oss_sync+0x3a4/0xa30 sound/core/oss/pcm_oss.c:1693 > [] snd_pcm_oss_release+0x1ad/0x280 sound/core/oss/pcm_oss.c:2483 > ..... A workaround is to call hrtimer_cancel() in dummy_hrtimer_sync() which is called certainly before other blocking ops. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/drivers/dummy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index c0f8f613f1f1..172dacd925f5 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -420,6 +420,7 @@ static int dummy_hrtimer_stop(struct snd_pcm_substream *substream) static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm) { + hrtimer_cancel(&dpcm->timer); tasklet_kill(&dpcm->tasklet); } From bc23676caf54c9b6e2521ef065dfddf6c50211de Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:18 -0400 Subject: [PATCH 0848/1050] NFSv4.1/pnfs: Ensure we handle delegation errors in nfs4_proc_layoutget() nfs4_handle_exception() relies on the caller setting the 'inode' field in the struct nfs4_exception argument when the error applies to a delegation. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index de97567795a5..27fe63b502d5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8036,7 +8036,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) .flags = RPC_TASK_ASYNC, }; struct pnfs_layout_segment *lseg = NULL; - struct nfs4_exception exception = { .timeout = *timeout }; + struct nfs4_exception exception = { + .inode = inode, + .timeout = *timeout, + }; int status = 0; dprintk("--> %s\n", __func__); From 67a3b721462c9b3bdc36ad6a583f41706402b3ea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:19 -0400 Subject: [PATCH 0849/1050] NFSv4.1/pnfs: Layout stateids start out as being invalid Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0c7e0d45a4de..f619139621ec 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1290,6 +1290,7 @@ alloc_init_layout_hdr(struct inode *ino, INIT_LIST_HEAD(&lo->plh_bulk_destroy); lo->plh_inode = ino; lo->plh_lc_cred = get_rpccred(ctx->cred); + lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; return lo; } @@ -1565,8 +1566,7 @@ lookup_again: * stateid, or it has been invalidated, then we must use the open * stateid. */ - if (lo->plh_stateid.seqid == 0 || - test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { /* * The first layoutget for the file. Need to serialize per From e5241e43883058b61a955b4bbd677fe4ffd3ae4e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:20 -0400 Subject: [PATCH 0850/1050] NFSv4.1/pnfs: Add sparse lock annotations for pnfs_find_alloc_layout Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f619139621ec..ca488b52cbfb 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1298,6 +1298,8 @@ static struct pnfs_layout_hdr * pnfs_find_alloc_layout(struct inode *ino, struct nfs_open_context *ctx, gfp_t gfp_flags) + __releases(&ino->i_lock) + __acquires(&ino->i_lock) { struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *new = NULL; From dd1beb3d16f6a10683b84b89a4644065c43910f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:21 -0400 Subject: [PATCH 0851/1050] NFS/pnfs: handle bad delegation stateids in nfs4_layoutget_handle_exception We must call nfs4_handle_exception() on BAD_STATEID errors. The only exception is if the stateid argument turns out to be a layout stateid that is declared invalid. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 27fe63b502d5..406dd3eb68e2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7924,8 +7924,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, break; } lo = NFS_I(inode)->layout; - if (lo && nfs4_stateid_match(&lgp->args.stateid, - &lo->plh_stateid)) { + if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) && + nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) { LIST_HEAD(head); /* @@ -7936,10 +7936,10 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&head); + status = -EAGAIN; + goto out; } else spin_unlock(&inode->i_lock); - status = -EAGAIN; - goto out; } status = nfs4_handle_exception(server, status, exception); From d2a7de0b34cd255f23d4b7a7f065677f4b1c15f2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:22 -0400 Subject: [PATCH 0852/1050] NFS: Fix up O_DIRECT results if we read or wrote something, we must report it Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 979b3c4dee6a..c7326c2af2c3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -353,10 +353,12 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) result = wait_for_completion_killable(&dreq->completion); + if (!result) { + result = dreq->count; + WARN_ON_ONCE(dreq->count < 0); + } if (!result) result = dreq->error; - if (!result) - result = dreq->count; out: return (ssize_t) result; @@ -386,8 +388,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) if (dreq->iocb) { long res = (long) dreq->error; - if (!res) + if (dreq->count != 0) { res = (long) dreq->count; + WARN_ON_ONCE(dreq->count < 0); + } dreq->iocb->ki_complete(dreq->iocb, res, 0); } From cea7f829d3ea6e87a67220bc417260b858e278ff Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Fri, 17 Jun 2016 16:48:23 -0400 Subject: [PATCH 0853/1050] nfs4: Fix potential use after free of state in nfs4_do_reclaim. Commit e8d975e73e5f ("fixing infinite OPEN loop in 4.0 stateid recovery") introduced access to state after it was just potentially freed by nfs4_put_open_state leading to a random data corruption somewhere. BUG: unable to handle kernel paging request at ffff88004941ee40 IP: [] nfs4_do_reclaim+0x461/0x740 PGD 3501067 PUD 3504067 PMD 6ff37067 PTE 800000004941e060 Oops: 0002 [#1] SMP DEBUG_PAGEALLOC Modules linked in: loop rpcsec_gss_krb5 acpi_cpufreq tpm_tis joydev i2c_piix4 pcspkr tpm virtio_console nfsd ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops floppy serio_raw virtio_blk drm CPU: 6 PID: 2161 Comm: 192.168.10.253- Not tainted 4.7.0-rc1-vm-nfs+ #112 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800463dcd00 ti: ffff88003ff48000 task.ti: ffff88003ff48000 RIP: 0010:[] [] nfs4_do_reclaim+0x461/0x740 RSP: 0018:ffff88003ff4bd68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffffff81a49900 RCX: 00000000000000e8 RDX: 00000000000000e8 RSI: ffff8800418b9930 RDI: ffff880040c96c88 RBP: ffff88003ff4bdf8 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff880040c96c98 R13: ffff88004941ee20 R14: ffff88004941ee40 R15: ffff88004941ee00 FS: 0000000000000000(0000) GS:ffff88006d000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88004941ee40 CR3: 0000000060b0b000 CR4: 00000000000006e0 Stack: ffffffff813baad5 ffff8800463dcd00 ffff880000000001 ffffffff810e6b68 ffff880043ddbc88 ffff8800418b9800 ffff8800418b98c8 ffff88004941ee48 ffff880040c96c90 ffff880040c96c00 ffff880040c96c20 ffff880040c96c40 Call Trace: [] ? nfs4_do_reclaim+0x35/0x740 [] ? trace_hardirqs_on_caller+0x128/0x1b0 [] nfs4_run_state_manager+0x5ed/0xa40 [] ? nfs4_do_reclaim+0x740/0x740 [] ? nfs4_do_reclaim+0x740/0x740 [] kthread+0x101/0x120 [] ? trace_hardirqs_on_caller+0x128/0x1b0 [] ret_from_fork+0x1f/0x40 [] ? kthread_create_on_node+0x250/0x250 Code: 65 80 4c 8b b5 78 ff ff ff e8 fc 88 4c 00 48 8b 7d 88 e8 13 67 d2 ff 49 8b 47 40 a8 02 0f 84 d3 01 00 00 4c 89 ff e8 7f f9 ff ff 41 80 26 7f 48 8b 7d c8 e8 b1 84 4c 00 e9 39 fd ff ff 3d e6 RIP [] nfs4_do_reclaim+0x461/0x740 RSP CR2: ffff88004941ee40 Signed-off-by: Oleg Drokin Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9679f4749364..834b875900d6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1488,9 +1488,9 @@ restart: } spin_unlock(&state->state_lock); } - nfs4_put_open_state(state); clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); + nfs4_put_open_state(state); spin_lock(&sp->so_lock); goto restart; } From 5e3a98883e7ebdd1440f829a9e9dd5c3d2c5903b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 17 Jun 2016 16:48:24 -0400 Subject: [PATCH 0854/1050] pnfs_nfs: fix _cancel_empty_pagelist pnfs_generic_commit_cancel_empty_pagelist calls nfs_commitdata_release, but that is wrong: nfs_commitdata_release puts the open context, something that isn't valid until nfs_init_commit is called, which is never the case when pnfs_generic_commit_cancel_empty_pagelist is called. This was introduced in "nfs: avoid race that crashes nfs_init_commit". Signed-off-by: Weston Andros Adamson Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs_nfs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 0dfc476da3e1..b38e3c0dc790 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -247,7 +247,11 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, } /* Helper function for pnfs_generic_commit_pagelist to catch an empty - * page list. This can happen when two commits race. */ + * page list. This can happen when two commits race. + * + * This must be called instead of nfs_init_commit - call one or the other, but + * not both! + */ static bool pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages, struct nfs_commit_data *data, @@ -256,7 +260,11 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages, if (list_empty(pages)) { if (atomic_dec_and_test(&cinfo->mds->rpcs_out)) wake_up_atomic_t(&cinfo->mds->rpcs_out); - nfs_commitdata_release(data); + /* don't call nfs_commitdata_release - it tries to put + * the open_context which is not acquired until nfs_init_commit + * which has not been called on @data */ + WARN_ON_ONCE(data->context); + nfs_commit_free(data); return true; } From cbebaf897e5c4862567eb799dc84acc5d7ee2678 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:25 -0400 Subject: [PATCH 0855/1050] NFS: Fix a double page unlock Since commit 0bcbf039f6b2, nfs_readpage_release() has been used to unlock the page in the read code. Fixes: 0bcbf039f6b2 ("nfs: handle request add failure properly") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/read.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6776d7a7839e..572e5b3b06f1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -367,13 +367,13 @@ readpage_async_filler(void *data, struct page *page) nfs_list_remove_request(new); nfs_readpage_release(new); error = desc->pgio->pg_error; - goto out_unlock; + goto out; } return 0; out_error: error = PTR_ERR(new); -out_unlock: unlock_page(page); +out: return error; } From 2d148c7e84962429a79092a56d3736a8d984f595 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:26 -0400 Subject: [PATCH 0856/1050] NFSv4.1/pnfs: Mark the layout stateid invalid when all segments are removed According to RFC5661, section 12.5.3. the layout stateid is no longer valid once the client no longer holds any layout segments. Ensure that we mark it invalid. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ca488b52cbfb..0fbe734cc38c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -361,8 +361,10 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); - if (list_empty(&lo->plh_segs)) + if (list_empty(&lo->plh_segs)) { + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); + } rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } From d8fdb47fae5febc02e62da121f85625244b98b2e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:27 -0400 Subject: [PATCH 0857/1050] NFS: Don't let readdirplus revalidate an inode that was marked as stale Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index aaf7bd0cbae2..a924d66b5608 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -424,12 +424,17 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc, static int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) { + struct inode *inode; struct nfs_inode *nfsi; if (d_really_is_negative(dentry)) return 0; - nfsi = NFS_I(d_inode(dentry)); + inode = d_inode(dentry); + if (is_bad_inode(inode) || NFS_STALE(inode)) + return 0; + + nfsi = NFS_I(inode); if (entry->fattr->fileid == nfsi->fileid) return 1; if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0) From 916ec34d0bafe95b977908acd8b3153c9df6f9d6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:28 -0400 Subject: [PATCH 0858/1050] NFS: Fix potential race in nfs_fhget() If we don't set the mode correctly in nfs_init_locked(), then there is potential for a race with a second call to nfs_fhget that will cause inode aliasing. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52e7d6869e3b..dda689d7a8a7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -282,6 +282,7 @@ nfs_init_locked(struct inode *inode, void *opaque) struct nfs_fattr *fattr = desc->fattr; set_nfs_fileid(inode, fattr->fileid); + inode->i_mode = fattr->mode; nfs_copy_fh(NFS_FH(inode), desc->fh); return 0; } From 1b982ea2ca398bdaeab6cf2aba459a9ca808f1f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2016 16:48:29 -0400 Subject: [PATCH 0859/1050] NFS: Fix an unused variable warning Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a924d66b5608..2817cce7a9f4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1368,7 +1368,6 @@ EXPORT_SYMBOL_GPL(nfs_dentry_operations); struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; - struct dentry *parent; struct inode *inode = NULL; struct nfs_fh *fhandle = NULL; struct nfs_fattr *fattr = NULL; @@ -1398,7 +1397,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in if (IS_ERR(label)) goto out; - parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ trace_nfs_lookup_enter(dir, dentry, flags); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); From 4995734e973a2c2e9c6f6413cbad9913fc4df0dc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 24 Jun 2016 09:07:39 -0700 Subject: [PATCH 0860/1050] acpi, nfit: fix acpi_check_dsm() vs zero functions implemented QEMU 2.6 implements nascent support for nvdimm DSMs. Depending on configuration it may only implement the function0 dsm to indicate that no other DSMs are available. Commit 31eca76ba2fc "nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism" breaks QEMU, but QEMU is spec compliant. Per the spec the way to indicate that no functions are supported is: If Function Index is zero, the return is a buffer containing one bit for each function index, starting with zero. Bit 0 indicates whether there is support for any functions other than function 0 for the specified UUID and Revision ID. If set to zero, no functions are supported (other than function zero) for the specified UUID and Revision ID. Update the nfit driver to determine the family (interface UUID) without requiring the implementation to define any other functions, i.e. short-circuit acpi_check_dsm() to succeed per the spec. The nfit driver appears to be the only user passing funcs==0 to acpi_check_dsm(), so this behavior change of the common routine should be limited to the probing done by the nfit driver. Cc: Len Brown Cc: Jerry Hoemann Acked-by: "Rafael J. Wysocki" Fixes: 31eca76ba2fc ("nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism") Reported-by: Xiao Guangrong Tested-by: Xiao Guangrong Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 6 +++--- drivers/acpi/utils.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 2215fc847fa9..32579a7b71d5 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -1131,11 +1131,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, /* * Until standardization materializes we need to consider up to 3 - * different command sets. Note, that checking for function0 (bit0) - * tells us if any commands are reachable through this uuid. + * different command sets. Note, that checking for zero functions + * tells us if any commands might be reachable through this uuid. */ for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++) - if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) + if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 0)) break; /* limit the supported commands to those that are publicly documented */ diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 22c09952e177..b4de130f2d57 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -680,9 +680,6 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs) u64 mask = 0; union acpi_object *obj; - if (funcs == 0) - return false; - obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL); if (!obj) return false; @@ -695,6 +692,9 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs) mask |= (((u64)obj->buffer.pointer[i]) << (i * 8)); ACPI_FREE(obj); + if (funcs == 0) + return true; + /* * Bit 0 indicates whether there's support for any functions other than * function 0 for the specified UUID and revision. From 485e71e8fb6356c08c7fc6bcce4bf02c9a9a663f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2016 23:57:25 +0200 Subject: [PATCH 0861/1050] posix_acl: Add set_posix_acl Factor out part of posix_acl_xattr_set into a common function that takes a posix_acl, which nfsd can also call. The prototype already exists in include/linux/posix_acl.h. Signed-off-by: Andreas Gruenbacher Cc: stable@vger.kernel.org Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: J. Bruce Fields --- fs/posix_acl.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 8a4a266beff3..edc452c2a563 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -820,6 +820,28 @@ posix_acl_xattr_get(const struct xattr_handler *handler, return error; } +int +set_posix_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + if (!IS_POSIXACL(inode)) + return -EOPNOTSUPP; + if (!inode->i_op->set_acl) + return -EOPNOTSUPP; + + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (acl) { + int ret = posix_acl_valid(acl); + if (ret) + return ret; + } + return inode->i_op->set_acl(inode, acl, type); +} +EXPORT_SYMBOL(set_posix_acl); + static int posix_acl_xattr_set(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, @@ -829,30 +851,12 @@ posix_acl_xattr_set(const struct xattr_handler *handler, struct posix_acl *acl = NULL; int ret; - if (!IS_POSIXACL(inode)) - return -EOPNOTSUPP; - if (!inode->i_op->set_acl) - return -EOPNOTSUPP; - - if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; - if (!inode_owner_or_capable(inode)) - return -EPERM; - if (value) { acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); - - if (acl) { - ret = posix_acl_valid(acl); - if (ret) - goto out; - } } - - ret = inode->i_op->set_acl(inode, acl, handler->flags); -out: + ret = set_posix_acl(inode, handler->flags, acl); posix_acl_release(acl); return ret; } From 999653786df6954a31044528ac3f7a5dadca08f4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 22 Jun 2016 19:43:35 +0100 Subject: [PATCH 0862/1050] nfsd: check permissions when setting ACLs Use set_posix_acl, which includes proper permission checks, instead of calling ->set_acl directly. Without this anyone may be able to grant themselves permissions to a file by setting the ACL. Lock the inode to make the new checks atomic with respect to set_acl. (Also, nfsd was the only caller of set_acl not locking the inode, so I suspect this may fix other races.) This also simplifies the code, and ensures our ACLs are checked by posix_acl_valid. The permission checks and the inode locking were lost with commit 4ac7249e, which changed nfsd to use the set_acl inode operation directly instead of going through xattr handlers. Reported-by: David Sinquin [agreunba@redhat.com: use set_posix_acl] Fixes: 4ac7249e Cc: Christoph Hellwig Cc: Al Viro Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs2acl.c | 20 ++++++++++---------- fs/nfsd/nfs3acl.c | 18 ++++++++---------- fs/nfsd/nfs4acl.c | 16 ++++++++-------- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 1580ea6fd64d..d08cd88155c7 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, goto out; inode = d_inode(fh->fh_dentry); - if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { - error = -EOPNOTSUPP; - goto out_errno; - } error = fh_want_write(fh); if (error) goto out_errno; - error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + fh_lock(fh); + + error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); if (error) - goto out_drop_write; - error = inode->i_op->set_acl(inode, argp->acl_default, - ACL_TYPE_DEFAULT); + goto out_drop_lock; + error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); if (error) - goto out_drop_write; + goto out_drop_lock; + + fh_unlock(fh); fh_drop_write(fh); @@ -131,7 +130,8 @@ out: posix_acl_release(argp->acl_access); posix_acl_release(argp->acl_default); return nfserr; -out_drop_write: +out_drop_lock: + fh_unlock(fh); fh_drop_write(fh); out_errno: nfserr = nfserrno(error); diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 01df4cd7c753..0c890347cde3 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, goto out; inode = d_inode(fh->fh_dentry); - if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { - error = -EOPNOTSUPP; - goto out_errno; - } error = fh_want_write(fh); if (error) goto out_errno; - error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); - if (error) - goto out_drop_write; - error = inode->i_op->set_acl(inode, argp->acl_default, - ACL_TYPE_DEFAULT); + fh_lock(fh); -out_drop_write: + error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); + if (error) + goto out_drop_lock; + error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); + +out_drop_lock: + fh_unlock(fh); fh_drop_write(fh); out_errno: nfserr = nfserrno(error); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 6adabd6049b7..71292a0d6f09 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, dentry = fhp->fh_dentry; inode = d_inode(dentry); - if (!inode->i_op->set_acl || !IS_POSIXACL(inode)) - return nfserr_attrnotsupp; - if (S_ISDIR(inode->i_mode)) flags = NFS4_ACL_DIR; @@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_error < 0) goto out_nfserr; - host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS); + fh_lock(fhp); + + host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl); if (host_error < 0) - goto out_release; + goto out_drop_lock; if (S_ISDIR(inode->i_mode)) { - host_error = inode->i_op->set_acl(inode, dpacl, - ACL_TYPE_DEFAULT); + host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl); } -out_release: +out_drop_lock: + fh_unlock(fhp); + posix_acl_release(pacl); posix_acl_release(dpacl); out_nfserr: From 4fcd1813e6404dd4420c7d12fb483f9320f0bf93 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 22 Jun 2016 20:12:05 -0500 Subject: [PATCH 0863/1050] Fix reconnect to not defer smb3 session reconnect long after socket reconnect Azure server blocks clients that open a socket and don't do anything on it. In our reconnect scenarios, we can reconnect the tcp session and detect the socket is available but we defer the negprot and SMB3 session setup and tree connect reconnection until the next i/o is requested, but this looks suspicous to some servers who expect SMB3 negprog and session setup soon after a socket is created. In the echo thread, reconnect SMB3 sessions and tree connections that are disconnected. A later patch will replay persistent (and resilient) handle opens. CC: Stable Signed-off-by: Steve French Acked-by: Pavel Shilovsky --- fs/cifs/connect.c | 4 +++- fs/cifs/smb2pdu.c | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 66736f57b5ab..7d2b15c06090 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -428,7 +428,9 @@ cifs_echo_request(struct work_struct *work) * server->ops->need_neg() == true. Also, no need to ping if * we got a response recently. */ - if (!server->ops->need_neg || server->ops->need_neg(server) || + + if (server->tcpStatus == CifsNeedReconnect || + server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew || (server->ops->can_echo && !server->ops->can_echo(server)) || time_before(jiffies, server->lstrp + echo_interval - HZ)) goto requeue_echo; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c3e61a7a7c7c..29e06db5f187 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1812,6 +1812,33 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_dbg(FYI, "In echo request\n"); + if (server->tcpStatus == CifsNeedNegotiate) { + struct list_head *tmp, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifs_tcon, + tcon_list); + /* add check for persistent handle reconnect */ + if (tcon && tcon->need_reconnect) { + spin_unlock(&cifs_tcp_ses_lock); + rc = smb2_reconnect(SMB2_ECHO, tcon); + spin_lock(&cifs_tcp_ses_lock); + } + } + } + spin_unlock(&cifs_tcp_ses_lock); + } + + /* if no session, renegotiate failed above */ + if (server->tcpStatus == CifsNeedNegotiate) + return -EIO; + rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); if (rc) return rc; From 45e8a2583d97ca758a55c608f78c4cef562644d1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 22 Jun 2016 21:07:32 -0500 Subject: [PATCH 0864/1050] File names with trailing period or space need special case conversion POSIX allows files with trailing spaces or a trailing period but SMB3 does not, so convert these using the normal Services For Mac mapping as we do for other reserved characters such as : < > | ? * This is similar to what Macs do for the same problem over SMB3. CC: Stable Signed-off-by: Steve French Acked-by: Pavel Shilovsky --- fs/cifs/cifs_unicode.c | 33 +++++++++++++++++++++++++++++---- fs/cifs/cifs_unicode.h | 2 ++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 5a53ac6b1e02..02b071bf3732 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -101,6 +101,12 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_SLASH: *target = '\\'; break; + case SFM_SPACE: + *target = ' '; + break; + case SFM_PERIOD: + *target = '.'; + break; default: return false; } @@ -404,7 +410,7 @@ static __le16 convert_to_sfu_char(char src_char) return dest_char; } -static __le16 convert_to_sfm_char(char src_char) +static __le16 convert_to_sfm_char(char src_char, bool end_of_string) { __le16 dest_char; @@ -427,6 +433,18 @@ static __le16 convert_to_sfm_char(char src_char) case '|': dest_char = cpu_to_le16(SFM_PIPE); break; + case '.': + if (end_of_string) + dest_char = cpu_to_le16(SFM_PERIOD); + else + dest_char = 0; + break; + case ' ': + if (end_of_string) + dest_char = cpu_to_le16(SFM_SPACE); + else + dest_char = 0; + break; default: dest_char = 0; } @@ -469,9 +487,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, /* see if we must remap this char */ if (map_chars == SFU_MAP_UNI_RSVD) dst_char = convert_to_sfu_char(src_char); - else if (map_chars == SFM_MAP_UNI_RSVD) - dst_char = convert_to_sfm_char(src_char); - else + else if (map_chars == SFM_MAP_UNI_RSVD) { + bool end_of_string; + + if (i == srclen - 1) + end_of_string = true; + else + end_of_string = false; + + dst_char = convert_to_sfm_char(src_char, end_of_string); + } else dst_char = 0; /* * FIXME: We can not handle remapping backslash (UNI_SLASH) diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index bdc52cb9a676..479bc0a941f3 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -64,6 +64,8 @@ #define SFM_LESSTHAN ((__u16) 0xF023) #define SFM_PIPE ((__u16) 0xF027) #define SFM_SLASH ((__u16) 0xF026) +#define SFM_PERIOD ((__u16) 0xF028) +#define SFM_SPACE ((__u16) 0xF029) /* * Mapping mechanism to use when one of the seven reserved characters is From b235beea9e996a4d36fed6cfef4801a3e7d7a9a5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 15:09:37 -0700 Subject: [PATCH 0865/1050] Clarify naming of thread info/stack allocators We've had the thread info allocated together with the thread stack for most architectures for a long time (since the thread_info was split off from the task struct), but that is about to change. But the patches that move the thread info to be off-stack (and a part of the task struct instead) made it clear how confused the allocator and freeing functions are. Because the common case was that we share an allocation with the thread stack and the thread_info, the two pointers were identical. That identity then meant that we would have things like ti = alloc_thread_info_node(tsk, node); ... tsk->stack = ti; which certainly _worked_ (since stack and thread_info have the same value), but is rather confusing: why are we assigning a thread_info to the stack? And if we move the thread_info away, the "confusing" code just gets to be entirely bogus. So remove all this confusion, and make it clear that we are doing the stack allocation by renaming and clarifying the function names to be about the stack. The fact that the thread_info then shares the allocation is an implementation detail, and not really about the allocation itself. This is a pure renaming and type fix: we pass in the same pointer, it's just that we clarify what the pointer means. The ia64 code that actually only has one single allocation (for all of task_struct, thread_info and kernel thread stack) now looks a bit odd, but since "tsk->stack" is actually not even used there, that oddity doesn't matter. It would be a separate thing to clean that up, I intentionally left the ia64 changes as a pure brute-force renaming and type change. Acked-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- arch/Kconfig | 4 +-- arch/ia64/Kconfig | 2 +- arch/ia64/include/asm/thread_info.h | 8 ++--- arch/mn10300/include/asm/thread_info.h | 2 +- arch/mn10300/kernel/kgdb.c | 3 +- arch/tile/include/asm/thread_info.h | 2 +- arch/tile/kernel/process.c | 3 +- include/linux/sched.h | 2 +- init/main.c | 4 +-- kernel/fork.c | 50 +++++++++++++------------- 10 files changed, 41 insertions(+), 39 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index e9734796531f..15996290fed4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -226,8 +226,8 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool -# Select if arch has its private alloc_thread_info() function -config ARCH_THREAD_INFO_ALLOCATOR +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR bool # Select if arch wants to size task_struct dynamically via arch_task_struct_size: diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index f80758cb7157..e109ee95e919 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -45,7 +45,7 @@ config IA64 select GENERIC_SMP_IDLE_THREAD select ARCH_INIT_TASK select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA select GENERIC_TIME_VSYSCALL_OLD select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index aa995b67c3f5..d1212b84fb83 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,15 +48,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info_node(tsk, node) \ - ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_info(ti) /* nothing */ +#define free_thread_stack(ti) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 4861a78c7160..f5f90bbf019d 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void) } #ifndef CONFIG_KGDB -void arch_release_thread_info(struct thread_info *ti); +void arch_release_thread_stack(unsigned long *stack); #endif #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c index 99770823451a..2d7986c386fe 100644 --- a/arch/mn10300/kernel/kgdb.c +++ b/arch/mn10300/kernel/kgdb.c @@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs) * single-step state is cleared. At this point the breakpoints should have * been removed by __switch_to(). */ -void arch_release_thread_info(struct thread_info *ti) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *ti = (void *)stack; if (kgdb_sstep_thread == ti) { kgdb_sstep_thread = NULL; diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 4b7cef9e94e0..c1467ac59ce6 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -78,7 +78,7 @@ struct thread_info { #ifndef __ASSEMBLY__ -void arch_release_thread_info(struct thread_info *info); +void arch_release_thread_stack(unsigned long *stack); /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 6b705ccc9cc1..a465d8372edd 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -73,8 +73,9 @@ void arch_cpu_idle(void) /* * Release a thread_info structure */ -void arch_release_thread_info(struct thread_info *info) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *info = (void *)stack; struct single_step_state *step_state = info->step_state; if (step_state) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e42ada26345..253538f29ade 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3007,7 +3007,7 @@ static inline int object_is_on_stack(void *obj) return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } -extern void thread_info_cache_init(void); +extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE static inline unsigned long stack_not_used(struct task_struct *p) diff --git a/init/main.c b/init/main.c index 4c17fda5c2ff..826fd57fa3aa 100644 --- a/init/main.c +++ b/init/main.c @@ -453,7 +453,7 @@ void __init __weak smp_setup_processor_id(void) } # if THREAD_SIZE >= PAGE_SIZE -void __init __weak thread_info_cache_init(void) +void __init __weak thread_stack_cache_init(void) { } #endif @@ -627,7 +627,7 @@ asmlinkage __visible void __init start_kernel(void) /* Should be run before the first non-init thread is created */ init_espfix_bsp(); #endif - thread_info_cache_init(); + thread_stack_cache_init(); cred_init(); fork_init(); proc_caches_init(); diff --git a/kernel/fork.c b/kernel/fork.c index 5c2c355aa97f..37b9439b8c07 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -148,18 +148,18 @@ static inline void free_task_struct(struct task_struct *tsk) } #endif -void __weak arch_release_thread_info(struct thread_info *ti) +void __weak arch_release_thread_stack(unsigned long *stack) { } -#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR +#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ # if THREAD_SIZE >= PAGE_SIZE -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, @@ -172,33 +172,33 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, return page ? page_address(page) : NULL; } -static inline void free_thread_info(struct thread_info *ti) +static inline void free_thread_stack(unsigned long *stack) { - struct page *page = virt_to_page(ti); + struct page *page = virt_to_page(stack); memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK, -(1 << THREAD_SIZE_ORDER)); __free_kmem_pages(page, THREAD_SIZE_ORDER); } # else -static struct kmem_cache *thread_info_cache; +static struct kmem_cache *thread_stack_cache; -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk, int node) { - return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node); + return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); } -static void free_thread_info(struct thread_info *ti) +static void free_stack(unsigned long *stack) { - kmem_cache_free(thread_info_cache, ti); + kmem_cache_free(thread_stack_cache, stack); } -void thread_info_cache_init(void) +void thread_stack_cache_init(void) { - thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, + thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE, THREAD_SIZE, 0, NULL); - BUG_ON(thread_info_cache == NULL); + BUG_ON(thread_stack_cache == NULL); } # endif #endif @@ -221,9 +221,9 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -static void account_kernel_stack(struct thread_info *ti, int account) +static void account_kernel_stack(unsigned long *stack, int account) { - struct zone *zone = page_zone(virt_to_page(ti)); + struct zone *zone = page_zone(virt_to_page(stack)); mod_zone_page_state(zone, NR_KERNEL_STACK, account); } @@ -231,8 +231,8 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { account_kernel_stack(tsk->stack, -1); - arch_release_thread_info(tsk->stack); - free_thread_info(tsk->stack); + arch_release_thread_stack(tsk->stack); + free_thread_stack(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); @@ -343,7 +343,7 @@ void set_task_stack_end_magic(struct task_struct *tsk) static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; - struct thread_info *ti; + unsigned long *stack; int err; if (node == NUMA_NO_NODE) @@ -352,15 +352,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!tsk) return NULL; - ti = alloc_thread_info_node(tsk, node); - if (!ti) + stack = alloc_thread_stack_node(tsk, node); + if (!stack) goto free_tsk; err = arch_dup_task_struct(tsk, orig); if (err) - goto free_ti; + goto free_stack; - tsk->stack = ti; + tsk->stack = stack; #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under @@ -392,14 +392,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; - account_kernel_stack(ti, 1); + account_kernel_stack(stack, 1); kcov_task_init(tsk); return tsk; -free_ti: - free_thread_info(ti); +free_stack: + free_thread_stack(stack); free_tsk: free_task_struct(tsk); return NULL; From aca9c293d098292579e345b2b39b394778d41526 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 16:55:53 -0700 Subject: [PATCH 0866/1050] x86: fix up a few misc stack pointer vs thread_info confusions As the actual pointer value is the same for the thread stack allocation and the thread_info, code that confused the two worked fine, but will break when the thread info is moved away from the stack allocation. It also looks very confusing. For example, the kprobe code wanted to know the current top of stack. To do that, it used this: (unsigned long)current_thread_info() + THREAD_SIZE which did indeed give the correct value. But it's not only a fairly nonsensical expression, it's also rather complex, especially since we actually have this: static inline unsigned long current_top_of_stack(void) which not only gives us the value we are interested in, but happens to be how "current_thread_info()" is currently defined as: (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); so using current_thread_info() to figure out the top of the stack really is a very round-about thing to do. The other cases are just simpler confusion about task_thread_info() vs task_stack_page(), which currently return the same pointer - but if you want the stack page, you really should be using the latter one. And there was one entirely unused assignment of the current stack to a thread_info pointer. All cleaned up to make more sense today, and make it easier to move the thread_info away from the stack in the future. No semantic changes. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/kprobes.h | 11 +++++------ arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/irq_32.c | 2 -- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4421b5da409d..d1d1e5094c28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -38,12 +38,11 @@ typedef u8 kprobe_opcode_t; #define RELATIVECALL_OPCODE 0xe8 #define RELATIVE_ADDR_SIZE 4 #define MAX_STACK_SIZE 64 -#define MIN_STACK_SIZE(ADDR) \ - (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) \ - ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) +#define CUR_STACK_SIZE(ADDR) \ + (current_top_of_stack() - (unsigned long)(ADDR)) +#define MIN_STACK_SIZE(ADDR) \ + (MAX_STACK_SIZE < CUR_STACK_SIZE(ADDR) ? \ + MAX_STACK_SIZE : CUR_STACK_SIZE(ADDR)) #define flush_insn_slot(p) do { } while (0) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index d6209f3a69cb..ef8017ca5ba9 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -80,7 +80,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline int valid_stack_ptr(struct task_struct *task, void *p, unsigned int size, void *end) { - void *t = task_thread_info(task); + void *t = task_stack_page(task); if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 38da8f29a9c8..c627bf8d98ad 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -130,11 +130,9 @@ void irq_ctx_init(int cpu) void do_softirq_own_stack(void) { - struct thread_info *curstk; struct irq_stack *irqstk; u32 *isp, *prev_esp; - curstk = current_stack(); irqstk = __this_cpu_read(softirq_stack); /* build the stack frame on the softirq stack */ From 7e8b3dfef16375dbfeb1f36a83eb9f27117c51fd Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 23 Jun 2016 14:54:37 -0400 Subject: [PATCH 0867/1050] USB: EHCI: declare hostpc register as zero-length array The HOSTPC extension registers found in some EHCI implementations form a variable-length array, with one element for each port. Therefore the hostpc field in struct ehci_regs should be declared as a zero-length array, not a single-element array. This fixes a problem reported by UBSAN. Signed-off-by: Alan Stern Reported-by: Wilfried Klaebe Tested-by: Wilfried Klaebe CC: Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ehci_def.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index 966889a20ea3..e479033bd782 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -180,11 +180,11 @@ struct ehci_regs { * PORTSCx */ /* HOSTPC: offset 0x84 */ - u32 hostpc[1]; /* HOSTPC extension */ + u32 hostpc[0]; /* HOSTPC extension */ #define HOSTPC_PHCD (1<<22) /* Phy clock disable */ #define HOSTPC_PSPD (3<<25) /* Port speed detection */ - u32 reserved5[16]; + u32 reserved5[17]; /* USBMODE_EX: offset 0xc8 */ u32 usbmode_ex; /* USB Device mode extension */ From 7f1a00b6fcd0e3c19beba2e92d157dc0c2cf3494 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 17:07:33 -0700 Subject: [PATCH 0868/1050] fix up initial thread stack pointer vs thread_info confusion The INIT_TASK() initializer was similarly confused about the stack vs thread_info allocation that the allocators had, and that were fixed in commit b235beea9e99 ("Clarify naming of thread info/stack allocators"). The task ->stack pointer only incidentally ends up having the same value as the thread_info, and in fact that will change. So fix the initial task struct initializer to point to 'init_stack' instead of 'init_thread_info', and make sure the ia64 definition for that exists. This actually makes the ia64 tsk->stack pointer be sensible for the initial task, but not for any other task. As mentioned in commit b235beea9e99, that whole pointer isn't actually used on ia64, since task_stack_page() there just points to the (single) allocation. All the other architectures seem to have copied the 'init_stack' definition, even if it tended to be generally unusued. Signed-off-by: Linus Torvalds --- arch/ia64/kernel/init_task.c | 1 + include/linux/init_task.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index f9efe9739d3f..0eaa89f3defd 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info +#define init_stack init_task_mem.stack union { struct { diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f2cb8d45513d..f8834f820ec2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -190,7 +190,7 @@ extern struct task_group root_task_group; #define INIT_TASK(tsk) \ { \ .state = 0, \ - .stack = &init_thread_info, \ + .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ .prio = MAX_PRIO-20, \ From 491a1c65ae498dea0e39b24a46e528a78a8532ed Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 24 Jun 2016 14:48:35 -0700 Subject: [PATCH 0869/1050] mm,oom_reaper: don't call mmput_async() without atomic_inc_not_zero() Commit e2fe14564d33 ("oom_reaper: close race with exiting task") reduced frequency of needlessly selecting next OOM victim, but was calling mmput_async() when atomic_inc_not_zero() failed. Link: http://lkml.kernel.org/r/1464423365-5555-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index acbc432d1a52..be67df3b6569 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -478,6 +478,7 @@ static bool __oom_reap_task(struct task_struct *tsk) mm = p->mm; if (!atomic_inc_not_zero(&mm->mm_users)) { task_unlock(p); + mm = NULL; goto unlock_oom; } From 9df10fb7b80bc2f540956ba01b5e7ee1012001a5 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 24 Jun 2016 14:48:38 -0700 Subject: [PATCH 0870/1050] oom_reaper: avoid pointless atomic_inc_not_zero usage. Since commit 36324a990cf5 ("oom: clear TIF_MEMDIE after oom_reaper managed to unmap the address space") changed to use find_lock_task_mm() for finding a mm_struct to reap, it is guaranteed that mm->mm_users > 0 because find_lock_task_mm() returns a task_struct with ->mm != NULL. Therefore, we can safely use atomic_inc(). Link: http://lkml.kernel.org/r/1465024759-8074-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index be67df3b6569..ddf74487f848 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -474,14 +474,8 @@ static bool __oom_reap_task(struct task_struct *tsk) p = find_lock_task_mm(tsk); if (!p) goto unlock_oom; - mm = p->mm; - if (!atomic_inc_not_zero(&mm->mm_users)) { - task_unlock(p); - mm = NULL; - goto unlock_oom; - } - + atomic_inc(&mm->mm_users); task_unlock(p); if (!down_read_trylock(&mm->mmap_sem)) { From a7b50abc90afb2e3c27e1bd212643cc53eaf0b60 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 24 Jun 2016 14:48:40 -0700 Subject: [PATCH 0871/1050] selftests/vm/compaction_test: fix write to restore nr_hugepages The write at the end of the test to restore nr_hugepages to its previous value is failing. This is because it is trying to write the number of bytes in the char array as opposed to the number of bytes in the string. Link: http://lkml.kernel.org/r/1465331205-3284-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: Shuah Khan Cc: Sri Jayaramappa Cc: Eric B Munson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/compaction_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index 932ff577ffc0..00c4f65d12da 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -136,7 +136,7 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) printf("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); - if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); goto close_fd; From b9b4bb26af017dbe930cd4df7f9b2fc3a0497bfe Mon Sep 17 00:00:00 2001 From: Anthony Romano Date: Fri, 24 Jun 2016 14:48:43 -0700 Subject: [PATCH 0872/1050] tmpfs: don't undo fallocate past its last page When fallocate is interrupted it will undo a range that extends one byte past its range of allocated pages. This can corrupt an in-use page by zeroing out its first byte. Instead, undo using the inclusive byte range. Fixes: 1635f6a74152f1d ("tmpfs: undo fallocation on failure") Link: http://lkml.kernel.org/r/1462713387-16724-1-git-send-email-anthony.romano@coreos.com Signed-off-by: Anthony Romano Cc: Vlastimil Babka Cc: Hugh Dickins Cc: Brandon Philips Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index a36144909b28..24463b67b6ef 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2227,7 +2227,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, /* Remove the !PageUptodate pages we added */ shmem_undo_range(inode, (loff_t)start << PAGE_SHIFT, - (loff_t)index << PAGE_SHIFT, true); + ((loff_t)index << PAGE_SHIFT) - 1, true); goto undone; } From 32d6bd9059f265f617f6502c68dfbcae7e515add Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:47 -0700 Subject: [PATCH 0873/1050] tree wide: get rid of __GFP_REPEAT for order-0 allocations part I This is the third version of the patchset previously sent [1]. I have basically only rebased it on top of 4.7-rc1 tree and dropped "dm: get rid of superfluous gfp flags" which went through dm tree. I am sending it now because it is tree wide and chances for conflicts are reduced considerably when we want to target rc2. I plan to send the next step and rename the flag and move to a better semantic later during this release cycle so we will have a new semantic ready for 4.8 merge window hopefully. Motivation: While working on something unrelated I've checked the current usage of __GFP_REPEAT in the tree. It seems that a majority of the usage is and always has been bogus because __GFP_REPEAT has always been about costly high order allocations while we are using it for order-0 or very small orders very often. It seems that a big pile of them is just a copy&paste when a code has been adopted from one arch to another. I think it makes some sense to get rid of them because they are just making the semantic more unclear. Please note that GFP_REPEAT is documented as * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt * _might_ fail. This depends upon the particular VM implementation. while !costly requests have basically nofail semantic. So one could reasonably expect that order-0 request with __GFP_REPEAT will not loop for ever. This is not implemented right now though. I would like to move on with __GFP_REPEAT and define a better semantic for it. $ git grep __GFP_REPEAT origin/master | wc -l 111 $ git grep __GFP_REPEAT | wc -l 36 So we are down to the third after this patch series. The remaining places really seem to be relying on __GFP_REPEAT due to large allocation requests. This still needs some double checking which I will do later after all the simple ones are sorted out. I am touching a lot of arch specific code here and I hope I got it right but as a matter of fact I even didn't compile test for some archs as I do not have cross compiler for them. Patches should be quite trivial to review for stupid compile mistakes though. The tricky parts are usually hidden by macro definitions and thats where I would appreciate help from arch maintainers. [1] http://lkml.kernel.org/r/1461849846-27209-1-git-send-email-mhocko@kernel.org This patch (of 19): __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. Yet we have the full kernel tree with its usage for apparently order-0 allocations. This is really confusing because __GFP_REPEAT is explicitly documented to allow allocation failures which is a weaker semantic than the current order-0 has (basically nofail). Let's simply drop __GFP_REPEAT from those places. This would allow to identify place which really need allocator to retry harder and formulate a more specific semantic for what the flag is supposed to do actually. Link: http://lkml.kernel.org/r/1464599699-30131-2-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: "Theodore Ts'o" Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf [for tile] Cc: Guan Xuetao Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Jan Kara Cc: John Crispin Cc: Lennox Wu Cc: Ley Foon Tan Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Ralf Baechle Cc: Rich Felker Cc: Russell King Cc: Thomas Gleixner Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgalloc.h | 4 ++-- arch/arm/include/asm/pgalloc.h | 2 +- arch/avr32/include/asm/pgalloc.h | 6 +++--- arch/cris/include/asm/pgalloc.h | 4 ++-- arch/frv/mm/pgalloc.c | 6 +++--- arch/hexagon/include/asm/pgalloc.h | 4 ++-- arch/m68k/include/asm/mcf_pgalloc.h | 4 ++-- arch/m68k/include/asm/motorola_pgalloc.h | 4 ++-- arch/m68k/include/asm/sun3_pgalloc.h | 4 ++-- arch/metag/include/asm/pgalloc.h | 5 ++--- arch/microblaze/include/asm/pgalloc.h | 4 ++-- arch/microblaze/mm/pgtable.c | 3 +-- arch/mn10300/mm/pgtable.c | 6 +++--- arch/openrisc/include/asm/pgalloc.h | 2 +- arch/openrisc/mm/ioremap.c | 2 +- arch/parisc/include/asm/pgalloc.h | 4 ++-- arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 +- arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- arch/powerpc/mm/pgtable_32.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 3 +-- arch/sh/include/asm/pgalloc.h | 4 ++-- arch/sparc/mm/init_64.c | 6 ++---- arch/um/kernel/mem.c | 4 ++-- arch/x86/include/asm/pgalloc.h | 4 ++-- arch/x86/xen/p2m.c | 2 +- arch/xtensa/include/asm/pgalloc.h | 2 +- drivers/block/aoe/aoecmd.c | 2 +- 27 files changed, 47 insertions(+), 52 deletions(-) diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index aab14a019c20..c2ebb6f36c9d 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -40,7 +40,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return ret; } @@ -53,7 +53,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 19cfab526d13..20febb368844 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -29,7 +29,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pmd_t *)get_zeroed_page(GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h index 1aba19d68c5e..db039cb368be 100644 --- a/arch/avr32/include/asm/pgalloc.h +++ b/arch/avr32/include/asm/pgalloc.h @@ -43,7 +43,7 @@ static inline void pgd_ctor(void *x) */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor); + return quicklist_alloc(QUICK_PGD, GFP_KERNEL, pgd_ctor); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -63,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index 235ece437ddd..42f1affb9c2d 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -24,14 +24,14 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index 41907d25ed38..c9ed14f6c67d 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -22,7 +22,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE))); pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -33,9 +33,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *page; #ifdef CONFIG_HIGHPTE - page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL, 0); #endif if (!page) return NULL; diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 77da3b0ae3c2..eeebf862c46c 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -64,7 +64,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + pte = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { @@ -78,7 +78,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; return (pte_t *) __get_free_page(flags); } diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index f9924fbcfe42..fb95aed5f428 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -14,7 +14,7 @@ extern const char bad_pmd_string[]; extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_DMA|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_DMA); if (!page) return NULL; @@ -51,7 +51,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_DMA|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_DMA, 0); pte_t *pte; if (!page) diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index 24bcba496c75..c895b987202c 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); if (pte) { __flush_page_to_ram(pte); flush_tlb_kernel_page(pte); @@ -32,7 +32,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres struct page *page; pte_t *pte; - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if(!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 0931388de47f..1901f61f926f 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -37,7 +37,7 @@ do { \ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_KERNEL); if (!page) return NULL; @@ -49,7 +49,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_KERNEL, 0); if (page == NULL) return NULL; diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h index 3104df0a4822..c2caa1ee4360 100644 --- a/arch/metag/include/asm/pgalloc.h +++ b/arch/metag/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | - __GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); return pte; } @@ -51,7 +50,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index 61436d69775c..7c89390c0c13 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -116,9 +116,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *ptepage; #ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; + int flags = GFP_KERNEL | __GFP_HIGHMEM; #else - int flags = GFP_KERNEL | __GFP_REPEAT; + int flags = GFP_KERNEL; #endif ptepage = alloc_pages(flags, 0); diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 4f4520e779a5..eb99fcc76088 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -239,8 +239,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL | - __GFP_REPEAT | __GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } else { pte = (pte_t *)early_get_page(); if (pte) diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c index e77a7c728081..9577cf768875 100644 --- a/arch/mn10300/mm/pgtable.c +++ b/arch/mn10300/mm/pgtable.c @@ -63,7 +63,7 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -74,9 +74,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *pte; #ifdef CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); #endif if (!pte) return NULL; diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 21484e5b9e9a..87eebd185089 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -77,7 +77,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); if (!pte) return NULL; clear_page(page_address(pte)); diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 62b08ef392be..5b2a95116e8f 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -122,7 +122,7 @@ pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *pte; if (likely(mem_init_done)) { - pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT); + pte = (pte_t *) __get_free_page(GFP_KERNEL); } else { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); #if 0 diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index f2fd327dce2e..52c3defb40c9 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -124,7 +124,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { @@ -137,7 +137,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 488279edb1f0..049b80359db6 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -151,7 +151,7 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 069369f6414b..8a8a7d991249 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -88,7 +88,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index bf7bf32b54f8..7f922f557936 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add pte_t *pte; if (slab_is_available()) { - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); } else { pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) @@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; ptepage = alloc_pages(flags, 0); if (!ptepage) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e009e0604a8a..f5e8d4edb808 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm) static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) { void *ret = NULL; - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!kernel && !pgtable_page_ctor(page)) { diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index a33673b3687d..f3f42c84c40f 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -34,7 +34,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -43,7 +43,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; page = virt_to_page(pg); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 14bb0d5ed3c6..aec508e37490 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2704,8 +2704,7 @@ void __flush_tlb_all(void) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pte_t *pte = NULL; if (page) @@ -2717,8 +2716,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index b2a2dff50b4e..e7437ec62710 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -204,7 +204,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } @@ -212,7 +212,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index bf7f8b55b0f9..574c23cf761a 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -81,7 +81,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *page; - page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); if (!page) return NULL; if (!pgtable_pmd_page_ctor(page)) { @@ -125,7 +125,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + return (pud_t *)get_zeroed_page(GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index cab9f766bb06..dd2a49a8aacc 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -182,7 +182,7 @@ static void * __ref alloc_p2m_page(void) if (unlikely(!slab_is_available())) return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); + return (void *)__get_free_page(GFP_KERNEL); } static void __ref free_p2m_page(void *p) diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index d38eb9237e64..1065bc8bcae5 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -44,7 +44,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *ptep; int i; - ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + ptep = (pte_t *)__get_free_page(GFP_KERNEL); if (!ptep) return NULL; for (i = 0; i < 1024; i++) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d597e432e195..ab19adb07a12 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1750,7 +1750,7 @@ aoecmd_init(void) int ret; /* get_zeroed_page returns page with ref count 1 */ - p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + p = (void *) get_zeroed_page(GFP_KERNEL); if (!p) return -ENOMEM; empty_page = virt_to_page(p); From a3a9a59d206779dc0c4ca5a6de6a2ff40382732b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:50 -0700 Subject: [PATCH 0874/1050] x86: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. PGALLOC_GFP uses __GFP_REPEAT but none of the allocation which uses this flag is for more than order-0. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-3-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/espfix_64.c | 2 +- arch/x86/mm/pgtable.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 4d38416e2a7f..04f89caef9c4 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -57,7 +57,7 @@ # error "Need more than one PGD for the ESPFIX hack" #endif -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* This contains the *bottom* address of the espfix stack */ DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 4eb287e25043..aa0ff4b02a96 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -6,7 +6,7 @@ #include #include -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO #ifdef CONFIG_HIGHPTE #define PGALLOC_USER_GFP __GFP_HIGHMEM From f58f230a832ba8220a64f44aaafcce4b7358d826 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:53 -0700 Subject: [PATCH 0875/1050] x86/efi: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. efi_alloc_page_tables uses __GFP_REPEAT but it allocates an order-0 page. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-4-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6e7242be1c87..b226b3f497f1 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -139,7 +139,7 @@ int __init efi_alloc_page_tables(void) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_page(gfp_mask); if (!efi_pgd) return -ENOMEM; From f3610a6aff7dd70b788364255c0cbc128488ef72 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:56 -0700 Subject: [PATCH 0876/1050] arm64: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. {pte,pmd,pud}_alloc_one{_kernel}, late_pgtable_alloc use PGALLOC_GFP for __get_free_page (aka order-0). pgd_alloc is slightly more complex because it allocates from pgd_cache if PGD_SIZE != PAGE_SIZE and PGD_SIZE depends on the configuration (CONFIG_ARM64_VA_BITS, PAGE_SHIFT and CONFIG_PGTABLE_LEVELS). As per config PGTABLE_LEVELS int default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36 default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47 default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48 we should have the following options CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:4 PAGE_SIZE:4k size:4096 pages:1 CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:4 PAGE_SIZE:16k size:16 pages:1 CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:64k size:512 pages:1 CONFIG_ARM64_VA_BITS:47 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:16k size:16384 pages:1 CONFIG_ARM64_VA_BITS:42 CONFIG_PGTABLE_LEVELS:2 PAGE_SIZE:64k size:65536 pages:1 CONFIG_ARM64_VA_BITS:39 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:4k size:4096 pages:1 CONFIG_ARM64_VA_BITS:36 CONFIG_PGTABLE_LEVELS:2 PAGE_SIZE:16k size:16384 pages:1 All of them fit into a single page (aka order-0). This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-6-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Will Deacon Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index ff98585d085a..d25f4f137c2a 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 From 54d87d600adbe9889bccaff38420cec02250993b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:58 -0700 Subject: [PATCH 0877/1050] arc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one_kernel uses __get_order_pte but this is obviously always zero because BITS_FOR_PTE is not larger than 9 yet the page size is always larger than 4K. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-7-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/pgalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 86ed671286df..3749234b7419 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -95,7 +95,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, + pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, __get_order_pte()); return pte; @@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte()); if (!pte_pg) return 0; memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); From 65f84656ff7c24177c43652bc88cc2a06f9a48b1 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:01 -0700 Subject: [PATCH 0878/1050] mips: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one{_kernel}, pmd_alloc_one allocate PTE_ORDER resp. PMD_ORDER but both are not larger than 1. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-8-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: John Crispin Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/pgalloc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index b336037e8768..93c079a1cfc8 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -69,7 +69,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -79,7 +79,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); @@ -113,7 +113,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - pmd = (pmd_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PMD_ORDER); + pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ORDER); if (pmd) pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; From 565299d03363c71d5bcf7edabb41b2b36a9ea36e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:04 -0700 Subject: [PATCH 0879/1050] nios2: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one{_kernel} allocate PTE_ORDER which is 0. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-9-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Ley Foon Tan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nios2/include/asm/pgalloc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 6e2985e0a7b9..bb47d08c8ef7 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (pte) { if (!pgtable_page_ctor(pte)) { __free_page(pte); From aade311a50b0be5d5ee93bac7ebc2da9a16556d7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:06 -0700 Subject: [PATCH 0880/1050] parisc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pmd_alloc_one allocate PMD_ORDER which is 1. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-10-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: "James E.J. Bottomley" Cc: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/include/asm/pgalloc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 52c3defb40c9..f08dda3f0995 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -63,8 +63,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, - PMD_ORDER); + pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL, PMD_ORDER); if (pmd) memset(pmd, 0, PAGE_SIZE< Date: Fri, 24 Jun 2016 14:49:09 -0700 Subject: [PATCH 0881/1050] score: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one{_kernel} allocate PTE_ORDER which is 0. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-11-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/score/include/asm/pgalloc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h index 2e067657db98..49b012d78c1a 100644 --- a/arch/score/include/asm/pgalloc.h +++ b/arch/score/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); From 2379a23e34b58520dfc8f4909f116a08393138e4 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:12 -0700 Subject: [PATCH 0882/1050] powerpc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. {pud,pmd}_alloc_one are allocating from {PGT,PUD}_CACHE initialized in pgtable_cache_init which doesn't have larger than sizeof(void *) << 12 size and that fits into !costly allocation request size. PGALLOC_GFP is used only in radix__pgd_alloc which uses either order-0 or order-4 requests. The first one doesn't need the flag while the second does. Drop __GFP_REPEAT from PGALLOC_GFP and add it for the order-4 one. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-12-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 10 ++++------ arch/powerpc/include/asm/nohash/64/pgalloc.h | 6 ++---- arch/powerpc/mm/hugetlbpage.c | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 049b80359db6..d14fcf82c00c 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -41,7 +41,7 @@ extern struct kmem_cache *pgtable_cache[]; pgtable_cache[(shift) - 1]; \ }) -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int); extern void pte_fragment_free(unsigned long *, int); @@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) return (pgd_t *)__get_free_page(PGALLOC_GFP); #else struct page *page; - page = alloc_pages(PGALLOC_GFP, 4); + page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4); if (!page) return NULL; return (pgd_t *) page_address(page); @@ -93,8 +93,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -115,8 +114,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 8a8a7d991249..897d2e1c8a9b 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -57,8 +57,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -190,8 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5aac1a3f86cd..119d18611500 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, cachep = PGT_CACHE(pdshift - pshift); #endif - new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); + new = kmem_cache_zalloc(cachep, GFP_KERNEL); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); From 45eeff260d40ff02af3d5b8e2919033ee59f9ff6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:14 -0700 Subject: [PATCH 0883/1050] sparc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. {pud,pmd}_alloc_one is using __GFP_REPEAT but it always allocates from pgtable_cache which is initialzed to PAGE_SIZE objects. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-13-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgalloc_64.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 5e3187185b4a..3529f1378cd8 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -41,8 +41,7 @@ static inline void __pud_populate(pud_t *pud, pmd_t *pmd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -52,8 +51,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) From 10d58bf297e2cba0cfa2cd143d4f0df26e129040 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:17 -0700 Subject: [PATCH 0884/1050] s390: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. page_table_alloc then uses the flag for a single page allocation. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-14-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/pgalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e8b5962ac12a..e2565d2d0c32 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) return table; } /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + page = alloc_page(GFP_KERNEL); if (!page) return NULL; if (!pgtable_page_ctor(page)) { From 884ed4cb8aa19ccff32f5c5586257c56e56f91a4 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:20 -0700 Subject: [PATCH 0885/1050] sh: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. PGALLOC_GFP uses __GFP_REPEAT but {pgd,pmd}_alloc allocate from {pgd,pmd}_cache but both caches are allocating up to PAGE_SIZE objects. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-15-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c index 26e03a1f7ca4..a62bd8696779 100644 --- a/arch/sh/mm/pgtable.c +++ b/arch/sh/mm/pgtable.c @@ -1,7 +1,7 @@ #include #include -#define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO static struct kmem_cache *pgd_cachep; #if PAGETABLE_LEVELS > 2 From f45eebc25e78991ef6a6d784ab54151d3003cfdf Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:22 -0700 Subject: [PATCH 0886/1050] tile: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pgtable_alloc_one uses __GFP_REPEAT flag for L2_USER_PGTABLE_ORDER but the order is either 0 or 3 if L2_KERNEL_PGTABLE_SHIFT for HPAGE_SHIFT. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-16-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Chris Metcalf [for tile] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 7bf2491a9c1f..c4d5bf841a7f 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -231,7 +231,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, int order) { - gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; + gfp_t flags = GFP_KERNEL|__GFP_ZERO; struct page *p; int i; From a830627b01b26452a13abb7e7b37d39365be4b05 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:25 -0700 Subject: [PATCH 0887/1050] unicore32: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. PGALLOC_GFP uses __GFP_REPEAT but it is only used in pte_alloc_one, pte_alloc_one_kernel which does order-0 request. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-17-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/unicore32/include/asm/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 2e02d1356fdf..26775793c204 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); #define pgd_alloc(mm) get_pgd_slow(mm) #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* * Allocate one PTE table. From f2db19719a4e789a58ac024b43f12eeb9e458074 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:28 -0700 Subject: [PATCH 0888/1050] jbd2: get rid of superfluous __GFP_REPEAT jbd2_alloc is explicit about its allocation preferences wrt. the allocation size. Sub page allocations go to the slab allocator and larger are using either the page allocator or vmalloc. This is all good but the logic is unnecessarily complex. 1) as per Ted, the vmalloc fallback is a left-over: : jbd2_alloc is only passed in the bh->b_size, which can't be PAGE_SIZE, so : the code path that calls vmalloc() should never get called. When we : conveted jbd2_alloc() to suppor sub-page size allocations in commit : d2eecb039368, there was an assumption that it could be called with a size : greater than PAGE_SIZE, but that's certaily not true today. Moreover vmalloc allocation might even lead to a deadlock because the callers expect GFP_NOFS context while vmalloc is GFP_KERNEL. 2) __GFP_REPEAT for requests <= PAGE_ALLOC_COSTLY_ORDER is ignored since the flag was introduced. Let's simplify the code flow and use the slab allocator for sub-page requests and the page allocator for others. Even though order > 0 is not currently used as per above leave that option open. Link: http://lkml.kernel.org/r/1464599699-30131-18-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Jan Kara Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd2/journal.c | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b31852f76f46..e3ca4b4cac84 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2329,18 +2329,10 @@ void *jbd2_alloc(size_t size, gfp_t flags) BUG_ON(size & (size-1)); /* Must be a power of 2 */ - flags |= __GFP_REPEAT; - if (size == PAGE_SIZE) - ptr = (void *)__get_free_pages(flags, 0); - else if (size > PAGE_SIZE) { - int order = get_order(size); - - if (order < 3) - ptr = (void *)__get_free_pages(flags, order); - else - ptr = vmalloc(size); - } else + if (size < PAGE_SIZE) ptr = kmem_cache_alloc(get_slab(size), flags); + else + ptr = (void *)__get_free_pages(flags, get_order(size)); /* Check alignment; SLUB has gotten this wrong in the past, * and this can lead to user data corruption! */ @@ -2351,20 +2343,10 @@ void *jbd2_alloc(size_t size, gfp_t flags) void jbd2_free(void *ptr, size_t size) { - if (size == PAGE_SIZE) { - free_pages((unsigned long)ptr, 0); - return; - } - if (size > PAGE_SIZE) { - int order = get_order(size); - - if (order < 3) - free_pages((unsigned long)ptr, order); - else - vfree(ptr); - return; - } - kmem_cache_free(get_slab(size), ptr); + if (size < PAGE_SIZE) + kmem_cache_free(get_slab(size), ptr); + else + free_pages((unsigned long)ptr, get_order(size)); }; /* From a6921c2974a09bfe8d039980c0b14a305644930b Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Fri, 24 Jun 2016 14:49:31 -0700 Subject: [PATCH 0889/1050] MAINTAINERS: update Calgary IOMMU Update the contact info for Muli, clean-up my name, and update the mailing list to the IOMMU mailing list. Link: http://lkml.kernel.org/r/1465493059-11840-2-git-send-email-jdmason@kudzu.us Signed-off-by: Jon Mason Cc: Muli Ben-Yehuda Cc: Greg Kroah-Hartman Cc: Krzysztof Kozlowski Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e1b090f86e0d..952fd2aba7b7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2776,9 +2776,9 @@ F: include/net/caif/ F: net/caif/ CALGARY x86-64 IOMMU -M: Muli Ben-Yehuda -M: "Jon D. Mason" -L: discuss@x86-64.org +M: Muli Ben-Yehuda +M: Jon Mason +L: iommu@lists.linux-foundation.org S: Maintained F: arch/x86/kernel/pci-calgary_64.c F: arch/x86/kernel/tce_64.c From 9b75a867cc9ddbafcaf35029358ac500f2635ff3 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 24 Jun 2016 14:49:34 -0700 Subject: [PATCH 0890/1050] mm: mempool: kasan: don't poot mempool objects in quarantine Currently we may put reserved by mempool elements into quarantine via kasan_kfree(). This is totally wrong since quarantine may really free these objects. So when mempool will try to use such element, use-after-free will happen. Or mempool may decide that it no longer need that element and double-free it. So don't put object into quarantine in kasan_kfree(), just poison it. Rename kasan_kfree() to kasan_poison_kfree() to respect that. Also, we shouldn't use kasan_slab_alloc()/kasan_krealloc() in kasan_unpoison_element() because those functions may update allocation stacktrace. This would be wrong for the most of the remove_element call sites. (The only call site where we may want to update alloc stacktrace is in mempool_alloc(). Kmemleak solves this by calling kmemleak_update_trace(), so we could make something like that too. But this is out of scope of this patch). Fixes: 55834c59098d ("mm: kasan: initial memory quarantine implementation") Link: http://lkml.kernel.org/r/575977C3.1010905@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Kuthonuzo Luruo Acked-by: Alexander Potapenko Cc: Dmitriy Vyukov Cc: Kostya Serebryany Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 11 +++++++---- mm/kasan/kasan.c | 6 +++--- mm/mempool.c | 12 ++++-------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 611927f5870d..ac4b3c46a84d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -59,14 +59,13 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(const void *ptr); -void kasan_kfree(void *ptr); +void kasan_poison_kfree(void *ptr); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); bool kasan_slab_free(struct kmem_cache *s, void *object); -void kasan_poison_slab_free(struct kmem_cache *s, void *object); struct kasan_cache { int alloc_meta_offset; @@ -76,6 +75,9 @@ struct kasan_cache { int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); +size_t ksize(const void *); +static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -102,7 +104,7 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache, static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} static inline void kasan_kfree_large(const void *ptr) {} -static inline void kasan_kfree(void *ptr) {} +static inline void kasan_poison_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) {} static inline void kasan_krealloc(const void *object, size_t new_size, @@ -114,11 +116,12 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object) { return false; } -static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} +static inline void kasan_unpoison_slab(const void *ptr) { } + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 28439acda6ec..6845f9294696 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -508,7 +508,7 @@ void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags) kasan_kmalloc(cache, object, cache->object_size, flags); } -void kasan_poison_slab_free(struct kmem_cache *cache, void *object) +static void kasan_poison_slab_free(struct kmem_cache *cache, void *object) { unsigned long size = cache->object_size; unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); @@ -626,7 +626,7 @@ void kasan_krealloc(const void *object, size_t size, gfp_t flags) kasan_kmalloc(page->slab_cache, object, size, flags); } -void kasan_kfree(void *ptr) +void kasan_poison_kfree(void *ptr) { struct page *page; @@ -636,7 +636,7 @@ void kasan_kfree(void *ptr) kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), KASAN_FREE_PAGE); else - kasan_slab_free(page->slab_cache, ptr); + kasan_poison_slab_free(page->slab_cache, ptr); } void kasan_kfree_large(const void *ptr) diff --git a/mm/mempool.c b/mm/mempool.c index 9e075f829d0d..8f65464da5de 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -104,20 +104,16 @@ static inline void poison_element(mempool_t *pool, void *element) static void kasan_poison_element(mempool_t *pool, void *element) { - if (pool->alloc == mempool_alloc_slab) - kasan_poison_slab_free(pool->pool_data, element); - if (pool->alloc == mempool_kmalloc) - kasan_kfree(element); + if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) + kasan_poison_kfree(element); if (pool->alloc == mempool_alloc_pages) kasan_free_pages(element, (unsigned long)pool->pool_data); } static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags) { - if (pool->alloc == mempool_alloc_slab) - kasan_slab_alloc(pool->pool_data, element, flags); - if (pool->alloc == mempool_kmalloc) - kasan_krealloc(element, (size_t)pool->pool_data, flags); + if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) + kasan_unpoison_slab(element); if (pool->alloc == mempool_alloc_pages) kasan_alloc_pages(element, (unsigned long)pool->pool_data); } From e838a45f9392a5bd2be1cd3ab0b16ae85857461c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 24 Jun 2016 14:49:37 -0700 Subject: [PATCH 0891/1050] mm, sl[au]b: add __GFP_ATOMIC to the GFP reclaim mask Commit d0164adc89f6 ("mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep and avoiding waking kswapd") modified __GFP_WAIT to explicitly identify the difference between atomic callers and those that were unwilling to sleep. Later the definition was removed entirely. The GFP_RECLAIM_MASK is the set of flags that affect watermark checking and reclaim behaviour but __GFP_ATOMIC was never added. Without it, atomic users of the slab allocator strip the __GFP_ATOMIC flag and cannot access the page allocator atomic reserves. This patch addresses the problem. The user-visible impact depends on the workload but potentially atomic allocations unnecessarily fail without this path. Link: http://lkml.kernel.org/r/20160610093832.GK2527@techsingularity.net Signed-off-by: Mel Gorman Reported-by: Marcin Wojtas Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: [4.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/internal.h b/mm/internal.h index a37e5b6f9d25..2524ec880e24 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -24,7 +24,8 @@ */ #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ - __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) + __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ + __GFP_ATOMIC) /* The GFP flags allowed during early boot */ #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) From a8a47ff53462c3043778c04b3ba7230a39c476bf Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 24 Jun 2016 14:49:39 -0700 Subject: [PATCH 0892/1050] mailmap: add Antoine Tenart's email MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I used "Antoine Ténart" at first but then moved to a name without accent as this cause some issues from time to time... Add my email in the mailmap file to have a consistent shortlog output. Link: http://lkml.kernel.org/r/20160609130323.27706-1-antoine.tenart@free-electrons.com Signed-off-by: Antoine Tenart Cc: Antoine Tenart Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 779a9caadc7f..726b49435224 100644 --- a/.mailmap +++ b/.mailmap @@ -21,6 +21,7 @@ Andrey Ryabinin Andrew Morton Andrew Vasquez Andy Adamson +Antoine Tenart Antonio Ospite Archit Taneja Arnaud Patard From 1f08fe266560fc2d1383fd9c8c08fdd432ea302b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 24 Jun 2016 14:49:42 -0700 Subject: [PATCH 0893/1050] mailmap: add Boris Brezillon's email There are different versions of Boris' name and email in the log, and one typo. Add his emails in mailmap to have all of his contributions under the same name/email tuple. Link: http://lkml.kernel.org/r/20160609130323.27706-2-antoine.tenart@free-electrons.com Signed-off-by: Antoine Tenart Acked-by: Boris Brezillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.mailmap b/.mailmap index 726b49435224..52489f564069 100644 --- a/.mailmap +++ b/.mailmap @@ -31,6 +31,9 @@ Axel Lin Ben Gardner Ben M Cahill Björn Steinbrink +Boris Brezillon +Boris Brezillon +Boris Brezillon Brian Avery Brian King Christoph Hellwig From 315d09bf30c2b436a1fdac86d31c24380cd56c4f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Jun 2016 14:49:45 -0700 Subject: [PATCH 0894/1050] Revert "mm: make faultaround produce old ptes" This reverts commit 5c0a85fad949212b3e059692deecdeed74ae7ec7. The commit causes ~6% regression in unixbench. Let's revert it for now and consider other solution for reclaim problem later. Link: http://lkml.kernel.org/r/1465893750-44080-2-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: "Huang, Ying" Cc: Linus Torvalds Cc: Rik van Riel Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Vinayak Menon Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- mm/filemap.c | 2 +- mm/memory.c | 23 +++++------------------ 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5df5feb49575..ece042dfe23c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -602,7 +602,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) } void do_set_pte(struct vm_area_struct *vma, unsigned long address, - struct page *page, pte_t *pte, bool write, bool anon, bool old); + struct page *page, pte_t *pte, bool write, bool anon); #endif /* diff --git a/mm/filemap.c b/mm/filemap.c index 00ae878b2a38..20f3b1f33f0e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2186,7 +2186,7 @@ repeat: if (file->f_ra.mmap_miss > 0) file->f_ra.mmap_miss--; addr = address + (page->index - vmf->pgoff) * PAGE_SIZE; - do_set_pte(vma, addr, page, pte, false, false, true); + do_set_pte(vma, addr, page, pte, false, false); unlock_page(page); goto next; unlock: diff --git a/mm/memory.c b/mm/memory.c index 15322b73636b..61fe7e7b56bf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2877,7 +2877,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, * vm_ops->map_pages. */ void do_set_pte(struct vm_area_struct *vma, unsigned long address, - struct page *page, pte_t *pte, bool write, bool anon, bool old) + struct page *page, pte_t *pte, bool write, bool anon) { pte_t entry; @@ -2885,8 +2885,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, entry = mk_pte(page, vma->vm_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); - if (old) - entry = pte_mkold(entry); if (anon) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address, false); @@ -3032,20 +3030,9 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); + do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) goto unlock_out; - do_fault_around(vma, address, pte, pgoff, flags); - /* Check if the fault is handled by faultaround */ - if (!pte_same(*pte, orig_pte)) { - /* - * Faultaround produce old pte, but the pte we've - * handler fault for should be young. - */ - pte_t entry = pte_mkyoung(*pte); - if (ptep_set_access_flags(vma, address, pte, entry, 0)) - update_mmu_cache(vma, address, pte); - goto unlock_out; - } pte_unmap_unlock(pte, ptl); } @@ -3060,7 +3047,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, put_page(fault_page); return ret; } - do_set_pte(vma, address, fault_page, pte, false, false, false); + do_set_pte(vma, address, fault_page, pte, false, false); unlock_page(fault_page); unlock_out: pte_unmap_unlock(pte, ptl); @@ -3111,7 +3098,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } goto uncharge_out; } - do_set_pte(vma, address, new_page, pte, true, true, false); + do_set_pte(vma, address, new_page, pte, true, true); mem_cgroup_commit_charge(new_page, memcg, false, false); lru_cache_add_active_or_unevictable(new_page, vma); pte_unmap_unlock(pte, ptl); @@ -3164,7 +3151,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, put_page(fault_page); return ret; } - do_set_pte(vma, address, fault_page, pte, true, false, false); + do_set_pte(vma, address, fault_page, pte, true, false); pte_unmap_unlock(pte, ptl); if (set_page_dirty(fault_page)) From 06d8fbc7cfd655d202cfac9870bd79c18c2db698 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Jun 2016 14:49:48 -0700 Subject: [PATCH 0895/1050] Revert "mm: disable fault around on emulated access bit architecture" This reverts commit d0834a6c2c5b0c76cfb806bd7dba6556d8b4edbb. After revert of 5c0a85fad949 ("mm: make faultaround produce old ptes") faultaround doesn't have dependencies on hardware accessed bit, so let's revert this one too. Link: http://lkml.kernel.org/r/1465893750-44080-3-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: "Huang, Ying" Cc: Linus Torvalds Cc: Rik van Riel Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Vinayak Menon Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 61fe7e7b56bf..cd1f29e4897e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2898,16 +2898,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -/* - * If architecture emulates "accessed" or "young" bit without HW support, - * there is no much gain with fault_around. - */ static unsigned long fault_around_bytes __read_mostly = -#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS - PAGE_SIZE; -#else rounddown_pow_of_two(65536); -#endif #ifdef CONFIG_DEBUG_FS static int fault_around_bytes_get(void *data, u64 *val) From c17b1f42594eb71b8d3eb5a6dfc907a7eb88a51d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Jun 2016 14:49:51 -0700 Subject: [PATCH 0896/1050] hugetlb: fix nr_pmds accounting with shared page tables We account HugeTLB's shared page table to all processes who share it. The accounting happens during huge_pmd_share(). If somebody populates pud entry under us, we should decrease pagetable's refcount and decrease nr_pmds of the process. By mistake, I increase nr_pmds again in this case. :-/ It will lead to "BUG: non-zero nr_pmds on freeing mm: 2" on process' exit. Let's fix this by increasing nr_pmds only when we're sure that the page table will be used. Link: http://lkml.kernel.org/r/20160617122506.GC6534@node.shutemov.name Fixes: dc6c9a35b66b ("mm: account pmd page tables to the process") Signed-off-by: Kirill A. Shutemov Reported-by: zhongjiang Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 388c2bb9b55c..ee0302169959 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4228,7 +4228,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (saddr) { spte = huge_pte_offset(svma->vm_mm, saddr); if (spte) { - mm_inc_nr_pmds(mm); get_page(virt_to_page(spte)); break; } @@ -4243,9 +4242,9 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (pud_none(*pud)) { pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); + mm_inc_nr_pmds(mm); } else { put_page(virt_to_page(spte)); - mm_inc_nr_pmds(mm); } spin_unlock(ptl); out: From d93c4130a7d049b234b5d5a15808eaf5406f2789 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Jun 2016 14:49:54 -0700 Subject: [PATCH 0897/1050] memcg: mem_cgroup_migrate() may be called with irq disabled mem_cgroup_migrate() uses local_irq_disable/enable() but can be called with irq disabled from migrate_page_copy(). This ends up enabling irq while holding a irq context lock triggering the following lockdep warning. Fix it by using irq_save/restore instead. ================================= [ INFO: inconsistent lock state ] 4.7.0-rc1+ #52 Tainted: G W --------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. kcompactd0/151 [HC0[0]:SC0[0]:HE1:SE1] takes: (&(&ctx->completion_lock)->rlock){+.?.-.}, at: [<000000000038fd96>] aio_migratepage+0x156/0x1e8 {IN-SOFTIRQ-W} state was registered at: __lock_acquire+0x5b6/0x1930 lock_acquire+0xee/0x270 _raw_spin_lock_irqsave+0x66/0xb0 aio_complete+0x98/0x328 dio_complete+0xe4/0x1e0 blk_update_request+0xd4/0x450 scsi_end_request+0x48/0x1c8 scsi_io_completion+0x272/0x698 blk_done_softirq+0xca/0xe8 __do_softirq+0xc8/0x518 irq_exit+0xee/0x110 do_IRQ+0x6a/0x88 io_int_handler+0x11a/0x25c __mutex_unlock_slowpath+0x144/0x1d8 __mutex_unlock_slowpath+0x140/0x1d8 kernfs_iop_permission+0x64/0x80 __inode_permission+0x9e/0xf0 link_path_walk+0x6e/0x510 path_lookupat+0xc4/0x1a8 filename_lookup+0x9c/0x160 user_path_at_empty+0x5c/0x70 SyS_readlinkat+0x68/0x140 system_call+0xd6/0x270 irq event stamp: 971410 hardirqs last enabled at (971409): migrate_page_move_mapping+0x3ea/0x588 hardirqs last disabled at (971410): _raw_spin_lock_irqsave+0x3c/0xb0 softirqs last enabled at (970526): __do_softirq+0x460/0x518 softirqs last disabled at (970519): irq_exit+0xee/0x110 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&ctx->completion_lock)->rlock); lock(&(&ctx->completion_lock)->rlock); *** DEADLOCK *** 3 locks held by kcompactd0/151: #0: (&(&mapping->private_lock)->rlock){+.+.-.}, at: aio_migratepage+0x42/0x1e8 #1: (&ctx->ring_lock){+.+.+.}, at: aio_migratepage+0x5a/0x1e8 #2: (&(&ctx->completion_lock)->rlock){+.?.-.}, at: aio_migratepage+0x156/0x1e8 stack backtrace: CPU: 20 PID: 151 Comm: kcompactd0 Tainted: G W 4.7.0-rc1+ #52 Call Trace: show_trace+0xea/0xf0 show_stack+0x72/0xf0 dump_stack+0x9a/0xd8 print_usage_bug.part.27+0x2d4/0x2e8 mark_lock+0x17e/0x758 mark_held_locks+0xa2/0xd0 trace_hardirqs_on_caller+0x140/0x1c0 mem_cgroup_migrate+0x266/0x370 aio_migratepage+0x16a/0x1e8 move_to_new_page+0xb0/0x260 migrate_pages+0x8f4/0x9f0 compact_zone+0x4dc/0xdc8 kcompactd_do_work+0x1aa/0x358 kcompactd+0xba/0x2c8 kthread+0x10a/0x110 kernel_thread_starter+0x6/0xc kernel_thread_starter+0x0/0xc INFO: lockdep is turned off. Link: http://lkml.kernel.org/r/20160620184158.GO3262@mtj.duckdns.org Link: http://lkml.kernel.org/g/5767CFE5.7080904@de.ibm.com Fixes: 74485cf2bc85 ("mm: migrate: consolidate mem_cgroup_migrate() calls") Signed-off-by: Tejun Heo Reported-by: Christian Borntraeger Acked-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 75e74408cc8f..d4a33e1b77d7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5544,6 +5544,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) struct mem_cgroup *memcg; unsigned int nr_pages; bool compound; + unsigned long flags; VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); @@ -5574,10 +5575,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) commit_charge(newpage, memcg, false); - local_irq_disable(); + local_irq_save(flags); mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); memcg_check_events(memcg, newpage); - local_irq_enable(); + local_irq_restore(flags); } DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); From ea3a9645866e12d2b198434f03df3c3e96fb86ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Jun 2016 14:49:58 -0700 Subject: [PATCH 0898/1050] memcg: css_alloc should return an ERR_PTR value on error mem_cgroup_css_alloc() was returning NULL on failure while cgroup core expected it to return an ERR_PTR value leading to the following NULL deref after a css allocation failure. Fix it by return ERR_PTR(-ENOMEM) instead. I'll also update cgroup core so that it can handle NULL returns. mkdir: page allocation failure: order:6, mode:0x240c0c0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO) CPU: 0 PID: 8738 Comm: mkdir Not tainted 4.7.0-rc3+ #123 ... Call Trace: dump_stack+0x68/0xa1 warn_alloc_failed+0xd6/0x130 __alloc_pages_nodemask+0x4c6/0xf20 alloc_pages_current+0x66/0xe0 alloc_kmem_pages+0x14/0x80 kmalloc_order_trace+0x2a/0x1a0 __kmalloc+0x291/0x310 memcg_update_all_caches+0x6c/0x130 mem_cgroup_css_alloc+0x590/0x610 cgroup_apply_control_enable+0x18b/0x370 cgroup_mkdir+0x1de/0x2e0 kernfs_iop_mkdir+0x55/0x80 vfs_mkdir+0xb9/0x150 SyS_mkdir+0x66/0xd0 do_syscall_64+0x53/0x120 entry_SYSCALL64_slow_path+0x25/0x25 ... BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0 IP: init_and_link_css+0x37/0x220 PGD 34b1e067 PUD 3a109067 PMD 0 Oops: 0002 [#1] SMP Modules linked in: CPU: 0 PID: 8738 Comm: mkdir Not tainted 4.7.0-rc3+ #123 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.2-20160422_131301-anatol 04/01/2014 task: ffff88007cbc5200 ti: ffff8800666d4000 task.ti: ffff8800666d4000 RIP: 0010:[] [] init_and_link_css+0x37/0x220 RSP: 0018:ffff8800666d7d90 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffffffff810f2499 RSI: 0000000000000000 RDI: 0000000000000008 RBP: ffff8800666d7db8 R08: 0000000000000003 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff88005a5fb400 R13: ffffffff81f0f8a0 R14: ffff88005a5fb400 R15: 0000000000000010 FS: 00007fc944689700(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3aed0d2b80 CR3: 000000003a1e8000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: cgroup_apply_control_enable+0x1ac/0x370 cgroup_mkdir+0x1de/0x2e0 kernfs_iop_mkdir+0x55/0x80 vfs_mkdir+0xb9/0x150 SyS_mkdir+0x66/0xd0 do_syscall_64+0x53/0x120 entry_SYSCALL64_slow_path+0x25/0x25 Code: 89 f5 48 89 fb 49 89 d4 48 83 ec 08 8b 05 72 3b d8 00 85 c0 0f 85 60 01 00 00 4c 89 e7 e8 72 f7 ff ff 48 8d 7b 08 48 89 d9 31 c0 <48> c7 83 d0 00 00 00 00 00 00 00 48 83 e7 f8 48 29 f9 81 c1 d8 RIP init_and_link_css+0x37/0x220 RSP CR2: 00000000000000d0 ---[ end trace a2d8836ae1e852d1 ]--- Link: http://lkml.kernel.org/r/20160621165740.GJ3262@mtj.duckdns.org Signed-off-by: Tejun Heo Reported-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d4a33e1b77d7..ac8664db3823 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4203,7 +4203,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) return &memcg->css; fail: mem_cgroup_free(memcg); - return NULL; + return ERR_PTR(-ENOMEM); } static int From 8f182270dfec432e93fae14f9208a6b9af01009f Mon Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Fri, 24 Jun 2016 14:50:01 -0700 Subject: [PATCH 0899/1050] mm/swap.c: flush lru pvecs on compound page arrival Currently we can have compound pages held on per cpu pagevecs, which leads to a lot of memory unavailable for reclaim when needed. In the systems with hundreads of processors it can be GBs of memory. On of the way of reproducing the problem is to not call munmap explicitly on all mapped regions (i.e. after receiving SIGTERM). After that some pages (with THP enabled also huge pages) may end up on lru_add_pvec, example below. void main() { #pragma omp parallel { size_t size = 55 * 1000 * 1000; // smaller than MEM/CPUS void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS , -1, 0); if (p != MAP_FAILED) memset(p, 0, size); //munmap(p, size); // uncomment to make the problem go away } } When we run it with THP enabled it will leave significant amount of memory on lru_add_pvec. This memory will be not reclaimed if we hit OOM, so when we run above program in a loop: for i in `seq 100`; do ./a.out; done many processes (95% in my case) will be killed by OOM. The primary point of the LRU add cache is to save the zone lru_lock contention with a hope that more pages will belong to the same zone and so their addition can be batched. The huge page is already a form of batched addition (it will add 512 worth of memory in one go) so skipping the batching seems like a safer option when compared to a potential excess in the caching which can be quite large and much harder to fix because lru_add_drain_all is way to expensive and it is not really clear what would be a good moment to call it. Similarly we can reproduce the problem on lru_deactivate_pvec by adding: madvise(p, size, MADV_FREE); after memset. This patch flushes lru pvecs on compound page arrival making the problem less severe - after applying it kill rate of above example drops to 0%, due to reducing maximum amount of memory held on pvec from 28MB (with THP) to 56kB per CPU. Suggested-by: Michal Hocko Link: http://lkml.kernel.org/r/1466180198-18854-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Lukasz Odzioba Acked-by: Michal Hocko Cc: Kirill Shutemov Cc: Andrea Arcangeli Cc: Vladimir Davydov Cc: Ming Li Cc: Minchan Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mm/swap.c b/mm/swap.c index 59f5fafa6e1f..90530ff8ed16 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -242,7 +242,7 @@ void rotate_reclaimable_page(struct page *page) get_page(page); local_irq_save(flags); pvec = this_cpu_ptr(&lru_rotate_pvecs); - if (!pagevec_add(pvec, page)) + if (!pagevec_add(pvec, page) || PageCompound(page)) pagevec_move_tail(pvec); local_irq_restore(flags); } @@ -296,7 +296,7 @@ void activate_page(struct page *page) struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); get_page(page); - if (!pagevec_add(pvec, page)) + if (!pagevec_add(pvec, page) || PageCompound(page)) pagevec_lru_move_fn(pvec, __activate_page, NULL); put_cpu_var(activate_page_pvecs); } @@ -391,9 +391,8 @@ static void __lru_cache_add(struct page *page) struct pagevec *pvec = &get_cpu_var(lru_add_pvec); get_page(page); - if (!pagevec_space(pvec)) + if (!pagevec_add(pvec, page) || PageCompound(page)) __pagevec_lru_add(pvec); - pagevec_add(pvec, page); put_cpu_var(lru_add_pvec); } @@ -628,7 +627,7 @@ void deactivate_file_page(struct page *page) if (likely(get_page_unless_zero(page))) { struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs); - if (!pagevec_add(pvec, page)) + if (!pagevec_add(pvec, page) || PageCompound(page)) pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); put_cpu_var(lru_deactivate_file_pvecs); } @@ -648,7 +647,7 @@ void deactivate_page(struct page *page) struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); get_page(page); - if (!pagevec_add(pvec, page)) + if (!pagevec_add(pvec, page) || PageCompound(page)) pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); put_cpu_var(lru_deactivate_pvecs); } From c8cc708a340cc7c5445565079fd4d1c28898d7a2 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 24 Jun 2016 14:50:04 -0700 Subject: [PATCH 0900/1050] mm/hugetlb: clear compound_mapcount when freeing gigantic pages While working on s390 support for gigantic hugepages I ran into the following "Bad page state" warning when freeing gigantic pages: BUG: Bad page state in process bash pfn:580001 page:000003d116000040 count:0 mapcount:0 mapping:ffffffff00000000 index:0x0 flags: 0x7fffc0000000000() page dumped because: non-NULL mapping This is because page->compound_mapcount, which is part of a union with page->mapping, is initialized with -1 in prep_compound_gigantic_page(), and not cleared again during destroy_compound_gigantic_page(). Fix this by clearing the compound_mapcount in destroy_compound_gigantic_page() before clearing compound_head. Interestingly enough, the warning will not show up on x86_64, although this should not be architecture specific. Apparently there is an endianness issue, combined with the fact that the union contains both a 64 bit ->mapping pointer and a 32 bit atomic_t ->compound_mapcount as members. The resulting bogus page->mapping on x86_64 therefore contains 00000000ffffffff instead of ffffffff00000000 on s390, which will falsely trigger the PageAnon() check in free_pages_prepare() because page->mapping & PAGE_MAPPING_ANON is true on little-endian architectures like x86_64 in this case (the page is not compound anymore, ->compound_head was already cleared before). As a result, page->mapping will be cleared before doing the checks in free_pages_check(). Not sure if the bogus "PageAnon() returning true" on x86_64 for the first tail page of a gigantic page (at this stage) has other theoretical implications, but they would also be fixed with this patch. Link: http://lkml.kernel.org/r/1466612719-5642-1-git-send-email-gerald.schaefer@de.ibm.com Signed-off-by: Gerald Schaefer Reviewed-by: Mike Kravetz Cc: Luiz Capitulino Cc: Naoya Horiguchi Cc: Hillf Danton Cc: "Kirill A . Shutemov" Cc: Dave Hansen Cc: Paul Gortmaker Cc: "Aneesh Kumar K . V" Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ee0302169959..c1f3c0be150a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1030,6 +1030,7 @@ static void destroy_compound_gigantic_page(struct page *page, int nr_pages = 1 << order; struct page *p = page + 1; + atomic_set(compound_mapcount_ptr(page), 0); for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { clear_compound_head(p); set_page_refcounted(p); From 5c335fe020ea287b2b49cc4dfca9f6756b88bb71 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 24 Jun 2016 14:50:07 -0700 Subject: [PATCH 0901/1050] mm: prevent KASAN false positives in kmemleak When kmemleak dumps contents of leaked objects it reads whole objects regardless of user-requested size. This upsets KASAN. Disable KASAN checks around object dump. Link: http://lkml.kernel.org/r/1466617631-68387-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Catalin Marinas Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e6429926e957..04320d3adbef 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -307,8 +307,10 @@ static void hex_dump_object(struct seq_file *seq, len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE); seq_printf(seq, " hex dump (first %zu bytes):\n", len); + kasan_disable_current(); seq_hex_dump(seq, " ", DUMP_PREFIX_NONE, HEX_ROW_SIZE, HEX_GROUP_SIZE, ptr, len, HEX_ASCII); + kasan_enable_current(); } /* From a4f04f2c6955aff5e2c08dcb40aca247ff4d7370 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 24 Jun 2016 14:50:10 -0700 Subject: [PATCH 0902/1050] mm, compaction: abort free scanner if split fails If the memory compaction free scanner cannot successfully split a free page (only possible due to per-zone low watermark), terminate the free scanner rather than continuing to scan memory needlessly. If the watermark is insufficient for a free page of order <= cc->order, then terminate the scanner since all future splits will also likely fail. This prevents the compaction freeing scanner from scanning all memory on very large zones (very noticeable for zones > 128GB, for instance) when all splits will likely fail while holding zone->lock. compaction_alloc() iterating a 128GB zone has been benchmarked to take over 400ms on some systems whereas any free page isolated and ready to be split ends up failing in split_free_page() because of the low watermark check and thus the iteration continues. The next time compaction occurs, the freeing scanner will likely start at the end of the zone again since no success was made previously and we get the same lengthy iteration until the zone is brought above the low watermark. All thp page faults can take >400ms in such a state without this fix. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1606211820350.97086@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Vlastimil Babka Cc: Minchan Kim Cc: Joonsoo Kim Cc: Mel Gorman Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 1427366ad673..79bfe0e06907 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -441,25 +441,23 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, /* Found a free page, break it into order-0 pages */ isolated = split_free_page(page); + if (!isolated) + break; + total_isolated += isolated; + cc->nr_freepages += isolated; for (i = 0; i < isolated; i++) { list_add(&page->lru, freelist); page++; } - - /* If a page was split, advance to the end of it */ - if (isolated) { - cc->nr_freepages += isolated; - if (!strict && - cc->nr_migratepages <= cc->nr_freepages) { - blockpfn += isolated; - break; - } - - blockpfn += isolated - 1; - cursor += isolated - 1; - continue; + if (!strict && cc->nr_migratepages <= cc->nr_freepages) { + blockpfn += isolated; + break; } + /* Advance to the end of split page */ + blockpfn += isolated - 1; + cursor += isolated - 1; + continue; isolate_fail: if (strict) @@ -469,6 +467,9 @@ isolate_fail: } + if (locked) + spin_unlock_irqrestore(&cc->zone->lock, flags); + /* * There is a tiny chance that we have read bogus compound_order(), * so be careful to not go outside of the pageblock. @@ -490,9 +491,6 @@ isolate_fail: if (strict && blockpfn < end_pfn) total_isolated = 0; - if (locked) - spin_unlock_irqrestore(&cc->zone->lock, flags); - /* Update the pageblock-skip if the whole pageblock was scanned */ if (blockpfn == end_pfn) update_pageblock_skip(cc, valid_page, total_isolated, false); @@ -1011,6 +1009,7 @@ static void isolate_freepages(struct compact_control *cc) block_end_pfn = block_start_pfn, block_start_pfn -= pageblock_nr_pages, isolate_start_pfn = block_start_pfn) { + unsigned long isolated; /* * This can iterate a massively long zone without finding any @@ -1035,8 +1034,12 @@ static void isolate_freepages(struct compact_control *cc) continue; /* Found a block suitable for isolating free pages from. */ - isolate_freepages_block(cc, &isolate_start_pfn, - block_end_pfn, freelist, false); + isolated = isolate_freepages_block(cc, &isolate_start_pfn, + block_end_pfn, freelist, false); + /* If isolation failed early, do not continue needlessly */ + if (!isolated && isolate_start_pfn < block_end_pfn && + cc->nr_migratepages > cc->nr_freepages) + break; /* * If we isolated enough freepages, or aborted due to async From 7186ee06b66313dae0d34ec5241fda7c4a537cb9 Mon Sep 17 00:00:00 2001 From: Gang He Date: Fri, 24 Jun 2016 14:50:13 -0700 Subject: [PATCH 0903/1050] ocfs2: disable BUG assertions in reading blocks According to some high-load testing, these two BUG assertions were encountered, this led system panic. Actually, there were some discussions about removing these two BUG() assertions, it would not bring any side effect. Then, I did the the following changes, 1) use the existing macro CATCH_BH_JBD_RACES to wrap BUG() in the ocfs2_read_blocks_sync function like before. 2) disable the macro CATCH_BH_JBD_RACES in Makefile by default. Link: http://lkml.kernel.org/r/1466574294-26863-1-git-send-email-ghe@suse.com Signed-off-by: Gang He Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/Makefile | 2 -- fs/ocfs2/buffer_head_io.c | 5 +++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index e27e6527912b..4342c7ee7d20 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -1,7 +1,5 @@ ccflags-y := -Ifs/ocfs2 -ccflags-y += -DCATCH_BH_JBD_RACES - obj-$(CONFIG_OCFS2_FS) += \ ocfs2.o \ ocfs2_stackglue.o diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index fe50ded1b4ce..498641eed2db 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -139,11 +139,16 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, lock_buffer(bh); if (buffer_jbd(bh)) { +#ifdef CATCH_BH_JBD_RACES mlog(ML_ERROR, "block %llu had the JBD bit set " "while I was in lock_buffer!", (unsigned long long)bh->b_blocknr); BUG(); +#else + unlock_buffer(bh); + continue; +#endif } clear_buffer_uptodate(bh); From 74070542099c66d87aebeacd7b54dc0e8b6a73f9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:50:16 -0700 Subject: [PATCH 0904/1050] oom, suspend: fix oom_reaper vs. oom_killer_disable race Tetsuo has reported the following potential oom_killer_disable vs. oom_reaper race: (1) freeze_processes() starts freezing user space threads. (2) Somebody (maybe a kenrel thread) calls out_of_memory(). (3) The OOM killer calls mark_oom_victim() on a user space thread P1 which is already in __refrigerator(). (4) oom_killer_disable() sets oom_killer_disabled = true. (5) P1 leaves __refrigerator() and enters do_exit(). (6) The OOM reaper calls exit_oom_victim(P1) before P1 can call exit_oom_victim(P1). (7) oom_killer_disable() returns while P1 not yet finished (8) P1 perform IO/interfere with the freezer. This situation is unfortunate. We cannot move oom_killer_disable after all the freezable kernel threads are frozen because the oom victim might depend on some of those kthreads to make a forward progress to exit so we could deadlock. It is also far from trivial to teach the oom_reaper to not call exit_oom_victim() because then we would lose a guarantee of the OOM killer and oom_killer_disable forward progress because exit_mm->mmput might block and never call exit_oom_victim. It seems the easiest way forward is to workaround this race by calling try_to_freeze_tasks again after oom_killer_disable. This will make sure that all the tasks are frozen or it bails out. Fixes: 449d777d7ad6 ("mm, oom_reaper: clear TIF_MEMDIE for all tasks queued for oom_reaper") Link: http://lkml.kernel.org/r/1466597634-16199-1-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/process.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/power/process.c b/kernel/power/process.c index df058bed53ce..0c2ee9761d57 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -146,6 +146,18 @@ int freeze_processes(void) if (!error && !oom_killer_disable()) error = -EBUSY; + /* + * There is a hard to fix race between oom_reaper kernel thread + * and oom_killer_disable. oom_reaper calls exit_oom_victim + * before the victim reaches exit_mm so try to freeze all the tasks + * again and catch such a left over task. + */ + if (!error) { + pr_info("Double checking all user space processes after OOM killer disable... "); + error = try_to_freeze_tasks(true); + pr_cont("\n"); + } + if (error) thaw_processes(); return error; From 63d2f95d63396059200c391ca87161897b99e74a Mon Sep 17 00:00:00 2001 From: Torsten Hilbrich Date: Fri, 24 Jun 2016 14:50:18 -0700 Subject: [PATCH 0905/1050] fs/nilfs2: fix potential underflow in call to crc32_le The value `bytes' comes from the filesystem which is about to be mounted. We cannot trust that the value is always in the range we expect it to be. Check its value before using it to calculate the length for the crc32_le call. It value must be larger (or equal) sumoff + 4. This fixes a kernel bug when accidentially mounting an image file which had the nilfs2 magic value 0x3434 at the right offset 0x406 by chance. The bytes 0x01 0x00 were stored at 0x408 and were interpreted as a s_bytes value of 1. This caused an underflow when substracting sumoff + 4 (20) in the call to crc32_le. BUG: unable to handle kernel paging request at ffff88021e600000 IP: crc32_le+0x36/0x100 ... Call Trace: nilfs_valid_sb.part.5+0x52/0x60 [nilfs2] nilfs_load_super_block+0x142/0x300 [nilfs2] init_nilfs+0x60/0x390 [nilfs2] nilfs_mount+0x302/0x520 [nilfs2] mount_fs+0x38/0x160 vfs_kern_mount+0x67/0x110 do_mount+0x269/0xe00 SyS_mount+0x9f/0x100 entry_SYSCALL_64_fastpath+0x16/0x71 Link: http://lkml.kernel.org/r/1466778587-5184-2-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Torsten Hilbrich Tested-by: Torsten Hilbrich Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/the_nilfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 809bd2de7ad0..e9fd241b9a0a 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -439,7 +439,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC) return 0; bytes = le16_to_cpu(sbp->s_bytes); - if (bytes > BLOCK_SIZE) + if (bytes < sumoff + 4 || bytes > BLOCK_SIZE) return 0; crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp, sumoff); From 7c5b7239465932400ee0825bcc90624717c1af19 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Jun 2016 14:50:21 -0700 Subject: [PATCH 0906/1050] tools/vm/slabinfo: fix spelling mistake: "Ocurrences" -> "Occurrences" trivial fix to spelling mistake Link: http://lkml.kernel.org/r/1466672144-831-1-git-send-email-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/slabinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 1889163f2f05..7cf6e1769903 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -492,7 +492,7 @@ static void slab_stats(struct slabinfo *s) s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass; if (total) { - printf("\nSlab Deactivation Ocurrences %%\n"); + printf("\nSlab Deactivation Occurrences %%\n"); printf("-------------------------------------------------\n"); printf("Slab full %7lu %3lu%%\n", s->deactivate_full, (s->deactivate_full * 100) / total); From 8285027fc479949a7a166bc1b26ce57e894878a7 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Fri, 24 Jun 2016 14:50:24 -0700 Subject: [PATCH 0907/1050] mm/page_owner: avoid null pointer dereference We have dereferenced page_ext before checking it. Lets check it first and then used it. Fixes: f86e4271978b ("mm: check the return value of lookup_page_ext for all call sites") Link: http://lkml.kernel.org/r/1465249059-7883-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Acked-by: Vlastimil Babka Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_owner.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index c6cda3e36212..fedeba88c9cb 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -207,13 +207,15 @@ void __dump_page_owner(struct page *page) .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0], }; - gfp_t gfp_mask = page_ext->gfp_mask; - int mt = gfpflags_to_migratetype(gfp_mask); + gfp_t gfp_mask; + int mt; if (unlikely(!page_ext)) { pr_alert("There is not page extension available.\n"); return; } + gfp_mask = page_ext->gfp_mask; + mt = gfpflags_to_migratetype(gfp_mask); if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not active (free page?)\n"); From 5a9294e5c535deab69831076af15cd35e1c95f8b Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 24 Jun 2016 14:50:27 -0700 Subject: [PATCH 0908/1050] autofs: don't get stuck in a loop if vfs_write() returns an error __vfs_write() returns a negative value in a error case. Link: http://lkml.kernel.org/r/20160616083108.6278.65815.stgit@pluto.themaw.net Signed-off-by: Andrey Vagin Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/waitq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 0146d911f468..631f1554c87b 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -66,11 +66,12 @@ static int autofs4_write(struct autofs_sb_info *sbi, set_fs(KERNEL_DS); mutex_lock(&sbi->pipe_mutex); - wr = __vfs_write(file, data, bytes, &file->f_pos); - while (bytes && wr) { + while (bytes) { + wr = __vfs_write(file, data, bytes, &file->f_pos); + if (wr <= 0) + break; data += wr; bytes -= wr; - wr = __vfs_write(file, data, bytes, &file->f_pos); } mutex_unlock(&sbi->pipe_mutex); From 0fd5ed8d897cffdc74903931bd7fcc9d8d154460 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 24 Jun 2016 14:50:30 -0700 Subject: [PATCH 0909/1050] init/main.c: fix initcall_blacklisted on ia64, ppc64 and parisc64 When I replaced kasprintf("%pf") with a direct call to sprint_symbol_no_offset I must have broken the initcall blacklisting feature on the arches where dereference_function_descriptor() is non-trivial. Fixes: c8cdd2be213f (init/main.c: simplify initcall_blacklisted()) Link: http://lkml.kernel.org/r/1466027283-4065-1-git-send-email-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Yang Shi Cc: Prarit Bhargava Cc: Petr Mladek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 4c17fda5c2ff..63a5afb54f25 100644 --- a/init/main.c +++ b/init/main.c @@ -708,11 +708,13 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn) { struct blacklist_entry *entry; char fn_name[KSYM_SYMBOL_LEN]; + unsigned long addr; if (list_empty(&blacklisted_initcalls)) return false; - sprint_symbol_no_offset(fn_name, (unsigned long)fn); + addr = (unsigned long) dereference_function_descriptor(fn); + sprint_symbol_no_offset(fn_name, addr); list_for_each_entry(entry, &blacklisted_initcalls, next) { if (!strcmp(fn_name, entry->buf)) { From b42b90d177d0c7f46f9c888c10c7900578ae7e09 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 24 Jun 2016 23:49:03 -0400 Subject: [PATCH 0910/1050] ceph: fix d_obtain_alias() misuses on failure d_obtain_alias() will have done iput() Signed-off-by: Al Viro --- fs/ceph/export.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 6e72c98162d5..1780218a48f0 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -95,10 +95,8 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) } dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - iput(inode); + if (IS_ERR(dentry)) return dentry; - } err = ceph_init_dentry(dentry); if (err < 0) { dput(dentry); @@ -167,10 +165,8 @@ static struct dentry *__get_parent(struct super_block *sb, return ERR_PTR(-ENOENT); dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - iput(inode); + if (IS_ERR(dentry)) return dentry; - } err = ceph_init_dentry(dentry); if (err < 0) { dput(dentry); @@ -210,7 +206,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, dout("fh_to_parent %llx\n", cfh->parent_ino); dentry = __get_parent(sb, NULL, cfh->ino); - if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT) + if (unlikely(dentry == ERR_PTR(-ENOENT))) dentry = __fh_to_dentry(sb, cfh->parent_ino); return dentry; } From 9521d39976db20f8ef9b56af66661482a17d5364 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sat, 25 Jun 2016 21:53:30 +1000 Subject: [PATCH 0911/1050] Fix build break in fork.c when THREAD_SIZE < PAGE_SIZE Commit b235beea9e99 ("Clarify naming of thread info/stack allocators") breaks the build on some powerpc configs, where THREAD_SIZE < PAGE_SIZE: kernel/fork.c:235:2: error: implicit declaration of function 'free_thread_stack' kernel/fork.c:355:8: error: assignment from incompatible pointer type stack = alloc_thread_stack_node(tsk, node); ^ Fix it by renaming free_stack() to free_thread_stack(), and updating the return type of alloc_thread_stack_node(). Fixes: b235beea9e99 ("Clarify naming of thread info/stack allocators") Signed-off-by: Michael Ellerman Signed-off-by: Linus Torvalds --- kernel/fork.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 37b9439b8c07..4a7ec0c6c88c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -183,13 +183,13 @@ static inline void free_thread_stack(unsigned long *stack) # else static struct kmem_cache *thread_stack_cache; -static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk, +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); } -static void free_stack(unsigned long *stack) +static void free_thread_stack(unsigned long *stack) { kmem_cache_free(thread_stack_cache, stack); } From 02dbfc99b424dde3cf0a492ed3bec4f222441754 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 20 May 2016 13:50:33 -0700 Subject: [PATCH 0912/1050] Btrfs: fix ->iterate_shared() by upgrading i_rwsem for delayed nodes Commit fe742fd4f90f ("Revert "btrfs: switch to ->iterate_shared()"") backed out the conversion to ->iterate_shared() for Btrfs because the delayed inode handling in btrfs_real_readdir() is racy. However, we can still do readdir in parallel if there are no delayed nodes. This is a temporary fix which upgrades the shared inode lock to an exclusive lock only when we have delayed items until we come up with a more complete solution. While we're here, rename the btrfs_{get,put}_delayed_items functions to make it very clear that they're just for readdir. Tested with xfstests and by doing a parallel kernel build: while make tinyconfig && make -j4 && git clean dqfx; do : done along with a bunch of parallel finds in another shell: while true; do for ((i=0; i<4; i++)); do find . >/dev/null & done wait done Signed-off-by: Omar Sandoval Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/delayed-inode.c | 27 ++++++++++++++++++++++----- fs/btrfs/delayed-inode.h | 10 ++++++---- fs/btrfs/inode.c | 10 ++++++---- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 61561c2a3f96..d3aaabbfada0 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) return 0; } -void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, - struct list_head *del_list) +bool btrfs_readdir_get_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list) { struct btrfs_delayed_node *delayed_node; struct btrfs_delayed_item *item; delayed_node = btrfs_get_delayed_node(inode); if (!delayed_node) - return; + return false; + + /* + * We can only do one readdir with delayed items at a time because of + * item->readdir_list. + */ + inode_unlock_shared(inode); + inode_lock(inode); mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); @@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, * requeue or dequeue this delayed node. */ atomic_dec(&delayed_node->refs); + + return true; } -void btrfs_put_delayed_items(struct list_head *ins_list, - struct list_head *del_list) +void btrfs_readdir_put_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list) { struct btrfs_delayed_item *curr, *next; @@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list, if (atomic_dec_and_test(&curr->refs)) kfree(curr); } + + /* + * The VFS is going to do up_read(), so we need to downgrade back to a + * read lock. + */ + downgrade_write(&inode->i_rwsem); } int btrfs_should_delete_dir_index(struct list_head *del_list, diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0167853c84ae..2495b3d4075f 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); void btrfs_destroy_delayed_inodes(struct btrfs_root *root); /* Used for readdir() */ -void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, - struct list_head *del_list); -void btrfs_put_delayed_items(struct list_head *ins_list, - struct list_head *del_list); +bool btrfs_readdir_get_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list); +void btrfs_readdir_put_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list); int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d2be95cfb6d1..969a25c5abcb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5757,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) int name_len; int is_curr = 0; /* ctx->pos points to the current index? */ bool emitted; + bool put = false; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -5774,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (key_type == BTRFS_DIR_INDEX_KEY) { INIT_LIST_HEAD(&ins_list); INIT_LIST_HEAD(&del_list); - btrfs_get_delayed_items(inode, &ins_list, &del_list); + put = btrfs_readdir_get_delayed_items(inode, &ins_list, + &del_list); } key.type = key_type; @@ -5921,8 +5923,8 @@ next: nopos: ret = 0; err: - if (key_type == BTRFS_DIR_INDEX_KEY) - btrfs_put_delayed_items(&ins_list, &del_list); + if (put) + btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list); btrfs_free_path(path); return ret; } @@ -10534,7 +10536,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { static const struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = btrfs_real_readdir, + .iterate_shared = btrfs_real_readdir, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_compat_ioctl, From 1b45996d2ebf9680ccd0db875fc668aa025f40fd Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 17 May 2016 11:41:04 -0700 Subject: [PATCH 0913/1050] tty: vt: Fix soft lockup in fbcon cursor blink timer. We are getting somewhat random soft lockups with this signature: [ 86.992215] [] el1_irq+0xa0/0x10c [ 86.997082] [] cursor_timer_handler+0x30/0x54 [ 87.002991] [] call_timer_fn+0x54/0x1a8 [ 87.008378] [] run_timer_softirq+0x1c4/0x2bc [ 87.014200] [] __do_softirq+0x114/0x344 [ 87.019590] [] irq_exit+0x74/0x98 [ 87.024458] [] __handle_domain_irq+0x98/0xfc [ 87.030278] [] gic_handle_irq+0x94/0x190 This is caused by the vt visual_init() function calling into fbcon_init() with a vc_cur_blink_ms value of zero. This is a transient condition, as it is later set to a non-zero value. But, if the timer happens to expire while the blink rate is zero, it goes into an endless loop, and we get soft lockup. The fix is to initialize vc_cur_blink_ms before calling the con_init() function. Signed-off-by: David Daney Cc: stable@vger.kernel.org Acked-by: Pavel Machek Tested-by: Ming Lei Acked-by: Scot Doyle Tested-by: Henrique de Moraes Holschuh Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index dc125322f48f..5b0fe97c46ca 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -750,6 +750,7 @@ static void visual_init(struct vc_data *vc, int num, int init) vc->vc_complement_mask = 0; vc->vc_can_do_color = 0; vc->vc_panic_force_write = false; + vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS; vc->vc_sw->con_init(vc, init); if (!vc->vc_complement_mask) vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800; From ef3149eb3ddb7f9125e11c90f8330e371b55cffd Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 22 Jun 2016 20:43:30 +0100 Subject: [PATCH 0914/1050] staging: iio: accel: fix error check sca3000_read_ctrl_reg() returns a negative number on failure, check for this instead of zero. Signed-off-by: Luis de Bethencourt Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/accel/sca3000_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/accel/sca3000_core.c b/drivers/staging/iio/accel/sca3000_core.c index a8f533af9eca..ec12181822e6 100644 --- a/drivers/staging/iio/accel/sca3000_core.c +++ b/drivers/staging/iio/accel/sca3000_core.c @@ -594,7 +594,7 @@ static ssize_t sca3000_read_frequency(struct device *dev, goto error_ret_mut; ret = sca3000_read_ctrl_reg(st, SCA3000_REG_CTRL_SEL_OUT_CTRL); mutex_unlock(&st->lock); - if (ret) + if (ret < 0) goto error_ret; val = ret; if (base_freq > 0) From 0c1f91b98552da49d9d8eed32b3132a58d2f4598 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 17 Jun 2016 15:22:24 +0200 Subject: [PATCH 0915/1050] iio: accel: kxsd9: fix the usage of spi_w8r8() These two spi_w8r8() calls return a value with is used by the code following the error check. The dubious use was caused by a cleanup patch. Fixes: d34dbee8ac8e ("staging:iio:accel:kxsd9 cleanup and conversion to iio_chan_spec.") Signed-off-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxsd9.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 923f56598d4b..3a9f106787d2 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -81,7 +81,7 @@ static int kxsd9_write_scale(struct iio_dev *indio_dev, int micro) mutex_lock(&st->buf_lock); ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); - if (ret) + if (ret < 0) goto error_ret; st->tx[0] = KXSD9_WRITE(KXSD9_REG_CTRL_C); st->tx[1] = (ret & ~KXSD9_FS_MASK) | i; @@ -163,7 +163,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev, break; case IIO_CHAN_INFO_SCALE: ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); - if (ret) + if (ret < 0) goto error_ret; *val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK]; ret = IIO_VAL_INT_PLUS_MICRO; From 6b7f4e25f3309f106a5c7ff42c8231494cf285d3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Jun 2016 13:53:32 +0100 Subject: [PATCH 0916/1050] iio:ad7266: Fix broken regulator error handling All regulator_get() variants return either a pointer to a regulator or an ERR_PTR() so testing for NULL makes no sense and may lead to bugs if we use NULL as a valid regulator. Fix this by using IS_ERR() as expected. Signed-off-by: Mark Brown Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7266.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 21e19b60e2b9..835d45db258f 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -397,7 +397,7 @@ static int ad7266_probe(struct spi_device *spi) st = iio_priv(indio_dev); st->reg = devm_regulator_get(&spi->dev, "vref"); - if (!IS_ERR_OR_NULL(st->reg)) { + if (!IS_ERR(st->reg)) { ret = regulator_enable(st->reg); if (ret) return ret; From e5511c816e5ac4909bdd38e85ac344e2b9b8e984 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Jun 2016 13:53:33 +0100 Subject: [PATCH 0917/1050] iio:ad7266: Fix support for optional regulators The ad7266 driver attempts to support deciding between the use of internal and external power supplies by checking to see if an error is returned when requesting the regulator. This doesn't work with the current code since the driver uses a normal regulator_get() which is for non-optional supplies and so assumes that if a regulator is not provided by the platform then this is a bug in the platform integration and so substitutes a dummy regulator. Use regulator_get_optional() instead which indicates to the framework that the regulator may be absent and provides a dummy regulator instead. Signed-off-by: Mark Brown Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7266.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 835d45db258f..655b36b4e9cb 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -396,7 +396,7 @@ static int ad7266_probe(struct spi_device *spi) st = iio_priv(indio_dev); - st->reg = devm_regulator_get(&spi->dev, "vref"); + st->reg = devm_regulator_get_optional(&spi->dev, "vref"); if (!IS_ERR(st->reg)) { ret = regulator_enable(st->reg); if (ret) From 68b356eb3d9f5e38910fb62e22a78e2a18d544ae Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Jun 2016 13:53:34 +0100 Subject: [PATCH 0918/1050] iio:ad7266: Fix probe deferral for vref Currently the ad7266 driver treats any failure to get vref as though the regulator were not present but this means that if probe deferral is triggered the driver will act as though the regulator were not present. Instead only use the internal reference if we explicitly got -ENODEV which is what is returned for absent regulators. Signed-off-by: Mark Brown Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7266.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 655b36b4e9cb..2123f0ac2e2a 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -408,6 +408,9 @@ static int ad7266_probe(struct spi_device *spi) st->vref_mv = ret / 1000; } else { + /* Any other error indicates that the regulator does exist */ + if (PTR_ERR(st->reg) != -ENODEV) + return PTR_ERR(st->reg); /* Use internal reference */ st->vref_mv = 2500; } From 5353ed8deedee9e5acb9f896e9032158f5d998de Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 20 Jun 2016 15:40:27 +0100 Subject: [PATCH 0919/1050] devpts: fix null pointer dereference on failed memory allocation An ENOMEM when creating a pair tty in tty_ldisc_setup causes a null pointer dereference in devpts_kill_index because tty->link->driver_data is NULL. The oops was triggered with the pty stressor in stress-ng when in a low memory condition. tty_init_dev tries to clean up a tty_ldisc_setup ENOMEM error by calling release_tty, however, this ultimately tries to clean up the NULL pair'd tty in pty_unix98_remove, triggering the Oops. Add check to pty_unix98_remove to only clean up fsi if it is not NULL. Ooops: [ 23.020961] Oops: 0000 [#1] SMP [ 23.020976] Modules linked in: ppdev snd_hda_codec_generic snd_hda_intel snd_hda_codec parport_pc snd_hda_core snd_hwdep parport snd_pcm input_leds joydev snd_timer serio_raw snd soundcore i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel qxl aes_x86_64 ttm lrw gf128mul glue_helper ablk_helper drm_kms_helper cryptd syscopyarea sysfillrect psmouse sysimgblt floppy fb_sys_fops drm pata_acpi jitterentropy_rng drbg ansi_cprng [ 23.020978] CPU: 0 PID: 1452 Comm: stress-ng-pty Not tainted 4.7.0-rc4+ #2 [ 23.020978] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 23.020979] task: ffff88007ba30000 ti: ffff880078ea8000 task.ti: ffff880078ea8000 [ 23.020981] RIP: 0010:[] [] ida_remove+0x1f/0x120 [ 23.020981] RSP: 0018:ffff880078eabb60 EFLAGS: 00010a03 [ 23.020982] RAX: 4444444444444567 RBX: 0000000000000000 RCX: 000000000000001f [ 23.020982] RDX: 000000000000014c RSI: 000000000000026f RDI: 0000000000000000 [ 23.020982] RBP: ffff880078eabb70 R08: 0000000000000004 R09: 0000000000000036 [ 23.020983] R10: 000000000000026f R11: 0000000000000000 R12: 000000000000026f [ 23.020983] R13: 000000000000026f R14: ffff88007c944b40 R15: 000000000000026f [ 23.020984] FS: 00007f9a2f3cc700(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 [ 23.020984] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 23.020985] CR2: 0000000000000010 CR3: 000000006c81b000 CR4: 00000000001406f0 [ 23.020988] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 23.020988] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 23.020988] Stack: [ 23.020989] 0000000000000000 000000000000026f ffff880078eabb90 ffffffff812a5a99 [ 23.020990] 0000000000000000 00000000fffffff4 ffff880078eabba8 ffffffff814f9cbe [ 23.020991] ffff88007965c800 ffff880078eabbc8 ffffffff814eef43 fffffffffffffff4 [ 23.020991] Call Trace: [ 23.021000] [] devpts_kill_index+0x29/0x50 [ 23.021002] [] pty_unix98_remove+0x2e/0x50 [ 23.021006] [] release_tty+0xb3/0x1b0 [ 23.021007] [] tty_init_dev+0xd4/0x1c0 [ 23.021011] [] ptmx_open+0xae/0x190 [ 23.021013] [] chrdev_open+0xbf/0x1b0 [ 23.021015] [] do_dentry_open+0x203/0x310 [ 23.021016] [] ? cdev_put+0x30/0x30 [ 23.021017] [] vfs_open+0x54/0x80 [ 23.021018] [] ? may_open+0x8c/0x100 [ 23.021019] [] path_openat+0x2eb/0x1440 [ 23.021020] [] ? putname+0x54/0x60 [ 23.021022] [] ? n_tty_ioctl_helper+0x27/0x100 [ 23.021023] [] do_filp_open+0x91/0x100 [ 23.021024] [] ? getname_flags+0x56/0x1f0 [ 23.021026] [] ? __alloc_fd+0x46/0x190 [ 23.021027] [] do_sys_open+0x124/0x210 [ 23.021028] [] SyS_open+0x1e/0x20 [ 23.021035] [] entry_SYSCALL_64_fastpath+0x1e/0xa8 [ 23.021044] Code: 63 28 45 31 e4 eb dd 0f 1f 44 00 00 55 4c 63 d6 48 ba 89 88 88 88 88 88 88 88 4c 89 d0 b9 1f 00 00 00 48 f7 e2 48 89 e5 41 54 53 <8b> 47 10 48 89 fb 8d 3c c5 00 00 00 00 48 c1 ea 09 b8 01 00 00 [ 23.021045] RIP [] ida_remove+0x1f/0x120 [ 23.021045] RSP [ 23.021046] CR2: 0000000000000010 Signed-off-by: Colin Ian King Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index f856c4544eea..51e0d32883ba 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -667,8 +667,11 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) fsi = tty->driver_data; else fsi = tty->link->driver_data; - devpts_kill_index(fsi, tty->index); - devpts_release(fsi); + + if (fsi) { + devpts_kill_index(fsi, tty->index); + devpts_release(fsi); + } } static const struct tty_operations ptm_unix98_ops = { From 4c2e07c6a29e0129e975727b9f57eede813eea85 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Jun 2016 17:52:03 -0700 Subject: [PATCH 0920/1050] Linux 4.7-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4fb6beac5f09..6471f20ca400 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From a37503bc387ce2434106d4bf4870bd73081c7355 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 22 Jun 2016 12:40:50 -0500 Subject: [PATCH 0921/1050] net: smsc911x: Fix bug where PHY interrupts are overwritten by 0 By default, mdiobus_alloc() sets the PHYs to polling mode, but a pointer size memcpy means that a couple IRQs end up being overwritten with a value of 0. This means that PHY_POLL is disabled and results in unpredictable behavior depending on the PHY's location on the MDIO bus. Remove that memcpy and the now unused phy_irq member to force the SMSC911x PHYs into polling mode 100% of the time. Fixes: e7f4dc3536a4 ("mdio: Move allocation of interrupts into core") Signed-off-by: Jeremy Linton Reviewed-by: Andrew Lunn Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smsc911x.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 8af25563f627..b5ab5e120bca 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -116,7 +116,6 @@ struct smsc911x_data { struct phy_device *phy_dev; struct mii_bus *mii_bus; - int phy_irq[PHY_MAX_ADDR]; unsigned int using_extphy; int last_duplex; int last_carrier; @@ -1073,7 +1072,6 @@ static int smsc911x_mii_init(struct platform_device *pdev, pdata->mii_bus->priv = pdata; pdata->mii_bus->read = smsc911x_mii_read; pdata->mii_bus->write = smsc911x_mii_write; - memcpy(pdata->mii_bus->irq, pdata->phy_irq, sizeof(pdata->mii_bus)); pdata->mii_bus->parent = &pdev->dev; From 8e96a87c5431c256feb65bcfc5aec92d9f7839b6 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Fri, 17 Jun 2016 14:58:34 +1000 Subject: [PATCH 0922/1050] powerpc/tm: Always reclaim in start_thread() for exec() class syscalls Userspace can quite legitimately perform an exec() syscall with a suspended transaction. exec() does not return to the old process, rather it load a new one and starts that, the expectation therefore is that the new process starts not in a transaction. Currently exec() is not treated any differently to any other syscall which creates problems. Firstly it could allow a new process to start with a suspended transaction for a binary that no longer exists. This means that the checkpointed state won't be valid and if the suspended transaction were ever to be resumed and subsequently aborted (a possibility which is exceedingly likely as exec()ing will likely doom the transaction) the new process will jump to invalid state. Secondly the incorrect attempt to keep the transactional state while still zeroing state for the new process creates at least two TM Bad Things. The first triggers on the rfid to return to userspace as start_thread() has given the new process a 'clean' MSR but the suspend will still be set in the hardware MSR. The second TM Bad Thing triggers in __switch_to() as the processor is still transactionally suspended but __switch_to() wants to zero the TM sprs for the new process. This is an example of the outcome of calling exec() with a suspended transaction. Note the first 700 is likely the first TM bad thing decsribed earlier only the kernel can't report it as we've loaded userspace registers. c000000000009980 is the rfid in fast_exception_return() Bad kernel stack pointer 3fffcfa1a370 at c000000000009980 Oops: Bad kernel stack pointer, sig: 6 [#1] CPU: 0 PID: 2006 Comm: tm-execed Not tainted NIP: c000000000009980 LR: 0000000000000000 CTR: 0000000000000000 REGS: c00000003ffefd40 TRAP: 0700 Not tainted MSR: 8000000300201031 CR: 00000000 XER: 00000000 CFAR: c0000000000098b4 SOFTE: 0 PACATMSCRATCH: b00000010000d033 GPR00: 0000000000000000 00003fffcfa1a370 0000000000000000 0000000000000000 GPR04: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR12: 00003fff966611c0 0000000000000000 0000000000000000 0000000000000000 NIP [c000000000009980] fast_exception_return+0xb0/0xb8 LR [0000000000000000] (null) Call Trace: Instruction dump: f84d0278 e9a100d8 7c7b03a6 e84101a0 7c4ff120 e8410170 7c5a03a6 e8010070 e8410080 e8610088 e8810090 e8210078 <4c000024> 48000000 e8610178 88ed023b Kernel BUG at c000000000043e80 [verbose debug info unavailable] Unexpected TM Bad Thing exception at c000000000043e80 (msr 0x201033) Oops: Unrecoverable exception, sig: 6 [#2] CPU: 0 PID: 2006 Comm: tm-execed Tainted: G D task: c0000000fbea6d80 ti: c00000003ffec000 task.ti: c0000000fb7ec000 NIP: c000000000043e80 LR: c000000000015a24 CTR: 0000000000000000 REGS: c00000003ffef7e0 TRAP: 0700 Tainted: G D MSR: 8000000300201033 CR: 28002828 XER: 00000000 CFAR: c000000000015a20 SOFTE: 0 PACATMSCRATCH: b00000010000d033 GPR00: 0000000000000000 c00000003ffefa60 c000000000db5500 c0000000fbead000 GPR04: 8000000300001033 2222222222222222 2222222222222222 00000000ff160000 GPR08: 0000000000000000 800000010000d033 c0000000fb7e3ea0 c00000000fe00004 GPR12: 0000000000002200 c00000000fe00000 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 c0000000fbea7410 00000000ff160000 GPR24: c0000000ffe1f600 c0000000fbea8700 c0000000fbea8700 c0000000fbead000 GPR28: c000000000e20198 c0000000fbea6d80 c0000000fbeab680 c0000000fbea6d80 NIP [c000000000043e80] tm_restore_sprs+0xc/0x1c LR [c000000000015a24] __switch_to+0x1f4/0x420 Call Trace: Instruction dump: 7c800164 4e800020 7c0022a6 f80304a8 7c0222a6 f80304b0 7c0122a6 f80304b8 4e800020 e80304a8 7c0023a6 e80304b0 <7c0223a6> e80304b8 7c0123a6 4e800020 This fixes CVE-2016-5828. Fixes: bc2a9408fa65 ("powerpc: Hook in new transactional memory code") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2f12cbcade9..0b93893424f5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.regs = regs - 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Clear any transactional state, we're exec()ing. The cause is + * not important as there will never be a recheckpoint so it's not + * user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); +#endif + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; From 0efce9da1255f13d910842b62cb14371044a3c50 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 8 Jun 2016 11:38:55 +0100 Subject: [PATCH 0923/1050] arm64: KVM: fix build with CONFIG_ARM_PMU disabled When CONFIG_ARM_PMU is disabled, we get the following build error: arch/arm64/kvm/sys_regs.c: In function 'pmu_counter_idx_valid': arch/arm64/kvm/sys_regs.c:564:27: error: 'ARMV8_PMU_CYCLE_IDX' undeclared (first use in this function) if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) ^ arch/arm64/kvm/sys_regs.c:564:27: note: each undeclared identifier is reported only once for each function it appears in arch/arm64/kvm/sys_regs.c: In function 'access_pmu_evcntr': arch/arm64/kvm/sys_regs.c:592:10: error: 'ARMV8_PMU_CYCLE_IDX' undeclared (first use in this function) idx = ARMV8_PMU_CYCLE_IDX; ^ arch/arm64/kvm/sys_regs.c: In function 'access_pmu_evtyper': arch/arm64/kvm/sys_regs.c:638:14: error: 'ARMV8_PMU_CYCLE_IDX' undeclared (first use in this function) if (idx == ARMV8_PMU_CYCLE_IDX) ^ arch/arm64/kvm/hyp/switch.c:86:15: error: 'ARMV8_PMU_USERENR_MASK' undeclared (first use in this function) write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); This patch fixes the build with CONFIG_ARM_PMU disabled. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Sudeep Holla Signed-off-by: Christoffer Dall --- include/kvm/arm_pmu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index fe389ac31489..92e7e97ca8ff 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -18,13 +18,13 @@ #ifndef __ASM_ARM_KVM_PMU_H #define __ASM_ARM_KVM_PMU_H -#ifdef CONFIG_KVM_ARM_PMU - #include #include #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) +#ifdef CONFIG_KVM_ARM_PMU + struct kvm_pmc { u8 idx; /* index into the pmu->pmc array */ struct perf_event *perf_event; From 583248e6620a4726093295e2d6785fcbc2e86428 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Jun 2016 12:10:08 +0100 Subject: [PATCH 0924/1050] iommu/iova: Disable preemption around use of this_cpu_ptr() Between acquiring the this_cpu_ptr() and using it, ideally we don't want to be preempted and work on another CPU's private data. this_cpu_ptr() checks whether or not preemption is disable, and get_cpu_ptr() provides a convenient wrapper for operating on the cpu ptr inside a preemption disabled critical section (which currently is provided by the spinlock). [ 167.997877] BUG: using smp_processor_id() in preemptible [00000000] code: usb-storage/216 [ 167.997940] caller is debug_smp_processor_id+0x17/0x20 [ 167.997945] CPU: 7 PID: 216 Comm: usb-storage Tainted: G U 4.7.0-rc1-gfxbench-RO_Patchwork_1057+ #1 [ 167.997948] Hardware name: Hewlett-Packard HP Pro 3500 Series/2ABF, BIOS 8.11 10/24/2012 [ 167.997951] 0000000000000000 ffff880118b7f9c8 ffffffff8140dca5 0000000000000007 [ 167.997958] ffffffff81a3a7e9 ffff880118b7f9f8 ffffffff8142a927 0000000000000000 [ 167.997965] ffff8800d499ed58 0000000000000001 00000000000fffff ffff880118b7fa08 [ 167.997971] Call Trace: [ 167.997977] [] dump_stack+0x67/0x92 [ 167.997981] [] check_preemption_disabled+0xd7/0xe0 [ 167.997985] [] debug_smp_processor_id+0x17/0x20 [ 167.997990] [] alloc_iova_fast+0xb7/0x210 [ 167.997994] [] intel_alloc_iova+0x7f/0xd0 [ 167.997998] [] intel_map_sg+0xbd/0x240 [ 167.998002] [] ? debug_lockdep_rcu_enabled+0x1d/0x20 [ 167.998009] [] usb_hcd_map_urb_for_dma+0x4b9/0x5a0 [ 167.998013] [] usb_hcd_submit_urb+0xe9/0xaa0 [ 167.998017] [] ? mark_held_locks+0x6f/0xa0 [ 167.998022] [] ? __raw_spin_lock_init+0x1c/0x50 [ 167.998025] [] ? debug_lockdep_rcu_enabled+0x1d/0x20 [ 167.998028] [] usb_submit_urb+0x3f3/0x5a0 [ 167.998032] [] ? trace_hardirqs_on_caller+0x122/0x1b0 [ 167.998035] [] usb_sg_wait+0x67/0x150 [ 167.998039] [] usb_stor_bulk_transfer_sglist.part.3+0x82/0xd0 [ 167.998042] [] usb_stor_bulk_srb+0x4c/0x60 [ 167.998045] [] usb_stor_Bulk_transport+0x17e/0x420 [ 167.998049] [] usb_stor_invoke_transport+0x242/0x540 [ 167.998052] [] ? debug_lockdep_rcu_enabled+0x1d/0x20 [ 167.998058] [] usb_stor_transparent_scsi_command+0x9/0x10 [ 167.998061] [] usb_stor_control_thread+0x158/0x260 [ 167.998064] [] ? fill_inquiry_response+0x20/0x20 [ 167.998067] [] ? fill_inquiry_response+0x20/0x20 [ 167.998071] [] kthread+0xea/0x100 [ 167.998078] [] ret_from_fork+0x1f/0x40 [ 167.998081] [] ? kthread_create_on_node+0x1f0/0x1f0 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96293 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Fixes: 9257b4a206fc ('iommu/iova: introduce per-cpu caching to iova allocation') Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index ba764a0835d3..e23001bfcfee 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -420,8 +420,10 @@ retry: /* Try replenishing IOVAs by flushing rcache. */ flushed_rcache = true; + preempt_disable(); for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); + preempt_enable(); goto retry; } @@ -749,7 +751,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, bool can_insert = false; unsigned long flags; - cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_full(cpu_rcache->loaded)) { @@ -779,6 +781,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, iova_magazine_push(cpu_rcache->loaded, iova_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); + put_cpu_ptr(rcache->cpu_rcaches); if (mag_to_free) { iova_magazine_free_pfns(mag_to_free, iovad); @@ -812,7 +815,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, bool has_pfn = false; unsigned long flags; - cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_empty(cpu_rcache->loaded)) { @@ -834,6 +837,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); + put_cpu_ptr(rcache->cpu_rcaches); return iova_pfn; } From 591d215afcc2f94e8e2c69a63c924c044677eb31 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jun 2016 17:24:45 +0100 Subject: [PATCH 0925/1050] KVM: arm/arm64: Stop leaking vcpu pid references kvm provides kvm_vcpu_uninit(), which amongst other things, releases the last reference to the struct pid of the task that was last running the vcpu. On arm64 built with CONFIG_DEBUG_KMEMLEAK, starting a guest with kvmtool, then killing it with SIGKILL results (after some considerable time) in: > cat /sys/kernel/debug/kmemleak > unreferenced object 0xffff80007d5ea080 (size 128): > comm "lkvm", pid 2025, jiffies 4294942645 (age 1107.776s) > hex dump (first 32 bytes): > 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ > 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ > backtrace: > [] create_object+0xfc/0x278 > [] kmemleak_alloc+0x34/0x70 > [] kmem_cache_alloc+0x16c/0x1d8 > [] alloc_pid+0x34/0x4d0 > [] copy_process.isra.6+0x79c/0x1338 > [] _do_fork+0x74/0x320 > [] SyS_clone+0x18/0x20 > [] el0_svc_naked+0x24/0x28 > [] 0xffffffffffffffff On x86 kvm_vcpu_uninit() is called on the path from kvm_arch_destroy_vm(), on arm no equivalent call is made. Add the call to kvm_arch_vcpu_free(). Signed-off-by: James Morse Fixes: 749cf76c5a36 ("KVM: ARM: Initial skeleton to compile KVM support") Cc: # 3.10+ Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 893941ec98dc..f1bde7c4e736 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_timer_vcpu_terminate(vcpu); kvm_vgic_vcpu_destroy(vcpu); kvm_pmu_vcpu_destroy(vcpu); + kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } From 3bd4f9112f87a9c65fe6e817272806167f0bc9ed Mon Sep 17 00:00:00 2001 From: Jan Niehusmann Date: Mon, 6 Jun 2016 14:20:11 +0200 Subject: [PATCH 0926/1050] iommu/vt-d: Fix overflow of iommu->domains array The valid range of 'did' in get_iommu_domain(*iommu, did) is 0..cap_ndoms(iommu->cap), so don't exceed that range in free_all_cpu_cached_iovas(). The user-visible impact of the out-of-bounds access is the machine hanging on suspend-to-ram. It is, in fact, a kernel panic, but due to already suspended devices, that's often not visible to the user. Fixes: 22e2f9fa63b0 ("iommu/vt-d: Use per-cpu IOVA caching") Signed-off-by: Jan Niehusmann Tested-By: Marius Vlad Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 10700945994e..cfe410eedaf0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4607,7 +4607,7 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) if (!iommu) continue; - for (did = 0; did < 0xffff; did++) { + for (did = 0; did < cap_ndoms(iommu->cap); did++) { domain = get_iommu_domain(iommu, did); if (!domain) From 6082ee72e9d89e80a664418be06f47d728243e85 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sun, 26 Jun 2016 10:33:29 +0200 Subject: [PATCH 0927/1050] iommu/amd: Initialize devid variable before using it Commit 2a0cb4e2d423 ("iommu/amd: Add new map for storing IVHD dev entry type HID") added a call to DUMP_printk in init_iommu_from_acpi() which used the value of devid before this variable was initialized. Fixes: 2a0cb4e2d423 ('iommu/amd: Add new map for storing IVHD dev entry type HID') Signed-off-by: Nicolas Iooss Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 9e0034196e10..d091defc3426 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1107,13 +1107,13 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } + devid = e->devid; DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n", hid, uid, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); - devid = e->devid; flags = e->flags; ret = add_acpi_hid_device(hid, uid, &devid, false); From d20cb71dbf3487f24549ede1a8e2d67579b4632e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Jun 2016 13:14:36 -0400 Subject: [PATCH 0928/1050] make nfs_atomic_open() call d_drop() on all ->open_context() errors. In "NFSv4: Move dentry instantiation into the NFSv4-specific atomic open code" unconditional d_drop() after the ->open_context() had been removed. It had been correct for success cases (there ->open_context() itself had been doing dcache manipulations), but not for error ones. Only one of those (ENOENT) got a compensatory d_drop() added in that commit, but in fact it should've been done for all errors. As it is, the case of O_CREAT non-exclusive open on a hashed negative dentry racing with e.g. symlink creation from another client ended up with ->open_context() getting an error and proceeding to call nfs_lookup(). On a hashed dentry, which would've instantly triggered BUG_ON() in d_materialise_unique() (or, these days, its equivalent in d_splice_alias()). Cc: stable@vger.kernel.org # v3.10+ Tested-by: Oleg Drokin Signed-off-by: Al Viro Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2817cce7a9f4..d8015a03db4c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1539,9 +1539,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); + d_drop(dentry); switch (err) { case -ENOENT: - d_drop(dentry); d_add(dentry, NULL); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; From 749d088b8e7f4b9826ede02b9a043e417fa84aa1 Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Fri, 27 May 2016 14:17:10 +0800 Subject: [PATCH 0929/1050] pvclock: Add CPU barriers to get correct version value Protocol for the "version" fields is: hypervisor raises it (making it uneven) before it starts updating the fields and raises it again (making it even) when it is done. Thus the guest can make sure the time values it got are consistent by checking the version before and after reading them. Add CPU barries after getting version value just like what function vread_pvclock does, because all of callees in this function is inline. Fixes: 502dfeff239e8313bfbe906ca0a1a6827ac8481b Cc: stable@vger.kernel.org Signed-off-by: Minfei Huang Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock.h | 2 ++ arch/x86/kernel/pvclock.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index fdcc04020636..538ae944855e 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -85,6 +85,8 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u8 ret_flags; version = src->version; + /* Make the latest version visible */ + smp_rmb(); offset = pvclock_get_nsec_offset(src); ret = src->system_time + offset; diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 99bfc025111d..7f82fe0a6807 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -66,6 +66,8 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) do { version = __pvclock_read_cycles(src, &ret, &flags); + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); return flags & valid_flags; @@ -80,6 +82,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) do { version = __pvclock_read_cycles(src, &ret, &flags); + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { From f7550d076d55c1b5f02f95012e3a5a84d264c47d Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Fri, 27 May 2016 14:17:11 +0800 Subject: [PATCH 0930/1050] pvclock: Cleanup to remove function pvclock_get_nsec_offset Function __pvclock_read_cycles is short enough, so there is no need to have another function pvclock_get_nsec_offset to calculate tsc delta. It's better to combine it into function __pvclock_read_cycles. Remove useless variables in function __pvclock_read_cycles. Signed-off-by: Minfei Huang Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 538ae944855e..7c1c89598688 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -68,32 +68,23 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) return product; } -static __always_inline -u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) -{ - u64 delta = rdtsc_ordered() - src->tsc_timestamp; - return pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); -} - static __always_inline unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, cycle_t *cycles, u8 *flags) { unsigned version; - cycle_t ret, offset; - u8 ret_flags; + cycle_t offset; + u64 delta; version = src->version; /* Make the latest version visible */ smp_rmb(); - offset = pvclock_get_nsec_offset(src); - ret = src->system_time + offset; - ret_flags = src->flags; - - *cycles = ret; - *flags = ret_flags; + delta = rdtsc_ordered() - src->tsc_timestamp; + offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + *cycles = src->system_time + offset; + *flags = src->flags; return version; } From ed911b43adb889c37a37fa57a995f0b460c633b6 Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Sat, 28 May 2016 20:27:43 +0800 Subject: [PATCH 0931/1050] pvclock: Get rid of __pvclock_read_cycles in function pvclock_read_flags There is a generic function __pvclock_read_cycles to be used to get both flags and cycles. For function pvclock_read_flags, it's useless to get cycles value. To make this function be more effective, get this variable flags directly in function. Signed-off-by: Minfei Huang Signed-off-by: Paolo Bonzini --- arch/x86/kernel/pvclock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 7f82fe0a6807..06c58ce46762 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -61,11 +61,14 @@ void pvclock_resume(void) u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) { unsigned version; - cycle_t ret; u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); + version = src->version; + /* Make the latest version visible */ + smp_rmb(); + + flags = src->flags; /* Make sure that the version double-check is last. */ smp_rmb(); } while ((src->version & 1) || version != src->version); From 8d93c874ac899bfdf0ad3787baef684a0c878c2c Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 20 Jun 2016 22:28:02 -0300 Subject: [PATCH 0932/1050] KVM: x86: move nsec_to_cycles from x86.c to x86.h Move the inline function nsec_to_cycles from x86.c to x86.h, as the next patch uses it from lapic.c. Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 ------ arch/x86/kvm/x86.h | 7 +++++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 902d9da12392..7da5dd2057a9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); static unsigned long max_tsc_khz; -static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) -{ - return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, - vcpu->arch.virtual_tsc_shift); -} - static u32 adjust_tsc_khz(u32 khz, s32 ppm) { u64 v = (u64)khz * (1000000 + ppm); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7ce3634ab5fe..a82ca466b62e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -2,6 +2,7 @@ #define ARCH_X86_KVM_X86_H #include +#include #include "kvm_cache_regs.h" #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL @@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns; extern struct static_key kvm_no_apic_vcpu; +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) +{ + return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, + vcpu->arch.virtual_tsc_shift); +} + /* Same "calling convention" as do_div: * - divide (n << 32) by base * - put result in n From b606f189c7d5bf9b875bba168162fe05287880fe Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 20 Jun 2016 22:33:48 -0300 Subject: [PATCH 0933/1050] KVM: LAPIC: cap __delay at lapic_timer_advance_ns The host timer which emulates the guest LAPIC TSC deadline timer has its expiration diminished by lapic_timer_advance_ns nanoseconds. Therefore if, at wait_lapic_expire, a difference larger than lapic_timer_advance_ns is encountered, delay at most lapic_timer_advance_ns. This fixes a problem where the guest can cause the host to delay for large amounts of time. Reported-by: Alan Jenkins Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bbb5b283ff63..a397200281c1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) - __delay(tsc_deadline - guest_tsc); + __delay(min(tsc_deadline - guest_tsc, + nsec_to_cycles(vcpu, lapic_timer_advance_ns))); } static void start_apic_timer(struct kvm_lapic *apic) From ff30ef40deca4658e27b0c596e7baf39115e858f Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Sat, 18 Jun 2016 11:01:05 +0200 Subject: [PATCH 0934/1050] KVM: nVMX: VMX instructions: fix segment checks when L1 is in long mode. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I couldn't get Xen to boot a L2 HVM when it was nested under KVM - it was getting a GP(0) on a rather unspecial vmread from Xen: (XEN) ----[ Xen-4.7.0-rc x86_64 debug=n Not tainted ]---- (XEN) CPU: 1 (XEN) RIP: e008:[] vmx_get_segment_register+0x14e/0x450 (XEN) RFLAGS: 0000000000010202 CONTEXT: hypervisor (d1v0) (XEN) rax: ffff82d0801e6288 rbx: ffff83003ffbfb7c rcx: fffffffffffab928 (XEN) rdx: 0000000000000000 rsi: 0000000000000000 rdi: ffff83000bdd0000 (XEN) rbp: ffff83000bdd0000 rsp: ffff83003ffbfab0 r8: ffff830038813910 (XEN) r9: ffff83003faf3958 r10: 0000000a3b9f7640 r11: ffff83003f82d418 (XEN) r12: 0000000000000000 r13: ffff83003ffbffff r14: 0000000000004802 (XEN) r15: 0000000000000008 cr0: 0000000080050033 cr4: 00000000001526e0 (XEN) cr3: 000000003fc79000 cr2: 0000000000000000 (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: e008 (XEN) Xen code around (vmx_get_segment_register+0x14e/0x450): (XEN) 00 00 41 be 02 48 00 00 <44> 0f 78 74 24 08 0f 86 38 56 00 00 b8 08 68 00 (XEN) Xen stack trace from rsp=ffff83003ffbfab0: ... (XEN) Xen call trace: (XEN) [] vmx_get_segment_register+0x14e/0x450 (XEN) [] get_page_from_gfn_p2m+0x165/0x300 (XEN) [] hvmemul_get_seg_reg+0x52/0x60 (XEN) [] hvm_emulate_prepare+0x53/0x70 (XEN) [] handle_mmio+0x2b/0xd0 (XEN) [] emulate.c#_hvm_emulate_one+0x111/0x2c0 (XEN) [] handle_hvm_io_completion+0x274/0x2a0 (XEN) [] __get_gfn_type_access+0xfa/0x270 (XEN) [] timer.c#add_entry+0x4b/0xb0 (XEN) [] timer.c#remove_entry+0x7c/0x90 (XEN) [] hvm_do_resume+0x23/0x140 (XEN) [] vmx_do_resume+0xa7/0x140 (XEN) [] context_switch+0x13b/0xe40 (XEN) [] schedule.c#schedule+0x22e/0x570 (XEN) [] softirq.c#__do_softirq+0x5c/0x90 (XEN) [] domain.c#idle_loop+0x25/0x50 (XEN) (XEN) (XEN) **************************************** (XEN) Panic on CPU 1: (XEN) GENERAL PROTECTION FAULT (XEN) [error_code=0000] (XEN) **************************************** Tracing my host KVM showed it was the one injecting the GP(0) when emulating the VMREAD and checking the destination segment permissions in get_vmx_mem_address(): 3) | vmx_handle_exit() { 3) | handle_vmread() { 3) | nested_vmx_check_permission() { 3) | vmx_get_segment() { 3) 0.074 us | vmx_read_guest_seg_base(); 3) 0.065 us | vmx_read_guest_seg_selector(); 3) 0.066 us | vmx_read_guest_seg_ar(); 3) 1.636 us | } 3) 0.058 us | vmx_get_rflags(); 3) 0.062 us | vmx_read_guest_seg_ar(); 3) 3.469 us | } 3) | vmx_get_cs_db_l_bits() { 3) 0.058 us | vmx_read_guest_seg_ar(); 3) 0.662 us | } 3) | get_vmx_mem_address() { 3) 0.068 us | vmx_cache_reg(); 3) | vmx_get_segment() { 3) 0.074 us | vmx_read_guest_seg_base(); 3) 0.068 us | vmx_read_guest_seg_selector(); 3) 0.071 us | vmx_read_guest_seg_ar(); 3) 1.756 us | } 3) | kvm_queue_exception_e() { 3) 0.066 us | kvm_multiple_exception(); 3) 0.684 us | } 3) 4.085 us | } 3) 9.833 us | } 3) + 10.366 us | } Cross-checking the KVM/VMX VMREAD emulation code with the Intel Software Developper Manual Volume 3C - "VMREAD - Read Field from Virtual-Machine Control Structure", I found that we're enforcing that the destination operand is NOT located in a read-only data segment or any code segment when the L1 is in long mode - BUT that check should only happen when it is in protected mode. Shuffling the code a bit to make our emulation follow the specification allows me to boot a Xen dom0 in a nested KVM and start HVM L2 guests without problems. Fixes: f9eb4af67c9d ("KVM: nVMX: VMX instructions: add checks for #GP/#SS exceptions") Signed-off-by: Quentin Casasnovas Cc: Eugene Korenevsky Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: linux-stable Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 003618e324ce..64a79f271276 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6671,7 +6671,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Checks for #GP/#SS exceptions. */ exn = false; - if (is_protmode(vcpu)) { + if (is_long_mode(vcpu)) { + /* Long mode: #GP(0)/#SS(0) if the memory address is in a + * non-canonical form. This is the only check on the memory + * destination for long mode! + */ + exn = is_noncanonical_address(*ret); + } else if (is_protmode(vcpu)) { /* Protected mode: apply checks for segment validity in the * following order: * - segment type check (#GP(0) may be thrown) @@ -6688,17 +6694,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, * execute-only code segment */ exn = ((s.type & 0xa) == 8); - } - if (exn) { - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; - } - if (is_long_mode(vcpu)) { - /* Long mode: #GP(0)/#SS(0) if the memory address is in a - * non-canonical form. This is an only check for long mode. - */ - exn = is_noncanonical_address(*ret); - } else if (is_protmode(vcpu)) { + if (exn) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */ exn = (s.unusable != 0); From 48f1dcb55a7d29aeb8965c567660c14d0dfd1a42 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jun 2016 15:25:09 +0200 Subject: [PATCH 0935/1050] ipv6: enforce egress device match in per table nexthop lookups with the commit 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups"), net hop lookup is first performed on route creation in the passed-in table. However device match is not enforced in table lookup, so the found route can be later discarded due to egress device mismatch and no global lookup will be performed. This cause the following to fail: ip link add dummy1 type dummy ip link add dummy2 type dummy ip link set dummy1 up ip link set dummy2 up ip route add 2001:db8:8086::/48 dev dummy1 metric 20 ip route add 2001:db8:d34d::/64 via 2001:db8:8086::2 dev dummy1 metric 20 ip route add 2001:db8:8086::/48 dev dummy2 metric 21 ip route add 2001:db8:d34d::/64 via 2001:db8:8086::2 dev dummy2 metric 21 RTNETLINK answers: No route to host This change fixes the issue enforcing device lookup in ip6_nh_lookup_table() v1->v2: updated commit message title Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") Reported-and-tested-by: Beniamino Galvani Signed-off-by: Paolo Abeni Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 969913da494f..520b7884d0c2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1782,7 +1782,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = 0; + int flags = RT6_LOOKUP_F_IFACE; table = fib6_get_table(net, cfg->fc_table); if (!table) From 4192f672fae559f32d82de72a677701853cc98a7 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 23 Jun 2016 16:28:58 +0100 Subject: [PATCH 0936/1050] vsock: make listener child lock ordering explicit There are several places where the listener and pending or accept queue child sockets are accessed at the same time. Lockdep is unhappy that two locks from the same class are held. Tell lockdep that it is safe and document the lock ordering. Originally Claudio Imbrenda sent a similar patch asking whether this is safe. I have audited the code and also covered the vsock_pending_work() function. Suggested-by: Claudio Imbrenda Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b5f1221f48d4..b96ac918e0ba 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -61,6 +61,14 @@ * function will also cleanup rejected sockets, those that reach the connected * state but leave it before they have been accepted. * + * - Lock ordering for pending or accept queue sockets is: + * + * lock_sock(listener); + * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); + * + * Using explicit nested locking keeps lockdep happy since normally only one + * lock of a given class may be taken at a time. + * * - Sockets created by user action will be cleaned up when the user process * calls close(2), causing our release implementation to be called. Our release * implementation will perform some cleanup then drop the last reference so our @@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work) cleanup = true; lock_sock(listener); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); @@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) if (connected) { listener->sk_ack_backlog--; - lock_sock(connected); + lock_sock_nested(connected, SINGLE_DEPTH_NESTING); vconnected = vsock_sk(connected); /* If the listener socket has received an error, then we should From ab2a4bf83902c170d29ba130a8abb5f9d90559e1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 27 Jun 2016 10:23:10 -0400 Subject: [PATCH 0937/1050] USB: don't free bandwidth_mutex too early The USB core contains a bug that can show up when a USB-3 host controller is removed. If the primary (USB-2) hcd structure is released before the shared (USB-3) hcd, the core will try to do a double-free of the common bandwidth_mutex. The problem was described in graphical form by Chung-Geol Kim, who first reported it: ================================================= At *remove USB(3.0) Storage sequence <1> --> <5> ((Problem Case)) ================================================= VOLD ------------------------------------|------------ (uevent) ________|_________ |<1> | |dwc3_otg_sm_work | |usb_put_hcd | |peer_hcd(kref=2)| |__________________| ________|_________ |<2> | |New USB BUS #2 | | | |peer_hcd(kref=1) | | | --(Link)-bandXX_mutex| | |__________________| | ___________________ | |<3> | | |dwc3_otg_sm_work | | |usb_put_hcd | | |primary_hcd(kref=1)| | |___________________| | _________|_________ | |<4> | | |New USB BUS #1 | | |hcd_release | | |primary_hcd(kref=0)| | | | | |bandXX_mutex(free) |<- |___________________| (( VOLD )) ______|___________ |<5> | | SCSI | |usb_put_hcd | |peer_hcd(kref=0) | |*hcd_release | |bandXX_mutex(free*)|<- double free |__________________| ================================================= This happens because hcd_release() frees the bandwidth_mutex whenever it sees a primary hcd being released (which is not a very good idea in any case), but in the course of releasing the primary hcd, it changes the pointers in the shared hcd in such a way that the shared hcd will appear to be primary when it gets released. This patch fixes the problem by changing hcd_release() so that it deallocates the bandwidth_mutex only when the _last_ hcd structure referencing it is released. The patch also removes an unnecessary test, so that when an hcd is released, both the shared_hcd and primary_hcd pointers in the hcd's peer will be cleared. Signed-off-by: Alan Stern Reported-by: Chung-Geol Kim Tested-by: Chung-Geol Kim CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 34b837ae1ed7..d2e3f655c26f 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2598,26 +2598,23 @@ EXPORT_SYMBOL_GPL(usb_create_hcd); * Don't deallocate the bandwidth_mutex until the last shared usb_hcd is * deallocated. * - * Make sure to only deallocate the bandwidth_mutex when the primary HCD is - * freed. When hcd_release() is called for either hcd in a peer set - * invalidate the peer's ->shared_hcd and ->primary_hcd pointers to - * block new peering attempts + * Make sure to deallocate the bandwidth_mutex only when the last HCD is + * freed. When hcd_release() is called for either hcd in a peer set, + * invalidate the peer's ->shared_hcd and ->primary_hcd pointers. */ static void hcd_release(struct kref *kref) { struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref); mutex_lock(&usb_port_peer_mutex); - if (usb_hcd_is_primary_hcd(hcd)) { - kfree(hcd->address0_mutex); - kfree(hcd->bandwidth_mutex); - } if (hcd->shared_hcd) { struct usb_hcd *peer = hcd->shared_hcd; peer->shared_hcd = NULL; - if (peer->primary_hcd == hcd) - peer->primary_hcd = NULL; + peer->primary_hcd = NULL; + } else { + kfree(hcd->address0_mutex); + kfree(hcd->bandwidth_mutex); } mutex_unlock(&usb_port_peer_mutex); kfree(hcd); From 023954351fae0e34ba247cff4d798c98290b20a4 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 23 Jun 2016 16:54:46 -0500 Subject: [PATCH 0938/1050] dax: fix offset overflow in dax_io This isn't functionally apparent for some reason, but when we test io at extreme offsets at the end of the loff_t rang, such as in fstests xfs/071, the calculation of "max" in dax_io() can be wrong due to pos + size overflowing. For example, # xfs_io -c "pwrite 9223372036854771712 512" /mnt/test/file enters dax_io with: start 0x7ffffffffffff000 end 0x7ffffffffffff200 and the rounded up "size" variable is 0x1000. This yields: pos + size 0x8000000000000000 (overflows loff_t) end 0x7ffffffffffff200 Due to the overflow, the min() function picks the wrong value for the "max" variable, and when we send (max - pos) into i.e. copy_from_iter_pmem() it is also the wrong value. This somehow(tm) gets magically absorbed without incident, probably because iter->count is correct. But it seems best to fix it up properly by comparing the two values as unsigned. Signed-off-by: Eric Sandeen Signed-off-by: Dan Williams --- fs/dax.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 761495bf5eb9..e207f8f9b700 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -208,7 +208,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, dax.addr += first; size = map_len - first; } - max = min(pos + size, end); + /* + * pos + size is one past the last offset for IO, + * so pos + size can overflow loff_t at extreme offsets. + * Cast to u64 to catch this and get the true minimum. + */ + max = min_t(u64, pos + size, end); } if (iov_iter_rw(iter) == WRITE) { From 5ab666e09541e64ce2fd73411c3b5b9e4ad334b1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jun 2016 23:47:15 +0200 Subject: [PATCH 0939/1050] intel_pstate: Do not clear utilization update hooks on policy changes intel_pstate_set_policy() is invoked by the cpufreq core during driver initialization, on changes of policy attributes (minimim and maximum frequency, for example) via sysfs and via CPU notifications from the platform firmware. On some platforms the latter may occur relatively often. Commit bb6ab52f2bef (intel_pstate: Do not set utilization update hook too early) made intel_pstate_set_policy() clear the CPU's utilization update hook before updating the policy attributes for it (and set the hook again after doind that), but that involves invoking synchronize_sched() and adds overhead to the CPU notifications mentioned above and to the sched-RCU handling in general. That extra overhead is arguably not necessary, because updating policy attributes when the CPU's utilization update hook is active should not lead to any adverse effects, so drop the clearing of the hook from intel_pstate_set_policy() and make it check if the hook has been set already when attempting to set it. Fixes: bb6ab52f2bef (intel_pstate: Do not set utilization update hook too early) Reported-by: Jisheng Zhang Tested-by: Jisheng Zhang Tested-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fe9dc17ea873..1fa1a32928d7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1400,6 +1400,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) { struct cpudata *cpu = all_cpu_data[cpu_num]; + if (cpu->update_util_set) + return; + /* Prevent intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, @@ -1440,8 +1443,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) if (!policy->cpuinfo.max_freq) return -ENODEV; - intel_pstate_clear_update_util_hook(policy->cpu); - pr_debug("set_policy cpuinfo.max %u policy->max %u\n", policy->cpuinfo.max_freq, policy->max); From ca5eda5d3db6026080cff267459625c87c43e9ab Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Jun 2016 14:50:13 +0900 Subject: [PATCH 0940/1050] cpufreq: dt: call of_node_put() before error out If of_match_node() fails, this init function bails out without calling of_node_put(). Also change of_node_put(of_root) to of_node_put(np); both of them hold the same pointer, but it seems better to call of_node_put() against the node returned by of_find_node_by_path(). Signed-off-by: Masahiro Yamada Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt-platdev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 3646b143bbf5..0bb44d5b5df4 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -79,15 +79,16 @@ static const struct of_device_id machines[] __initconst = { static int __init cpufreq_dt_platdev_init(void) { struct device_node *np = of_find_node_by_path("/"); + const struct of_device_id *match; if (!np) return -ENODEV; - if (!of_match_node(machines, np)) + match = of_match_node(machines, np); + of_node_put(np); + if (!match) return -ENODEV; - of_node_put(of_root); - return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1, NULL, 0)); } From 742c87bf27d3b715820da6f8a81d6357adbf18f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 28 Jun 2016 03:29:29 +0200 Subject: [PATCH 0941/1050] cpufreq: Avoid false-positive WARN_ON()s in cpufreq_update_policy() CPU notifications from the firmware coming in when cpufreq is suspended cause cpufreq_update_current_freq() to return 0 which triggers the WARN_ON() in cpufreq_update_policy() for no reason. Avoid that by checking cpufreq_suspended before calling cpufreq_update_current_freq(). Fixes: c9d9c929e674 (cpufreq: Abort cpufreq_update_current_freq() for cpufreq_suspended set) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Cc: 4.6+ # 4.6+ --- drivers/cpufreq/cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9009295f5134..5617c7087d77 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2261,6 +2261,10 @@ int cpufreq_update_policy(unsigned int cpu) * -> ask driver for current freq and notify governors about a change */ if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { + if (cpufreq_suspended) { + ret = -EAGAIN; + goto unlock; + } new_policy.cur = cpufreq_update_current_freq(policy); if (WARN_ON(!new_policy.cur)) { ret = -EIO; From f52e126cc7476196f44f3c313b7d9f0699a881fc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 28 Jun 2016 09:42:25 +0530 Subject: [PATCH 0942/1050] ARC: unwind: ensure that .debug_frame is generated (vs. .eh_frame) With recent binutils update to support dwarf CFI pseudo-ops in gas, we now get .eh_frame vs. .debug_frame. Although the call frame info is exactly the same in both, the CIE differs, which the current kernel unwinder can't cope with. This broke both the kernel unwinder as well as loadable modules (latter because of a new unhandled relo R_ARC_32_PCREL from .rela.eh_frame in the module loader) The ideal solution would be to switch unwinder to .eh_frame. For now however we can make do by just ensureing .debug_frame is generated by removing -fasynchronous-unwind-tables .eh_frame generated with -gdwarf-2 -fasynchronous-unwind-tables .debug_frame generated with -gdwarf-2 Fixes STAR 9001058196 Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index d4df6be66d58..85814e74677d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -66,8 +66,6 @@ endif endif -cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables - # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok ifeq ($(atleast_gcc48),y) cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2 From 9bd54517ee86cb164c734f72ea95aeba4804f10b Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 23 Jun 2016 11:00:39 +0300 Subject: [PATCH 0943/1050] arc: unwind: warn only once if DW2_UNWIND is disabled If CONFIG_ARC_DW2_UNWIND is disabled every time arc_unwind_core() gets called following message gets printed in debug console: ----------------->8--------------- CONFIG_ARC_DW2_UNWIND needs to be enabled ----------------->8--------------- That message makes sense if user indeed wants to see a backtrace or get nice function call-graphs in perf but what if user disabled unwinder for the purpose? Why pollute his debug console? So instead we'll warn user about possibly missing feature once and let him decide if that was what he or she really wanted. Signed-off-by: Alexey Brodkin Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index e0efff15a5ae..b9192a653b7e 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, * prelogue is setup (callee regs saved and then fp set and not other * way around */ - pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); + pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); return 0; #endif From 5419447e2142d6ed68c9f5c1a28630b3a290a845 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 13 Jun 2016 17:03:48 +0200 Subject: [PATCH 0944/1050] Revert "s390/kdump: Clear subchannel ID to signal non-CCW/SCSI IPL" This reverts commit 852ffd0f4e23248b47531058e531066a988434b5. There are use cases where an intermediate boot kernel (1) uses kexec to boot the final production kernel (2). For this scenario we should provide the original boot information to the production kernel (2). Therefore clearing the boot information during kexec() should not be done. Cc: stable@vger.kernel.org # v3.17+ Reported-by: Steffen Maier Signed-off-by: Michael Holzheu Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f20abdb5630a..d14069d4b88d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2064,12 +2064,5 @@ void s390_reset_system(void) S390_lowcore.program_new_psw.addr = (unsigned long) s390_base_pgm_handler; - /* - * Clear subchannel ID and number to signal new kernel that no CCW or - * SCSI IPL has been done (for kexec and kdump) - */ - S390_lowcore.subchannel_id = 0; - S390_lowcore.subchannel_nr = 0; - do_reset_calls(); } From bcf4dd5f9ee096bd1510f838dd4750c35df4e38b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 27 Jun 2016 17:06:45 +0200 Subject: [PATCH 0945/1050] s390: fix test_fp_ctl inline assembly contraints The test_fp_ctl function is used to test if a given value is a valid floating-point control. The inline assembly in test_fp_ctl uses an incorrect constraint for the 'orig_fpc' variable. If the compiler chooses the same register for 'fpc' and 'orig_fpc' the test_fp_ctl() function always returns true. This allows user space to trigger kernel oopses with invalid floating-point control values on the signal stack. This problem has been introduced with git commit 4725c86055f5bbdcdf "s390: fix save and restore of the floating-point-control register" Cc: stable@vger.kernel.org # v3.13+ Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/fpu/api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 5e04f3cbd320..8ae236b0f80b 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc) " la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "=d" (rc), "=d" (orig_fpc) + : "=d" (rc), "=&d" (orig_fpc) : "d" (fpc), "0" (-EINVAL)); return rc; } From 70a0dec45174c976c64b4c8c1d0898581f759948 Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Thu, 23 Jun 2016 16:11:57 -0400 Subject: [PATCH 0946/1050] ipmr/ip6mr: Initialize the last assert time of mfc entries. This fixes wrong-interface signaling on 32-bit platforms for entries created when jiffies > 2^31 + MFC_ASSERT_THRESH. Signed-off-by: Tom Goff Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 +++- net/ipv6/ip6mr.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 21a38e296fe2..5ad48ec77710 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -891,8 +891,10 @@ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c) + if (c) { + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + } return c; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f2e2013f8346..487ef3bc7bbc 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXMIFS; return c; } From 0622cab0341cac6b30da177b0faa39fae0680e71 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Thu, 23 Jun 2016 14:20:51 -0700 Subject: [PATCH 0947/1050] bonding: fix 802.3ad aggregator reselection Since commit 7bb11dc9f59d ("bonding: unify all places where actor-oper key needs to be updated."), the logic in bonding to handle selection between multiple aggregators has not functioned. This affects only configurations wherein the bonding slaves connect to two discrete aggregators (e.g., two independent switches, each with LACP enabled), thus creating two separate aggregation groups within a single bond. The cause is a change in 7bb11dc9f59d to no longer set AD_PORT_BEGIN on a port after a link state change, which would cause the port to be reselected for attachment to an aggregator as if were newly added to the bond. We cannot restore the prior behavior, as it contradicts IEEE 802.1AX 5.4.12, which requires ports that "become inoperable" (lose carrier, setting port_enabled=false as per 802.1AX 5.4.7) to remain selected (i.e., assigned to the aggregator). As the port now remains selected, the aggregator selection logic is not invoked. A side effect of this change is that aggregators in bonding will now contain ports that are link down. The aggregator selection logic does not currently handle this situation correctly, causing incorrect aggregator selection. This patch makes two changes to repair the aggregator selection logic in bonding to function as documented and within the confines of the standard: First, the aggregator selection and related logic now utilizes the number of active ports per aggregator, not the number of selected ports (as some selected ports may be down). The ad_select "bandwidth" and "count" options only consider ports that are link up. Second, on any carrier state change of any slave, the aggregator selection logic is explicitly called to insure the correct aggregator is active. Reported-by: Veli-Matti Lintu Fixes: 7bb11dc9f59d ("bonding: unify all places where actor-oper key needs to be updated.") Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 64 ++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index b9304a295f86..ca81f46ea1aa 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -657,6 +657,20 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val) } } +static int __agg_active_ports(struct aggregator *agg) +{ + struct port *port; + int active = 0; + + for (port = agg->lag_ports; port; + port = port->next_port_in_aggregator) { + if (port->is_enabled) + active++; + } + + return active; +} + /** * __get_agg_bandwidth - get the total bandwidth of an aggregator * @aggregator: the aggregator we're looking at @@ -664,39 +678,40 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val) */ static u32 __get_agg_bandwidth(struct aggregator *aggregator) { + int nports = __agg_active_ports(aggregator); u32 bandwidth = 0; - if (aggregator->num_of_ports) { + if (nports) { switch (__get_link_speed(aggregator->lag_ports)) { case AD_LINK_SPEED_1MBPS: - bandwidth = aggregator->num_of_ports; + bandwidth = nports; break; case AD_LINK_SPEED_10MBPS: - bandwidth = aggregator->num_of_ports * 10; + bandwidth = nports * 10; break; case AD_LINK_SPEED_100MBPS: - bandwidth = aggregator->num_of_ports * 100; + bandwidth = nports * 100; break; case AD_LINK_SPEED_1000MBPS: - bandwidth = aggregator->num_of_ports * 1000; + bandwidth = nports * 1000; break; case AD_LINK_SPEED_2500MBPS: - bandwidth = aggregator->num_of_ports * 2500; + bandwidth = nports * 2500; break; case AD_LINK_SPEED_10000MBPS: - bandwidth = aggregator->num_of_ports * 10000; + bandwidth = nports * 10000; break; case AD_LINK_SPEED_20000MBPS: - bandwidth = aggregator->num_of_ports * 20000; + bandwidth = nports * 20000; break; case AD_LINK_SPEED_40000MBPS: - bandwidth = aggregator->num_of_ports * 40000; + bandwidth = nports * 40000; break; case AD_LINK_SPEED_56000MBPS: - bandwidth = aggregator->num_of_ports * 56000; + bandwidth = nports * 56000; break; case AD_LINK_SPEED_100000MBPS: - bandwidth = aggregator->num_of_ports * 100000; + bandwidth = nports * 100000; break; default: bandwidth = 0; /* to silence the compiler */ @@ -1530,10 +1545,10 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, switch (__get_agg_selection_mode(curr->lag_ports)) { case BOND_AD_COUNT: - if (curr->num_of_ports > best->num_of_ports) + if (__agg_active_ports(curr) > __agg_active_ports(best)) return curr; - if (curr->num_of_ports < best->num_of_ports) + if (__agg_active_ports(curr) < __agg_active_ports(best)) return best; /*FALLTHROUGH*/ @@ -1561,8 +1576,14 @@ static int agg_device_up(const struct aggregator *agg) if (!port) return 0; - return netif_running(port->slave->dev) && - netif_carrier_ok(port->slave->dev); + for (port = agg->lag_ports; port; + port = port->next_port_in_aggregator) { + if (netif_running(port->slave->dev) && + netif_carrier_ok(port->slave->dev)) + return 1; + } + + return 0; } /** @@ -1610,7 +1631,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, agg->is_active = 0; - if (agg->num_of_ports && agg_device_up(agg)) + if (__agg_active_ports(agg) && agg_device_up(agg)) best = ad_agg_selection_test(best, agg); } @@ -1622,7 +1643,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, * answering partner. */ if (active && active->lag_ports && - active->lag_ports->is_enabled && + __agg_active_ports(active) && (__agg_has_partner(active) || (!__agg_has_partner(active) && !__agg_has_partner(best)))) { @@ -2133,7 +2154,7 @@ void bond_3ad_unbind_slave(struct slave *slave) else temp_aggregator->lag_ports = temp_port->next_port_in_aggregator; temp_aggregator->num_of_ports--; - if (temp_aggregator->num_of_ports == 0) { + if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; ad_clear_agg(temp_aggregator); if (select_new_active_agg) { @@ -2432,7 +2453,9 @@ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave) */ void bond_3ad_handle_link_change(struct slave *slave, char link) { + struct aggregator *agg; struct port *port; + bool dummy; port = &(SLAVE_AD_INFO(slave)->port); @@ -2459,6 +2482,9 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) port->is_enabled = false; ad_update_actor_keys(port, true); } + agg = __get_first_agg(port); + ad_agg_selection_logic(agg, &dummy); + netdev_dbg(slave->bond->dev, "Port %d changed link status to %s\n", port->actor_port_number, link == BOND_LINK_UP ? "UP" : "DOWN"); @@ -2499,7 +2525,7 @@ int bond_3ad_set_carrier(struct bonding *bond) active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator)); if (active) { /* are enough slaves available to consider link up? */ - if (active->num_of_ports < bond->params.min_links) { + if (__agg_active_ports(active) < bond->params.min_links) { if (netif_carrier_ok(bond->dev)) { netif_carrier_off(bond->dev); goto out; From d2b13233879ca1268a1c027d4573109e5a777811 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jun 2016 14:23:12 -0700 Subject: [PATCH 0948/1050] net: bgmac: Fix SOF bit checking We are checking for the Start of Frame bit in the ctl1 word, while this bit is set in the ctl0 word instead. Read the ctl0 word and update the check to verify that. Fixes: 9cde94506eac ("bgmac: implement scatter/gather support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index ee5f431ab32a..70926c611f25 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -267,15 +267,16 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring) while (ring->start != ring->end) { int slot_idx = ring->start % BGMAC_TX_RING_SLOTS; struct bgmac_slot_info *slot = &ring->slots[slot_idx]; - u32 ctl1; + u32 ctl0, ctl1; int len; if (slot_idx == empty_slot) break; + ctl0 = le32_to_cpu(ring->cpu_base[slot_idx].ctl0); ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1); len = ctl1 & BGMAC_DESC_CTL1_LEN; - if (ctl1 & BGMAC_DESC_CTL0_SOF) + if (ctl0 & BGMAC_DESC_CTL0_SOF) /* Unmap no longer used buffer */ dma_unmap_single(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE); From c3897f2a69e54dd113fc9abd2daf872e5b495798 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jun 2016 14:25:32 -0700 Subject: [PATCH 0949/1050] net: bgmac: Start transmit queue in bgmac_open The driver does not start the transmit queue in bgmac_open(). If the queue was stopped prior to closing then re-opening the interface, we would never be able to wake-up again. Fixes: dd4544f05469 ("bgmac: driver for GBit MAC core on BCMA bus") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 70926c611f25..85cd07f72ffb 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1314,6 +1314,9 @@ static int bgmac_open(struct net_device *net_dev) phy_start(bgmac->phy_dev); netif_carrier_on(net_dev); + + netif_start_queue(net_dev); + return 0; } From 3894396e64994f31c3ef5c7e6f63dded0593e567 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jun 2016 14:25:33 -0700 Subject: [PATCH 0950/1050] net: bgmac: Remove superflous netif_carrier_on() bgmac_open() calls phy_start() to initialize the PHY state machine, which will set the interface's carrier state accordingly, no need to force that as this could be conflicting with the PHY state determined by PHYLIB. Fixes: dd4544f05469 ("bgmac: driver for GBit MAC core on BCMA bus") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 85cd07f72ffb..a6333d38ecc0 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1313,8 +1313,6 @@ static int bgmac_open(struct net_device *net_dev) phy_start(bgmac->phy_dev); - netif_carrier_on(net_dev); - netif_start_queue(net_dev); return 0; From f299a02d5f13c4deb52c1a7ddf2b42630fe6294a Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Fri, 24 Jun 2016 08:52:11 +0800 Subject: [PATCH 0951/1050] net/mlx5: use mlx5_buf_alloc_node instead of mlx5_buf_alloc in mlx5_wq_ll_create Commit 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on reader NUMA node") introduced mlx5_*_alloc_node() but missed changing some calling and warn messages. This patch introduces 2 changes: * Use mlx5_buf_alloc_node() instead of mlx5_buf_alloc() in mlx5_wq_ll_create() * Update the failure warn messages with _node postfix for mlx5_*_alloc function names Fixes: 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on reader NUMA node") Signed-off-by: Wang Sheng-Hui Acked-By: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/wq.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index ce21ee5b2357..821a087c7ae2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -75,14 +75,14 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq), &wq_ctrl->buf, param->buf_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); goto err_db_free; } @@ -111,14 +111,14 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq), &wq_ctrl->buf, param->buf_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); goto err_db_free; } @@ -148,13 +148,14 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_buf_alloc(mdev, mlx5_wq_ll_get_byte_size(wq), &wq_ctrl->buf); + err = mlx5_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); goto err_db_free; } From 126e7557328a1cd576be4fca95b133a2695283ff Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sun, 19 Jun 2016 23:51:02 +0300 Subject: [PATCH 0952/1050] mac80211: Fix mesh estab_plinks counting in STA removal case If a user space program (e.g., wpa_supplicant) deletes a STA entry that is currently in NL80211_PLINK_ESTAB state, the number of established plinks counter was not decremented and this could result in rejecting new plink establishment before really hitting the real maximum plink limit. For !user_mpm case, this decrementation is handled by mesh_plink_deactive(). Fix this by decrementing estab_plinks on STA deletion (mesh_sta_cleanup() gets called from there) so that the counter has a correct value and the Beacon frame advertisement in Mesh Configuration element shows the proper value for capability to accept additional peers. Cc: stable@vger.kernel.org Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 21b1fdf5d01d..6a1603bcdced 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -148,14 +148,17 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_sta_cleanup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed; + u32 changed = 0; /* * maybe userspace handles peer allocation and peering, but in either * case the beacon is still generated by the kernel and we might need * an update. */ - changed = mesh_accept_plinks_update(sdata); + if (sdata->u.mesh.user_mpm && + sta->mesh->plink_state == NL80211_PLINK_ESTAB) + changed |= mesh_plink_dec_estab_count(sdata); + changed |= mesh_accept_plinks_update(sdata); if (!sdata->u.mesh.user_mpm) { changed |= mesh_plink_deactivate(sta); del_timer_sync(&sta->mesh->plink_timer); From cca0e542e02e48cce541a49c4046ec094ec27c1e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 24 Jun 2016 14:49:02 +1000 Subject: [PATCH 0953/1050] powerpc/eeh: Fix wrong argument passed to eeh_rmv_device() When calling eeh_rmv_device() in eeh_reset_device() for partial hotplug case, @rmv_data instead of its address is the proper argument. Otherwise, the stack frame is corrupted when writing to @rmv_data (actually its address) in eeh_rmv_device(). It results in kernel crash as observed. This fixes the issue by passing @rmv_data, not its address to eeh_rmv_device() in eeh_reset_device(). Fixes: 67086e32b564 ("powerpc/eeh: powerpc/eeh: Support error recovery for VF PE") Reported-by: Pridhiviraj Paidipeddi Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b5f73cb5eeb6..d70101e1e25c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -647,7 +647,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, pci_unlock_rescan_remove(); } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); + eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } /* From 62630ea768869beeb1e514b0bf5607a0c9b93d12 Mon Sep 17 00:00:00 2001 From: Allen Hung Date: Thu, 23 Jun 2016 16:31:29 +0800 Subject: [PATCH 0954/1050] Revert "HID: multitouch: enable palm rejection if device implements confidence usage" This reverts commit 25a84db15b3f ("HID: multitouch: enable palm rejection if device implements confidence usage") The commit enables palm rejection for Win8 Precision Touchpad devices but the quirk MT_QUIRK_VALID_IS_CONFIDENCE it is using is not working very properly. This quirk is originally designed for some WIn7 touchscreens. Use of this for a Win8 Precision Touchpad will cause unexpected pointer jumping problem. Cc: stable@vger.kernel.org # v4.5+ Reviewed-by: Benjamin Tissoires Tested-by: Andy Lutomirski # XPS 13 9350, BIOS 1.4.3 Signed-off-by: Allen Hung Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 95b7d61d9910..4ef700688657 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -502,11 +502,6 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, mt_store_field(usage, td, hi); return 1; case HID_DG_CONFIDENCE: - if (cls->name == MT_CLS_WIN_8 && - field->application == HID_DG_TOUCHPAD) { - cls->quirks &= ~MT_QUIRK_ALWAYS_VALID; - cls->quirks |= MT_QUIRK_VALID_IS_CONFIDENCE; - } mt_store_field(usage, td, hi); return 1; case HID_DG_TIPSWITCH: From 6dd2e27a103d716921cc4a1a96a9adc0a8e3ab57 Mon Sep 17 00:00:00 2001 From: Allen Hung Date: Thu, 23 Jun 2016 16:31:30 +0800 Subject: [PATCH 0955/1050] HID: multitouch: enable palm rejection for Windows Precision Touchpad The usage Confidence is mandary to Windows Precision Touchpad devices. If it is examined in input_mapping on a WIndows Precision Touchpad, a new add quirk MT_QUIRK_CONFIDENCE desgned for such devices will be applied to the device. A touch with the confidence bit is not set is determined as invalid. Tested on Dell XPS13 9343 Cc: stable@vger.kernel.org # v4.5+ Reviewed-by: Benjamin Tissoires Tested-by: Andy Lutomirski # XPS 13 9350, BIOS 1.4.3 Signed-off-by: Allen Hung Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 4ef700688657..fb6f1f447279 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -61,6 +61,7 @@ MODULE_LICENSE("GPL"); #define MT_QUIRK_ALWAYS_VALID (1 << 4) #define MT_QUIRK_VALID_IS_INRANGE (1 << 5) #define MT_QUIRK_VALID_IS_CONFIDENCE (1 << 6) +#define MT_QUIRK_CONFIDENCE (1 << 7) #define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE (1 << 8) #define MT_QUIRK_NO_AREA (1 << 9) #define MT_QUIRK_IGNORE_DUPLICATES (1 << 10) @@ -78,6 +79,7 @@ struct mt_slot { __s32 contactid; /* the device ContactID assigned to this slot */ bool touch_state; /* is the touch valid? */ bool inrange_state; /* is the finger in proximity of the sensor? */ + bool confidence_state; /* is the touch made by a finger? */ }; struct mt_class { @@ -502,6 +504,9 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, mt_store_field(usage, td, hi); return 1; case HID_DG_CONFIDENCE: + if (cls->name == MT_CLS_WIN_8 && + field->application == HID_DG_TOUCHPAD) + cls->quirks |= MT_QUIRK_CONFIDENCE; mt_store_field(usage, td, hi); return 1; case HID_DG_TIPSWITCH: @@ -614,6 +619,7 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input) return; if (td->curvalid || (td->mtclass.quirks & MT_QUIRK_ALWAYS_VALID)) { + int active; int slotnum = mt_compute_slot(td, input); struct mt_slot *s = &td->curdata; struct input_mt *mt = input->mt; @@ -628,10 +634,14 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input) return; } + if (!(td->mtclass.quirks & MT_QUIRK_CONFIDENCE)) + s->confidence_state = 1; + active = (s->touch_state || s->inrange_state) && + s->confidence_state; + input_mt_slot(input, slotnum); - input_mt_report_slot_state(input, MT_TOOL_FINGER, - s->touch_state || s->inrange_state); - if (s->touch_state || s->inrange_state) { + input_mt_report_slot_state(input, MT_TOOL_FINGER, active); + if (active) { /* this finger is in proximity of the sensor */ int wide = (s->w > s->h); /* divided by two to match visual scale of touch */ @@ -696,6 +706,8 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field, td->curdata.touch_state = value; break; case HID_DG_CONFIDENCE: + if (quirks & MT_QUIRK_CONFIDENCE) + td->curdata.confidence_state = value; if (quirks & MT_QUIRK_VALID_IS_CONFIDENCE) td->curvalid = value; break; From 0888d5f3c0f183ea6177355752ada433d370ac89 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 24 Jun 2016 12:35:18 +0200 Subject: [PATCH 0956/1050] Bridge: Fix ipv6 mc snooping if bridge has no ipv6 address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bridge is falsly dropping ipv6 mulitcast packets if there is: 1. No ipv6 address assigned on the brigde. 2. No external mld querier present. 3. The internal querier enabled. When the bridge fails to build mld queries, because it has no ipv6 address, it slilently returns, but keeps the local querier enabled. This specific case causes confusing packet loss. Ipv6 multicast snooping can only work if: a) An external querier is present OR b) The bridge has an ipv6 address an is capable of sending own queries Otherwise it has to forward/flood the ipv6 multicast traffic, because snooping cannot work. This patch fixes the issue by adding a flag to the bridge struct that indicates that there is currently no ipv6 address assinged to the bridge and returns a false state for the local querier in __br_multicast_querier_exists(). Special thanks to Linus Lüssing. Fixes: d1d81d4c3dd8 ("bridge: check return value of ipv6_dev_get_saddr()") Signed-off-by: Daniel Danzberger Acked-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 4 ++++ net/bridge/br_private.h | 23 +++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6852f3c7009c..43844144c9c4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -464,8 +464,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); + br->has_ipv6_addr = 0; return NULL; } + + br->has_ipv6_addr = 1; ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); @@ -1745,6 +1748,7 @@ void br_multicast_init(struct net_bridge *br) br->ip6_other_query.delay_time = 0; br->ip6_querier.port = NULL; #endif + br->has_ipv6_addr = 1; spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c7fb5d7a7218..52edecf3c294 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -314,6 +314,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; u8 multicast_query_use_ifaddr:1; + u8 has_ipv6_addr:1; u32 hash_elasticity; u32 hash_max; @@ -588,10 +589,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br) static inline bool __br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_other_query *querier) + struct bridge_mcast_other_query *querier, + const bool is_ipv6) { + bool own_querier_enabled; + + if (br->multicast_querier) { + if (is_ipv6 && !br->has_ipv6_addr) + own_querier_enabled = false; + else + own_querier_enabled = true; + } else { + own_querier_enabled = false; + } + return time_is_before_jiffies(querier->delay_time) && - (br->multicast_querier || timer_pending(&querier->timer)); + (own_querier_enabled || timer_pending(&querier->timer)); } static inline bool br_multicast_querier_exists(struct net_bridge *br, @@ -599,10 +612,12 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, &br->ip4_other_query); + return __br_multicast_querier_exists(br, + &br->ip4_other_query, false); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, &br->ip6_other_query); + return __br_multicast_querier_exists(br, + &br->ip6_other_query, true); #endif default: return false; From ab8ed951080e8f59b1da4ea10ac7c05ba24a3cbd Mon Sep 17 00:00:00 2001 From: Aaron Campbell Date: Fri, 24 Jun 2016 10:05:32 -0300 Subject: [PATCH 0957/1050] connector: fix out-of-order cn_proc netlink message delivery The proc connector messages include a sequence number, allowing userspace programs to detect lost messages. However, performing this detection is currently more difficult than necessary, since netlink messages can be delivered to the application out-of-order. To fix this, leave pre-emption disabled during cn_netlink_send(), and use GFP_NOWAIT. The following was written as a test case. Building the kernel w/ make -j32 proved a reliable way to generate out-of-order cn_proc messages. int main(int argc, char *argv[]) { static uint32_t last_seq[CPU_SETSIZE], seq; int cpu, fd; struct sockaddr_nl sa; struct __attribute__((aligned(NLMSG_ALIGNTO))) { struct nlmsghdr nl_hdr; struct __attribute__((__packed__)) { struct cn_msg cn_msg; struct proc_event cn_proc; }; } rmsg; struct __attribute__((aligned(NLMSG_ALIGNTO))) { struct nlmsghdr nl_hdr; struct __attribute__((__packed__)) { struct cn_msg cn_msg; enum proc_cn_mcast_op cn_mcast; }; } smsg; fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); if (fd < 0) { perror("socket"); } sa.nl_family = AF_NETLINK; sa.nl_groups = CN_IDX_PROC; sa.nl_pid = getpid(); if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) { perror("bind"); } memset(&smsg, 0, sizeof(smsg)); smsg.nl_hdr.nlmsg_len = sizeof(smsg); smsg.nl_hdr.nlmsg_pid = getpid(); smsg.nl_hdr.nlmsg_type = NLMSG_DONE; smsg.cn_msg.id.idx = CN_IDX_PROC; smsg.cn_msg.id.val = CN_VAL_PROC; smsg.cn_msg.len = sizeof(enum proc_cn_mcast_op); smsg.cn_mcast = PROC_CN_MCAST_LISTEN; if (send(fd, &smsg, sizeof(smsg), 0) != sizeof(smsg)) { perror("send"); } while (recv(fd, &rmsg, sizeof(rmsg), 0) == sizeof(rmsg)) { cpu = rmsg.cn_proc.cpu; if (cpu < 0) { continue; } seq = rmsg.cn_msg.seq; if ((last_seq[cpu] != 0) && (seq != last_seq[cpu] + 1)) { printf("out-of-order seq=%d on cpu=%d\n", seq, cpu); } last_seq[cpu] = seq; } /* NOTREACHED */ perror("recv"); return -1; } Signed-off-by: Aaron Campbell Signed-off-by: David S. Miller --- drivers/connector/cn_proc.c | 43 +++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 15d06fcf0b50..b02f9c606e0b 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -56,11 +56,21 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; /* proc_event_counts is used as the sequence number of the netlink message */ static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; -static inline void get_seq(__u32 *ts, int *cpu) +static inline void send_msg(struct cn_msg *msg) { preempt_disable(); - *ts = __this_cpu_inc_return(proc_event_counts) - 1; - *cpu = smp_processor_id(); + + msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; + ((struct proc_event *)msg->data)->cpu = smp_processor_id(); + + /* + * Preemption remains disabled during send to ensure the messages are + * ordered according to their sequence numbers. + * + * If cn_netlink_send() fails, the data is not sent. + */ + cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); + preempt_enable(); } @@ -77,7 +87,6 @@ void proc_fork_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_FORK; rcu_read_lock(); @@ -92,8 +101,7 @@ void proc_fork_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - /* If cn_netlink_send() failed, the data is not sent */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_exec_connector(struct task_struct *task) @@ -108,7 +116,6 @@ void proc_exec_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_EXEC; ev->event_data.exec.process_pid = task->pid; @@ -118,7 +125,7 @@ void proc_exec_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_id_connector(struct task_struct *task, int which_id) @@ -150,14 +157,13 @@ void proc_id_connector(struct task_struct *task, int which_id) return; } rcu_read_unlock(); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_sid_connector(struct task_struct *task) @@ -172,7 +178,6 @@ void proc_sid_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_SID; ev->event_data.sid.process_pid = task->pid; @@ -182,7 +187,7 @@ void proc_sid_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_ptrace_connector(struct task_struct *task, int ptrace_id) @@ -197,7 +202,6 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_PTRACE; ev->event_data.ptrace.process_pid = task->pid; @@ -215,7 +219,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_comm_connector(struct task_struct *task) @@ -230,7 +234,6 @@ void proc_comm_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_COMM; ev->event_data.comm.process_pid = task->pid; @@ -241,7 +244,7 @@ void proc_comm_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_coredump_connector(struct task_struct *task) @@ -256,7 +259,6 @@ void proc_coredump_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_COREDUMP; ev->event_data.coredump.process_pid = task->pid; @@ -266,7 +268,7 @@ void proc_coredump_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_exit_connector(struct task_struct *task) @@ -281,7 +283,6 @@ void proc_exit_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_EXIT; ev->event_data.exit.process_pid = task->pid; @@ -293,7 +294,7 @@ void proc_exit_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } /* @@ -325,7 +326,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) msg->ack = rcvd_ack + 1; msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } /** From 9a0fee2b552b1235fb1706ae1fc664ae74573be8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Jun 2016 16:02:35 -0400 Subject: [PATCH 0958/1050] sock_diag: do not broadcast raw socket destruction Diag intends to broadcast tcp_sk and udp_sk socket destruction. Testing sk->sk_protocol for IPPROTO_TCP/IPPROTO_UDP alone is not sufficient for this. Raw sockets can have the same type. Add a test for sk->sk_type. Fixes: eb4cb008529c ("sock_diag: define destruction multicast groups") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 4018b48f2b3b..a0596ca0e80a 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -36,6 +36,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) { switch (sk->sk_family) { case AF_INET: + if (sk->sk_type == SOCK_RAW) + return SKNLGRP_NONE; + switch (sk->sk_protocol) { case IPPROTO_TCP: return SKNLGRP_INET_TCP_DESTROY; @@ -45,6 +48,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) return SKNLGRP_NONE; } case AF_INET6: + if (sk->sk_type == SOCK_RAW) + return SKNLGRP_NONE; + switch (sk->sk_protocol) { case IPPROTO_TCP: return SKNLGRP_INET6_TCP_DESTROY; From 76a658c20efd541a62838d9ff68ce94170d7a549 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 28 Jun 2016 12:06:58 -0400 Subject: [PATCH 0959/1050] audit: move calcs after alloc and check when logging set loginuid Move the calculations of values after the allocation in case the allocation fails. This avoids wasting effort in the rare case that it fails, but more importantly saves us extra logic to release the tty ref. Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/auditsc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 71e14d836e69..33dafa70229d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1985,14 +1985,15 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, if (!audit_enabled) return; + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); + if (!ab) + return; + uid = from_kuid(&init_user_ns, task_uid(current)); oldloginuid = from_kuid(&init_user_ns, koldloginuid); loginuid = from_kuid(&init_user_ns, kloginuid), tty = audit_get_tty(current); - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); - if (!ab) - return; audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid); audit_log_task_context(ab); audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", From 3f5be2da8565c1cce5655bb0948fcc957c6eb6c6 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 28 Jun 2016 12:07:50 -0400 Subject: [PATCH 0960/1050] audit: move audit_get_tty to reduce scope and kabi changes The only users of audit_get_tty and audit_put_tty are internal to audit, so move it out of include/linux/audit.h to kernel.h and create a proper function rather than inlining it. This also reduces kABI changes. Suggested-by: Paul Moore Signed-off-by: Richard Guy Briggs [PM: line wrapped description] Signed-off-by: Paul Moore --- include/linux/audit.h | 24 ------------------------ kernel/audit.c | 17 +++++++++++++++++ kernel/audit.h | 4 ++++ kernel/auditsc.c | 1 - 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/include/linux/audit.h b/include/linux/audit.h index 32cdafb312d8..b40ed5df5542 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -26,7 +26,6 @@ #include #include #include -#include #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) @@ -344,23 +343,6 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid; } -static inline struct tty_struct *audit_get_tty(struct task_struct *tsk) -{ - struct tty_struct *tty = NULL; - unsigned long flags; - - spin_lock_irqsave(&tsk->sighand->siglock, flags); - if (tsk->signal) - tty = tty_kref_get(tsk->signal->tty); - spin_unlock_irqrestore(&tsk->sighand->siglock, flags); - return tty; -} - -static inline void audit_put_tty(struct tty_struct *tty) -{ - tty_kref_put(tty); -} - extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); extern void __audit_bprm(struct linux_binprm *bprm); @@ -518,12 +500,6 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return -1; } -static inline struct tty_struct *audit_get_tty(struct task_struct *tsk) -{ - return NULL; -} -static inline void audit_put_tty(struct tty_struct *tty) -{ } static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, diff --git a/kernel/audit.c b/kernel/audit.c index 384374a1d232..d5971010e44a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1866,6 +1866,23 @@ out_null: audit_log_format(ab, " exe=(null)"); } +struct tty_struct *audit_get_tty(struct task_struct *tsk) +{ + struct tty_struct *tty = NULL; + unsigned long flags; + + spin_lock_irqsave(&tsk->sighand->siglock, flags); + if (tsk->signal) + tty = tty_kref_get(tsk->signal->tty); + spin_unlock_irqrestore(&tsk->sighand->siglock, flags); + return tty; +} + +void audit_put_tty(struct tty_struct *tty) +{ + tty_kref_put(tty); +} + void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { const struct cred *cred; diff --git a/kernel/audit.h b/kernel/audit.h index cbbe6bb6496e..a492f4c4e710 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -23,6 +23,7 @@ #include #include #include +#include /* AUDIT_NAMES is the number of slots we reserve in the audit_context * for saving names from getname(). If we get more names we will allocate @@ -262,6 +263,9 @@ extern struct audit_entry *audit_dupe_rule(struct audit_krule *old); extern void audit_log_d_path_exe(struct audit_buffer *ab, struct mm_struct *mm); +extern struct tty_struct *audit_get_tty(struct task_struct *tsk); +extern void audit_put_tty(struct tty_struct *tty); + /* audit watch functions */ #ifdef CONFIG_AUDIT_WATCH extern void audit_put_watch(struct audit_watch *watch); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 33dafa70229d..60a354eed2fa 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -63,7 +63,6 @@ #include #include #include -#include #include #include #include From e547f2628327fec6afd2e03b46f113f614cca05b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 25 Jun 2016 19:19:28 -0400 Subject: [PATCH 0961/1050] NFS: Fix another OPEN_DOWNGRADE bug Olga Kornievskaia reports that the following test fails to trigger an OPEN_DOWNGRADE on the wire, and only triggers the final CLOSE. fd0 = open(foo, RDRW) -- should be open on the wire for "both" fd1 = open(foo, RDONLY) -- should be open on the wire for "read" close(fd0) -- should trigger an open_downgrade read(fd1) close(fd1) The issue is that we're missing a check for whether or not the current state transitioned from an O_RDWR state as opposed to having transitioned from a combination of O_RDONLY and O_WRONLY. Reported-by: Olga Kornievskaia Fixes: cd9288ffaea4 ("NFSv4: Fix another bug in the close/open_downgrade code") Cc: stable@vger.kernel.org # 2.6.33+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 406dd3eb68e2..ff416d0e24bc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2882,12 +2882,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) call_close |= is_wronly; else if (is_wronly) calldata->arg.fmode |= FMODE_WRITE; + if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE)) + call_close |= is_rdwr; } else if (is_rdwr) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; - if (calldata->arg.fmode == 0) - call_close |= is_rdwr; - if (!nfs4_valid_open_stateid(state)) call_close = 0; spin_unlock(&state->owner->so_lock); From 190ce8693c23eae09ba5f303a83bf2fbeb6478b1 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 28 Jun 2016 13:01:04 +1000 Subject: [PATCH 0962/1050] powerpc/tm: Avoid SLB faults in treclaim/trecheckpoint when RI=0 Currently we have 2 segments that are bolted for the kernel linear mapping (ie 0xc000... addresses). This is 0 to 1TB and also the kernel stacks. Anything accessed outside of these regions may need to be faulted in. (In practice machines with TM always have 1T segments) If a machine has < 2TB of memory we never fault on the kernel linear mapping as these two segments cover all physical memory. If a machine has > 2TB of memory, there may be structures outside of these two segments that need to be faulted in. This faulting can occur when running as a guest as the hypervisor may remove any SLB that's not bolted. When we treclaim and trecheckpoint we have a window where we need to run with the userspace GPRs. This means that we no longer have a valid stack pointer in r1. For this window we therefore clear MSR RI to indicate that any exceptions taken at this point won't be able to be handled. This means that we can't take segment misses in this RI=0 window. In this RI=0 region, we currently access the thread_struct for the process being context switched to or from. This thread_struct access may cause a segment fault since it's not guaranteed to be covered by the two bolted segment entries described above. We've seen this with a crash when running as a guest with > 2TB of memory on PowerVM: Unrecoverable exception 4100 at c00000000004f138 Oops: Unrecoverable exception, sig: 6 [#1] SMP NR_CPUS=2048 NUMA pSeries CPU: 1280 PID: 7755 Comm: kworker/1280:1 Tainted: G X 4.4.13-46-default #1 task: c000189001df4210 ti: c000189001d5c000 task.ti: c000189001d5c000 NIP: c00000000004f138 LR: 0000000010003a24 CTR: 0000000010001b20 REGS: c000189001d5f730 TRAP: 4100 Tainted: G X (4.4.13-46-default) MSR: 8000000100001031 CR: 24000048 XER: 00000000 CFAR: c00000000004ed18 SOFTE: 0 GPR00: ffffffffc58d7b60 c000189001d5f9b0 00000000100d7d00 000000003a738288 GPR04: 0000000000002781 0000000000000006 0000000000000000 c0000d1f4d889620 GPR08: 000000000000c350 00000000000008ab 00000000000008ab 00000000100d7af0 GPR12: 00000000100d7ae8 00003ffe787e67a0 0000000000000000 0000000000000211 GPR16: 0000000010001b20 0000000000000000 0000000000800000 00003ffe787df110 GPR20: 0000000000000001 00000000100d1e10 0000000000000000 00003ffe787df050 GPR24: 0000000000000003 0000000000010000 0000000000000000 00003fffe79e2e30 GPR28: 00003fffe79e2e68 00000000003d0f00 00003ffe787e67a0 00003ffe787de680 NIP [c00000000004f138] restore_gprs+0xd0/0x16c LR [0000000010003a24] 0x10003a24 Call Trace: [c000189001d5f9b0] [c000189001d5f9f0] 0xc000189001d5f9f0 (unreliable) [c000189001d5fb90] [c00000000001583c] tm_recheckpoint+0x6c/0xa0 [c000189001d5fbd0] [c000000000015c40] __switch_to+0x2c0/0x350 [c000189001d5fc30] [c0000000007e647c] __schedule+0x32c/0x9c0 [c000189001d5fcb0] [c0000000007e6b58] schedule+0x48/0xc0 [c000189001d5fce0] [c0000000000deabc] worker_thread+0x22c/0x5b0 [c000189001d5fd80] [c0000000000e7000] kthread+0x110/0x130 [c000189001d5fe30] [c000000000009538] ret_from_kernel_thread+0x5c/0xa4 Instruction dump: 7cb103a6 7cc0e3a6 7ca222a6 78a58402 38c00800 7cc62838 08860000 7cc000a6 38a00006 78c60022 7cc62838 0b060000 7ccff120 e8270078 e8a70098 ---[ end trace 602126d0a1dedd54 ]--- This fixes this by copying the required data from the thread_struct to the stack before we clear MSR RI. Then once we clear RI, we only access the stack, guaranteeing there's no segment miss. We also tighten the region over which we set RI=0 on the treclaim() path. This may have a slight performance impact since we're adding an mtmsr instruction. Fixes: 090b9284d725 ("powerpc/tm: Clear MSR RI in non-recoverable TM code") Signed-off-by: Michael Neuling Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/tm.S | 61 +++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index bf8f34a58670..b7019b559ddb 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim) std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) - /* We need to setup MSR for VSX register save instructions. Here we - * also clear the MSR RI since when we do the treclaim, we won't have a - * valid kernel pointer for a while. We clear RI here as it avoids - * adding another mtmsr closer to the treclaim. This makes the region - * maked as non-recoverable wider than it needs to be but it saves on - * inserting another mtmsrd later. - */ + /* We need to setup MSR for VSX register save instructions. */ mfmsr r14 mr r15, r14 ori r15, r15, MSR_FP - li r16, MSR_RI + li r16, 0 ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h @@ -176,7 +170,17 @@ dont_backup_fp: 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 - /* The moment we treclaim, ALL of our GPRs will switch + /* Clear MSR RI since we are about to change r1, EE is already off. */ + li r4, 0 + mtmsrd r4, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + * + * The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. */ @@ -197,6 +201,11 @@ dont_backup_fp: /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + + /* Reset MSR RI so we can take SLB faults again */ + li r11, MSR_RI + mtmsrd r11, 1 + mfspr r11, SPRN_PPR HMT_MEDIUM @@ -397,11 +406,6 @@ restore_gprs: ld r5, THREAD_TM_DSCR(r3) ld r6, THREAD_TM_PPR(r3) - /* Clear the MSR RI since we are about to change R1. EE is already off - */ - li r4, 0 - mtmsrd r4, 1 - REST_GPR(0, r7) /* GPR0 */ REST_2GPRS(2, r7) /* GPR2-3 */ REST_GPR(4, r7) /* GPR4 */ @@ -439,10 +443,33 @@ restore_gprs: ld r6, _CCR(r7) mtcr r6 - REST_GPR(1, r7) /* GPR1 */ - REST_GPR(5, r7) /* GPR5-7 */ REST_GPR(6, r7) - ld r7, GPR7(r7) + + /* + * Store r1 and r5 on the stack so that we can access them + * after we clear MSR RI. + */ + + REST_GPR(5, r7) + std r5, -8(r1) + ld r5, GPR1(r7) + std r5, -16(r1) + + REST_GPR(7, r7) + + /* Clear MSR RI since we are about to change r1. EE is already off */ + li r5, 0 + mtmsrd r5, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + */ + + ld r5, -8(r1) + ld r1, -16(r1) /* Commit register state as checkpointed state: */ TRECHKPT From a4859d75944a726533ab86d24bb5ffd1b2b7d6cc Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 29 Jun 2016 08:26:59 +0200 Subject: [PATCH 0963/1050] ovl: fix dentry leak for default_permissions When using the 'default_permissions' mount option, ovl_permission() on non-directories was missing a dput(alias), resulting in "BUG Dentry still in use". Signed-off-by: Miklos Szeredi Fixes: 8d3095f4ad47 ("ovl: default permissions") Cc: # v4.5+ --- fs/overlayfs/inode.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 1dbeab6cf96e..8514d692042b 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -121,16 +121,18 @@ int ovl_permission(struct inode *inode, int mask) err = vfs_getattr(&realpath, &stat); if (err) - return err; + goto out_dput; + err = -ESTALE; if ((stat.mode ^ inode->i_mode) & S_IFMT) - return -ESTALE; + goto out_dput; inode->i_mode = stat.mode; inode->i_uid = stat.uid; inode->i_gid = stat.gid; - return generic_permission(inode, mask); + err = generic_permission(inode, mask); + goto out_dput; } /* Careful in RCU walk mode */ From 69fc58a57e56bf5e39b48809aefffdaa1b04c945 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 24 Jun 2016 16:25:24 -0700 Subject: [PATCH 0964/1050] net: phy: Manage fixed PHY address space using IDA If we have a system which uses fixed PHY devices and calls fixed_phy_register() then fixed_phy_unregister() we can exhaust the number of fixed PHYs available after a while, since we keep incrementing the variable phy_fixed_addr, but we never decrement it. This patch fixes that by converting the fixed PHY allocation to using IDA, which takes care of the allocation/dealloaction of the PHY addresses for us. Fixes: a75951217472 ("net: phy: extend fixed driver with fixed_phy_register()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/fixed_phy.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 2d2e4339f0df..9ec7f7353434 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -23,6 +23,7 @@ #include #include #include +#include #define MII_REGS_NUM 29 @@ -286,6 +287,8 @@ err_regs: } EXPORT_SYMBOL_GPL(fixed_phy_add); +static DEFINE_IDA(phy_fixed_ida); + static void fixed_phy_del(int phy_addr) { struct fixed_mdio_bus *fmb = &platform_fmb; @@ -297,14 +300,12 @@ static void fixed_phy_del(int phy_addr) if (gpio_is_valid(fp->link_gpio)) gpio_free(fp->link_gpio); kfree(fp); + ida_simple_remove(&phy_fixed_ida, phy_addr); return; } } } -static int phy_fixed_addr; -static DEFINE_SPINLOCK(phy_fixed_addr_lock); - struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, int link_gpio, @@ -319,17 +320,15 @@ struct phy_device *fixed_phy_register(unsigned int irq, return ERR_PTR(-EPROBE_DEFER); /* Get the next available PHY address, up to PHY_MAX_ADDR */ - spin_lock(&phy_fixed_addr_lock); - if (phy_fixed_addr == PHY_MAX_ADDR) { - spin_unlock(&phy_fixed_addr_lock); - return ERR_PTR(-ENOSPC); - } - phy_addr = phy_fixed_addr++; - spin_unlock(&phy_fixed_addr_lock); + phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL); + if (phy_addr < 0) + return ERR_PTR(phy_addr); ret = fixed_phy_add(irq, phy_addr, status, link_gpio); - if (ret < 0) + if (ret < 0) { + ida_simple_remove(&phy_fixed_ida, phy_addr); return ERR_PTR(ret); + } phy = get_phy_device(fmb->mii_bus, phy_addr, false); if (IS_ERR(phy)) { @@ -434,6 +433,7 @@ static void __exit fixed_mdio_bus_exit(void) list_del(&fp->node); kfree(fp); } + ida_destroy(&phy_fixed_ida); } module_exit(fixed_mdio_bus_exit); From 0b3dd7dfb81ad8af53791ea2bb64b83bac1b7d32 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sun, 26 Jun 2016 11:16:09 +0200 Subject: [PATCH 0965/1050] batman-adv: replace WARN with rate limited output on non-existing VLAN If a VLAN tagged frame is received and the corresponding VLAN is not configured on the soft interface, it will splat a WARN on every packet received. This is a quite annoying behaviour for some scenarios, e.g. if bat0 is bridged with eth0, and there are arbitrary VLAN tagged frames from Ethernet coming in without having any VLAN configuration on bat0. The code should probably create vlan objects on the fly and transparently transport these VLAN-tagged Ethernet frames, but until this is done, at least the WARN splat should be replaced by a rate limited output. Fixes: 354136bcc3c4 ("batman-adv: fix kernel crash due to missing NULL checks") Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/batman-adv/translation-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index feaf492b01ca..72abab7b01eb 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -650,8 +650,10 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); - if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d", - addr, BATADV_PRINT_VID(vid))) { + if (!vlan) { + net_ratelimited_function(batadv_info, soft_iface, + "adding TT local entry %pM to non-existent VLAN %d\n", + addr, BATADV_PRINT_VID(vid)); kfree(tt_local); tt_local = NULL; goto out; From 9c4604a298e0a9807eaf2cd912d1ebf24d98fbeb Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 26 Jun 2016 11:16:10 +0200 Subject: [PATCH 0966/1050] batman-adv: Fix use-after-free/double-free of tt_req_node The tt_req_node is added and removed from a list inside a spinlock. But the locking is sometimes removed even when the object is still referenced and will be used later via this reference. For example batadv_send_tt_request can create a new tt_req_node (including add to a list) and later re-acquires the lock to remove it from the list and to free it. But at this time another context could have already removed this tt_req_node from the list and freed it. CPU#0 batadv_batman_skb_recv from net_device 0 -> batadv_iv_ogm_receive -> batadv_iv_ogm_process -> batadv_iv_ogm_process_per_outif -> batadv_tvlv_ogm_receive -> batadv_tvlv_ogm_receive -> batadv_tvlv_containers_process -> batadv_tvlv_call_handler -> batadv_tt_tvlv_ogm_handler_v1 -> batadv_tt_update_orig -> batadv_send_tt_request -> batadv_tt_req_node_new spin_lock(...) allocates new tt_req_node and adds it to list spin_unlock(...) return tt_req_node CPU#1 batadv_batman_skb_recv from net_device 1 -> batadv_recv_unicast_tvlv -> batadv_tvlv_containers_process -> batadv_tvlv_call_handler -> batadv_tt_tvlv_unicast_handler_v1 -> batadv_handle_tt_response spin_lock(...) tt_req_node gets removed from list and is freed spin_unlock(...) CPU#0 <- returned to batadv_send_tt_request spin_lock(...) tt_req_node gets removed from list and is freed MEMORY CORRUPTION/SEGFAULT/... spin_unlock(...) This can only be solved via reference counting to allow multiple contexts to handle the list manipulation while making sure that only the last context holding a reference will free the object. Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism") Signed-off-by: Sven Eckelmann Tested-by: Martin Weinelt Tested-by: Amadeus Alfa Signed-off-by: Marek Lindner Signed-off-by: David S. Miller --- net/batman-adv/translation-table.c | 43 +++++++++++++++++++++++++----- net/batman-adv/types.h | 2 ++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 72abab7b01eb..cfb5ccdfd62b 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2271,6 +2271,29 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, return crc; } +/** + * batadv_tt_req_node_release - free tt_req node entry + * @ref: kref pointer of the tt req_node entry + */ +static void batadv_tt_req_node_release(struct kref *ref) +{ + struct batadv_tt_req_node *tt_req_node; + + tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount); + + kfree(tt_req_node); +} + +/** + * batadv_tt_req_node_put - decrement the tt_req_node refcounter and + * possibly release it + * @tt_req_node: tt_req_node to be free'd + */ +static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node) +{ + kref_put(&tt_req_node->refcount, batadv_tt_req_node_release); +} + static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node; @@ -2280,7 +2303,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2317,7 +2340,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) if (batadv_has_timed_out(node->issued_at, BATADV_TT_REQUEST_TIMEOUT)) { hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } } spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2349,9 +2372,11 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, if (!tt_req_node) goto unlock; + kref_init(&tt_req_node->refcount); ether_addr_copy(tt_req_node->addr, orig_node->orig); tt_req_node->issued_at = jiffies; + kref_get(&tt_req_node->refcount); hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list); unlock: spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2615,13 +2640,19 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, out: if (primary_if) batadv_hardif_put(primary_if); + if (ret && tt_req_node) { spin_lock_bh(&bat_priv->tt.req_list_lock); - /* hlist_del_init() verifies tt_req_node still is in the list */ - hlist_del_init(&tt_req_node->list); + if (!hlist_unhashed(&tt_req_node->list)) { + hlist_del_init(&tt_req_node->list); + batadv_tt_req_node_put(tt_req_node); + } spin_unlock_bh(&bat_priv->tt.req_list_lock); - kfree(tt_req_node); } + + if (tt_req_node) + batadv_tt_req_node_put(tt_req_node); + kfree(tvlv_tt_data); return ret; } @@ -3057,7 +3088,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, if (!batadv_compare_eth(node->addr, resp_src)) continue; hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 6a577f4f8ba7..ba846b078af8 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1137,11 +1137,13 @@ struct batadv_tt_change_node { * struct batadv_tt_req_node - data to keep track of the tt requests in flight * @addr: mac address address of the originator this request was sent to * @issued_at: timestamp used for purging stale tt requests + * @refcount: number of contexts the object is used by * @list: list node for batadv_priv_tt::req_list */ struct batadv_tt_req_node { u8 addr[ETH_ALEN]; unsigned long issued_at; + struct kref refcount; struct hlist_node list; }; From baceced93274ff2f846eae991664f9094425ffa8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 26 Jun 2016 11:16:11 +0200 Subject: [PATCH 0967/1050] batman-adv: Fix double-put of vlan object Each batadv_tt_local_entry hold a single reference to a batadv_softif_vlan. In case a new entry cannot be added to the hash table, the error path puts the reference, but the reference will also now be dropped by batadv_tt_local_entry_release(). Fixes: a33d970d0b54 ("batman-adv: Fix reference counting of vlan object for tt_local_entry") Signed-off-by: Ben Hutchings Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/batman-adv/translation-table.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index cfb5ccdfd62b..57ec87f37050 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -693,7 +693,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (unlikely(hash_added != 0)) { /* remove the reference for the hash */ batadv_tt_local_entry_put(tt_local); - batadv_softif_vlan_put(vlan); goto out; } From 3b55e4422087f9f7b241031d758a0c65584e4297 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 26 Jun 2016 11:16:12 +0200 Subject: [PATCH 0968/1050] batman-adv: Fix ICMP RR ethernet access after skb_linearize The skb_linearize may reallocate the skb. This makes the calculated pointer for ethhdr invalid. But it the pointer is used later to fill in the RR field of the batadv_icmp_packet_rr packet. Instead re-evaluate eth_hdr after the skb_linearize+skb_cow to fix the pointer and avoid the invalid read. Fixes: da6b8c20a5b8 ("batman-adv: generalize batman-adv icmp packet handling") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: David S. Miller --- net/batman-adv/routing.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index e3857ed4057f..6c2901a86230 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -374,6 +374,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (skb_cow(skb, ETH_HLEN) < 0) goto out; + ethhdr = eth_hdr(skb); icmph = (struct batadv_icmp_header *)skb->data; icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph; if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN) From 420cb1b764f9169c5d2601b4af90e4a1702345ee Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 26 Jun 2016 11:16:13 +0200 Subject: [PATCH 0969/1050] batman-adv: Clean up untagged vlan when destroying via rtnl-link The untagged vlan object is only destroyed when the interface is removed via the legacy sysfs interface. But it also has to be destroyed when the standard rtnl-link interface is used. Fixes: 5d2c05b21337 ("batman-adv: add per VLAN interface attribute framework") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: David S. Miller --- net/batman-adv/soft-interface.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 343d2c904399..287a3879ed7e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1033,7 +1033,9 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface) static void batadv_softif_destroy_netlink(struct net_device *soft_iface, struct list_head *head) { + struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_hard_iface *hard_iface; + struct batadv_softif_vlan *vlan; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface == soft_iface) @@ -1041,6 +1043,13 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface, BATADV_IF_CLEANUP_KEEP); } + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_put(vlan); + } + batadv_sysfs_del_meshif(soft_iface); unregister_netdevice_queue(soft_iface, head); } From 3ec0a0f10ceb77c78f540ded1d13bf0cf7f89a07 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Mon, 27 Jun 2016 13:09:59 +0530 Subject: [PATCH 0970/1050] net: marvell: Add separate config ANEG function for Marvell 88E1111 Marvell 88E1111 currently uses the generic marvell config ANEG function. This function has a sequence accessing Page 5 and Register 31, both of which are not defined or reserved for this PHY. Hence this patch adds a new config ANEG function for Marvell 88E1111 without these erroneous accesses. Signed-off-by: Harini Katakam Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 44 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 79ccc11facc3..ec2c1eee6405 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -285,6 +285,48 @@ static int marvell_config_aneg(struct phy_device *phydev) return 0; } +static int m88e1111_config_aneg(struct phy_device *phydev) +{ + int err; + + /* The Marvell PHY has an errata which requires + * that certain registers get written in order + * to restart autonegotiation + */ + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + err = marvell_set_polarity(phydev, phydev->mdix); + if (err < 0) + return err; + + err = phy_write(phydev, MII_M1111_PHY_LED_CONTROL, + MII_M1111_PHY_LED_DIRECT); + if (err < 0) + return err; + + err = genphy_config_aneg(phydev); + if (err < 0) + return err; + + if (phydev->autoneg != AUTONEG_ENABLE) { + int bmcr; + + /* A write to speed/duplex bits (that is performed by + * genphy_config_aneg() call above) must be followed by + * a software reset. Otherwise, the write has no effect. + */ + bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; + + err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET); + if (err < 0) + return err; + } + + return 0; +} + #ifdef CONFIG_OF_MDIO /* * Set and/or override some configuration registers based on the @@ -1175,7 +1217,7 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1111_config_init, - .config_aneg = &marvell_config_aneg, + .config_aneg = &m88e1111_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, From 664a84d2c77cbff2945ed7f96d08afbba42b6293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 13 May 2016 20:53:56 +0300 Subject: [PATCH 0971/1050] drm/i915: Refresh cached DP port register value on resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During hibernation the cached DP port register value will be left with whatever value we have there when we create the hibernation image. Currently that means the port (and eDP PLL) will be off in the cached value. However when we resume there is no guarantee that the value in the actual register will match the cached value. If i915 isn't loaded in the kernel that loads the hibernation image, the port may well be on (eg. left on by the BIOS). The encoder state readout does the right thing in this case and updates our encoder state to reflect the actual hardware state. However the post-resume modeset will then use the stale cached port register value in intel_dp_link_down() and potentially confuse the hardware. This was caught by the following assert WARNING: CPU: 3 PID: 5288 at ../drivers/gpu/drm/i915/intel_dp.c:2184 assert_edp_pll+0x99/0xa0 [i915] eDP PLL state assertion failure (expected on, current off) on account of the eDP PLL getting prematurely turned off when shutting down the port, since the DP_PLL_ENABLE bit wasn't set in the cached register value. Presumably I introduced this problem in commit 6fec76628333 ("drm/i915: Use intel_dp->DP in eDP PLL setup") as before that we didn't update the cached value after shuttting the port down. That's assuming the port got enabled at least once prior to hibernating. If that didn't happen then the cached value would still have been totally out of sync with reality (eg. first boot w/o eDP on, then hibernate, and then resume with eDP on). So, let's fix this properly and refresh the cached register value from the hardware register during resume. DDI platforms shouldn't use the cached value during port disable at least, so shouldn't have this particular issue. They might still have issues if we skip the initial modeset and then try to retrain the link or something. But untangling this DP vs. DDI mess is a bigger topic, so let's jut punt on DDI for now. Cc: Jani Nikula Cc: stable@vger.kernel.org Fixes: 6fec76628333 ("drm/i915: Use intel_dp->DP in eDP PLL setup") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1463162036-27931-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 64989ca4b27acb026b6496ec21e43bee66f86a5b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 79cf2d5f5a20..49c582d9eb5a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4899,13 +4899,15 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) void intel_dp_encoder_reset(struct drm_encoder *encoder) { - struct intel_dp *intel_dp; + struct drm_i915_private *dev_priv = to_i915(encoder->dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (!HAS_DDI(dev_priv)) + intel_dp->DP = I915_READ(intel_dp->output_reg); if (to_intel_encoder(encoder)->type != INTEL_OUTPUT_EDP) return; - intel_dp = enc_to_intel_dp(encoder); - pps_lock(intel_dp); /* From ea12e2a0cad62f6d634da62e7bf50ffe077de13b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 28 Jun 2016 13:37:30 +0300 Subject: [PATCH 0972/1050] drm/i915/bxt: Avoid early timeout during PLL enable Since wait_for_atomic doesn't re-check the wait-for condition after expiry of the timeout it can fail when called from non-atomic context even if the condition is set correctly before the expiry. Fix this by using the non-atomic wait_for instead. I noticed this via the PLL locking timing out incorrectly, with this fix I couldn't reproduce the problem. Fixes: 0351b93992aa ("drm/i915: Do not lie about atomic timeout granularity") CC: Chris Wilson CC: Tvrtko Ursulin Signed-off-by: Imre Deak Reviewed-by: Tvrtko Ursulin CC: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/1467110253-16046-2-git-send-email-imre.deak@intel.com (cherry picked from commit 0b786e41c73956126f6297764459021deef8aba7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index baf6f5584cbd..58f60b27837e 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1377,8 +1377,8 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, I915_WRITE(BXT_PORT_PLL_ENABLE(port), temp); POSTING_READ(BXT_PORT_PLL_ENABLE(port)); - if (wait_for_atomic_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) & - PORT_PLL_LOCK), 200)) + if (wait_for_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) & PORT_PLL_LOCK), + 200)) DRM_ERROR("PLL %d not locked\n", port); /* From 11f6145791775ce41b8993fd29e19eb53ef0ff14 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 28 Jun 2016 13:37:31 +0300 Subject: [PATCH 0973/1050] drm/i915/lpt: Avoid early timeout during FDI PHY reset Since wait_for_atomic doesn't re-check the wait-for condition after expiry of the timeout it can fail when called from non-atomic context even if the condition is set correctly before the expiry. Fix this by using the non-atomic wait_for instead. Fixes: 0351b93992aa ("drm/i915: Do not lie about atomic timeout granularity") CC: Chris Wilson CC: Tvrtko Ursulin Signed-off-by: Imre Deak Reviewed-by: Tvrtko Ursulin CC: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/1467110253-16046-3-git-send-email-imre.deak@intel.com (cherry picked from commit cf3598c23cd09d5f063fa8c12fe9ddd5a352d3d5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 56a1637c864f..a164ea44f668 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8447,16 +8447,16 @@ static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv) tmp |= FDI_MPHY_IOSFSB_RESET_CTL; I915_WRITE(SOUTH_CHICKEN2, tmp); - if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS, 100)) + if (wait_for_us(I915_READ(SOUTH_CHICKEN2) & + FDI_MPHY_IOSFSB_RESET_STATUS, 100)) DRM_ERROR("FDI mPHY reset assert timeout\n"); tmp = I915_READ(SOUTH_CHICKEN2); tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL; I915_WRITE(SOUTH_CHICKEN2, tmp); - if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100)) + if (wait_for_us((I915_READ(SOUTH_CHICKEN2) & + FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100)) DRM_ERROR("FDI mPHY reset de-assert timeout\n"); } From fa7c13e5b1e2076b0ec716ed584ab76f9e65b7a6 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 28 Jun 2016 13:37:32 +0300 Subject: [PATCH 0974/1050] drm/i915/hsw: Avoid early timeout during LCPLL disable/restore Since wait_for_atomic doesn't re-check the wait-for condition after expiry of the timeout it can fail when called from non-atomic context even if the condition is set correctly before the expiry. Fix this by using the non-atomic wait_for instead. Fixes: 0351b93992aa ("drm/i915: Do not lie about atomic timeout granularity") CC: Chris Wilson CC: Tvrtko Ursulin Signed-off-by: Imre Deak Reviewed-by: Tvrtko Ursulin CC: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/1467110253-16046-4-git-send-email-imre.deak@intel.com (cherry picked from commit f53dd63f1119a98a16d1a5a7cb3277a2f1ff483d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a164ea44f668..04452cf3eae8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9440,8 +9440,8 @@ static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, val |= LCPLL_CD_SOURCE_FCLK; I915_WRITE(LCPLL_CTL, val); - if (wait_for_atomic_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) + if (wait_for_us(I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE, 1)) DRM_ERROR("Switching to FCLK failed\n"); val = I915_READ(LCPLL_CTL); @@ -9514,8 +9514,8 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) val &= ~LCPLL_CD_SOURCE_FCLK; I915_WRITE(LCPLL_CTL, val); - if (wait_for_atomic_us((I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + if (wait_for_us((I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); } From ba34a65324259082dc6d2924cb82d562db1c6a6b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 28 Jun 2016 13:37:33 +0300 Subject: [PATCH 0975/1050] drm/i915: Avoid early timeout during AUX transfers Since wait_for_atomic doesn't re-check the wait-for condition after expiry of the timeout it can fail when called from non-atomic context even if the condition is set correctly before the expiry. Fix this by using the non-atomic wait_for instead. Due to the relatively long 10ms timeout, probably this didn't cause any real problems, but fix it in any case for consistency. Fixes: 0351b93992aa ("drm/i915: Do not lie about atomic timeout granularity") CC: Chris Wilson CC: Tvrtko Ursulin Signed-off-by: Imre Deak Reviewed-by: Tvrtko Ursulin CC: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/1467110253-16046-5-git-send-email-imre.deak@intel.com (cherry picked from commit 713a6b668932213247b394559bc229cd0fec2777) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 49c582d9eb5a..40745e38d438 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -663,7 +663,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, msecs_to_jiffies_timeout(10)); else - done = wait_for_atomic(C, 10) == 0; + done = wait_for(C, 10) == 0; if (!done) DRM_ERROR("dp aux hw did not signal timeout (has irq: %i)!\n", has_aux_irq); From 5be1ea899da4f11de92897d2ea706e0820609b9e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 27 Jun 2016 12:08:32 +0300 Subject: [PATCH 0976/1050] net/mlx5: Update command strings Add command string for MODIFY_FLOW_TABLE which is used by the driver. Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index dcd2df6518de..0b4986268cc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -545,6 +545,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); default: return "unknown command opcode"; } } From 7092fe866907f4f243122ab388cbf0e77305afaa Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Mon, 27 Jun 2016 12:08:33 +0300 Subject: [PATCH 0977/1050] net/mlx5: Add ConnectX-5 PCIe 4.0 to list of supported devices Add the upcoming ConnectX-5 PCIe 4.0 device to the list of supported devices by the mlx5 driver. Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a19b59348dd6..c65f4a13e17e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1508,8 +1508,9 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ - { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5 */ + { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ + { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5, PCIe 4.0 */ { 0, } }; From e0f46eb9f64ca2e97d242b6e02b8ae2c7fe6dc56 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 27 Jun 2016 12:08:34 +0300 Subject: [PATCH 0978/1050] net/mlx5e: Change enum to better reflect usage Change MLX5E_STATE_ASYNC_EVENTS_ENABLE to MLX5E_STATE_ASYNC_EVENTS_ENABLED since it represent a state and not an operation. Fixes: acff797cd1874 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e8a6c3325b39..baa991a23475 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -401,7 +401,7 @@ enum mlx5e_traffic_types { }; enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLE, + MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f5c8d5db25a8..4383f8c737e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -244,7 +244,7 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, { struct mlx5e_priv *priv = vpriv; - if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) + if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) return; switch (event) { @@ -260,12 +260,12 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { - set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { - clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); } From fd4782c21359cfd52af4c3180a2bb6bad55c1eba Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 27 Jun 2016 12:08:35 +0300 Subject: [PATCH 0979/1050] net/mlx5e: Check for BlueFlame capability before allocating SQ uar Previous to this patch mapping was always set to write combining without checking whether BlueFlame is supported in the device. Fixes: 0ba422410bbf ('net/mlx5: Fix global UAR mapping') Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4383f8c737e5..793d7a1f92b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -580,7 +580,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; - err = mlx5_alloc_map_uar(mdev, &sq->uar, true); + err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); if (err) return err; From 9ceec359e4ebdbbfd35375203c5bd06d602225f7 Mon Sep 17 00:00:00 2001 From: Matthew Finlay Date: Mon, 27 Jun 2016 12:08:36 +0300 Subject: [PATCH 0980/1050] net/mlx5e: Prevent adding the same vxlan port Do not allow the same vxlan udp port to be added to the device more than once. Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Matthew Finlay Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index f2fd1ef16da7..05de77267d58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -105,6 +105,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; + if (mlx5e_vxlan_lookup_port(priv, port)) + goto free_work; + if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) goto free_work; From ed80ec4c179d1f44cc1b36c7a102353ae6103793 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 27 Jun 2016 12:08:37 +0300 Subject: [PATCH 0981/1050] net/mlx5e: Fix number of PFC counters reported to ethtool Number of PFC counters used to count only number of priorities with PFC enabled, but each priority has more than one counter, hence the need to multiply it by the number of PFC counters per priority. Fixes: cf678570d5a1 ('net/mlx5e: Add per priority group to PPort counters') Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index fc7dcc03b1de..c709d41c4b70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -184,7 +184,9 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) #define MLX5E_NUM_SQ_STATS(priv) \ (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ test_bit(MLX5E_STATE_OPENED, &priv->state)) -#define MLX5E_NUM_PFC_COUNTERS(priv) hweight8(mlx5e_query_pfc_combined(priv)) +#define MLX5E_NUM_PFC_COUNTERS(priv) \ + (hweight8(mlx5e_query_pfc_combined(priv)) * \ + NUM_PPORT_PER_PRIO_PFC_COUNTERS) static int mlx5e_get_sset_count(struct net_device *dev, int sset) { From bfe6d8d1d433cbd5513a93132695e6dbdd79e6f2 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 27 Jun 2016 12:08:38 +0300 Subject: [PATCH 0982/1050] net/mlx5e: Reorganize ethtool statistics Categorize and reorganize ethtool statistics counters by renaming to "rx_*" and "tx_*" and removing redundant and duplicated counters, this way they are easier to grasp and more user friendly. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 30 ++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 27 +-- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +- .../ethernet/mellanox/mlx5/core/en_stats.h | 228 ++++++++---------- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 4 +- 5 files changed, 130 insertions(+), 163 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c709d41c4b70..e667a870e0c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -213,42 +213,41 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) /* SW counters */ for (i = 0; i < NUM_SW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name); + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); /* Q counters */ for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name); + strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); /* VPORT counters */ for (i = 0; i < NUM_VPORT_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_stats_desc[i].name); + vport_stats_desc[i].format); /* PPORT counters */ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_802_3_stats_desc[i].name); + pport_802_3_stats_desc[i].format); for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2863_stats_desc[i].name); + pport_2863_stats_desc[i].format); for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2819_stats_desc[i].name); + pport_2819_stats_desc[i].format); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", - prio, - pport_per_prio_traffic_stats_desc[i].name); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_traffic_stats_desc[i].format, prio); } pfc_combined = mlx5e_query_pfc_combined(priv); for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", - prio, pport_per_prio_pfc_stats_desc[i].name); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, prio); } } @@ -258,16 +257,15 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) /* per channel counters */ for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i, - rq_stats_desc[j].name); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_stats_desc[j].format, i); for (tc = 0; tc < priv->params.num_tc; tc++) for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_SQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - "tx%d_%s", - priv->channeltc_to_txq_map[i][tc], - sq_stats_desc[j].name); + sq_stats_desc[j].format, + priv->channeltc_to_txq_map[i][tc]); } static void mlx5e_get_strings(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 793d7a1f92b5..cb6defd71fc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -105,11 +105,11 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; - s->lro_packets += rq_stats->lro_packets; - s->lro_bytes += rq_stats->lro_bytes; + s->rx_lro_packets += rq_stats->lro_packets; + s->rx_lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; - s->rx_csum_sw += rq_stats->csum_sw; - s->rx_csum_inner += rq_stats->csum_inner; + s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_mpwqe_frag += rq_stats->mpwqe_frag; @@ -122,24 +122,23 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; - s->tso_packets += sq_stats->tso_packets; - s->tso_bytes += sq_stats->tso_bytes; - s->tso_inner_packets += sq_stats->tso_inner_packets; - s->tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_tso_packets += sq_stats->tso_packets; + s->tx_tso_bytes += sq_stats->tso_bytes; + s->tx_tso_inner_packets += sq_stats->tso_inner_packets; + s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; - s->tx_csum_inner += sq_stats->csum_offload_inner; - tx_offload_none += sq_stats->csum_offload_none; + s->tx_csum_partial_inner += sq_stats->csum_partial_inner; + tx_offload_none += sq_stats->csum_none; } } /* Update calculated offload counters */ - s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; - s->rx_csum_good = s->rx_packets - s->rx_csum_none - - s->rx_csum_sw; + s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner; + s->rx_csum_unnecessary = s->rx_packets - s->rx_csum_none - s->rx_csum_complete; - s->link_down_events = MLX5_GET(ppcnt_reg, + s->link_down_events_phy = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index bd947704b59c..022acc2e8922 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -689,7 +689,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (is_first_ethertype_ip(skb)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); - rq->stats.csum_sw++; + rq->stats.csum_complete++; return; } @@ -699,7 +699,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (cqe_is_tunneled(cqe)) { skb->csum_level = 1; skb->encapsulation = 1; - rq->stats.csum_inner++; + rq->stats.csum_unnecessary_inner++; } return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 83bc32b25849..fcd490cc5610 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -42,9 +42,11 @@ be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) +#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) struct counter_desc { - char name[ETH_GSTRING_LEN]; + char format[ETH_GSTRING_LEN]; int offset; /* Byte offset */ }; @@ -53,18 +55,18 @@ struct mlx5e_sw_stats { u64 rx_bytes; u64 tx_packets; u64 tx_bytes; - u64 tso_packets; - u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 lro_packets; - u64 lro_bytes; - u64 rx_csum_good; + u64 tx_tso_packets; + u64 tx_tso_bytes; + u64 tx_tso_inner_packets; + u64 tx_tso_inner_bytes; + u64 rx_lro_packets; + u64 rx_lro_bytes; + u64 rx_csum_unnecessary; u64 rx_csum_none; - u64 rx_csum_sw; - u64 rx_csum_inner; - u64 tx_csum_offload; - u64 tx_csum_inner; + u64 rx_csum_complete; + u64 rx_csum_unnecessary_inner; + u64 tx_csum_partial; + u64 tx_csum_partial_inner; u64 tx_queue_stopped; u64 tx_queue_wake; u64 tx_queue_dropped; @@ -76,7 +78,7 @@ struct mlx5e_sw_stats { u64 rx_cqe_compress_pkts; /* Special handling counters */ - u64 link_down_events; + u64 link_down_events_phy; }; static const struct counter_desc sw_stats_desc[] = { @@ -84,18 +86,18 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, @@ -105,7 +107,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; struct mlx5e_qcounter_stats { @@ -125,12 +127,6 @@ struct mlx5e_vport_stats { }; static const struct counter_desc vport_stats_desc[] = { - { "rx_vport_error_packets", - VPORT_COUNTER_OFF(received_errors.packets) }, - { "rx_vport_error_bytes", VPORT_COUNTER_OFF(received_errors.octets) }, - { "tx_vport_error_packets", - VPORT_COUNTER_OFF(transmit_errors.packets) }, - { "tx_vport_error_bytes", VPORT_COUNTER_OFF(transmit_errors.octets) }, { "rx_vport_unicast_packets", VPORT_COUNTER_OFF(received_eth_unicast.packets) }, { "rx_vport_unicast_bytes", @@ -192,94 +188,68 @@ struct mlx5e_pport_stats { }; static const struct counter_desc pport_802_3_stats_desc[] = { - { "frames_tx", PPORT_802_3_OFF(a_frames_transmitted_ok) }, - { "frames_rx", PPORT_802_3_OFF(a_frames_received_ok) }, - { "check_seq_err", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, - { "alignment_err", PPORT_802_3_OFF(a_alignment_errors) }, - { "octets_tx", PPORT_802_3_OFF(a_octets_transmitted_ok) }, - { "octets_received", PPORT_802_3_OFF(a_octets_received_ok) }, - { "multicast_xmitted", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, - { "broadcast_xmitted", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, - { "multicast_rx", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, - { "broadcast_rx", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, - { "in_range_len_errors", PPORT_802_3_OFF(a_in_range_length_errors) }, - { "out_of_range_len", PPORT_802_3_OFF(a_out_of_range_length_field) }, - { "too_long_errors", PPORT_802_3_OFF(a_frame_too_long_errors) }, - { "symbol_err", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, - { "mac_control_tx", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, - { "mac_control_rx", PPORT_802_3_OFF(a_mac_control_frames_received) }, - { "unsupported_op_rx", - PPORT_802_3_OFF(a_unsupported_opcodes_received) }, - { "pause_ctrl_rx", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, - { "pause_ctrl_tx", - PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, + { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, + { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, + { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, }; static const struct counter_desc pport_2863_stats_desc[] = { - { "in_octets", PPORT_2863_OFF(if_in_octets) }, - { "in_ucast_pkts", PPORT_2863_OFF(if_in_ucast_pkts) }, - { "in_discards", PPORT_2863_OFF(if_in_discards) }, - { "in_errors", PPORT_2863_OFF(if_in_errors) }, - { "in_unknown_protos", PPORT_2863_OFF(if_in_unknown_protos) }, - { "out_octets", PPORT_2863_OFF(if_out_octets) }, - { "out_ucast_pkts", PPORT_2863_OFF(if_out_ucast_pkts) }, - { "out_discards", PPORT_2863_OFF(if_out_discards) }, - { "out_errors", PPORT_2863_OFF(if_out_errors) }, - { "in_multicast_pkts", PPORT_2863_OFF(if_in_multicast_pkts) }, - { "in_broadcast_pkts", PPORT_2863_OFF(if_in_broadcast_pkts) }, - { "out_multicast_pkts", PPORT_2863_OFF(if_out_multicast_pkts) }, - { "out_broadcast_pkts", PPORT_2863_OFF(if_out_broadcast_pkts) }, + { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, + { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, + { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, }; static const struct counter_desc pport_2819_stats_desc[] = { - { "drop_events", PPORT_2819_OFF(ether_stats_drop_events) }, - { "octets", PPORT_2819_OFF(ether_stats_octets) }, - { "pkts", PPORT_2819_OFF(ether_stats_pkts) }, - { "broadcast_pkts", PPORT_2819_OFF(ether_stats_broadcast_pkts) }, - { "multicast_pkts", PPORT_2819_OFF(ether_stats_multicast_pkts) }, - { "crc_align_errors", PPORT_2819_OFF(ether_stats_crc_align_errors) }, - { "undersize_pkts", PPORT_2819_OFF(ether_stats_undersize_pkts) }, - { "oversize_pkts", PPORT_2819_OFF(ether_stats_oversize_pkts) }, - { "fragments", PPORT_2819_OFF(ether_stats_fragments) }, - { "jabbers", PPORT_2819_OFF(ether_stats_jabbers) }, - { "collisions", PPORT_2819_OFF(ether_stats_collisions) }, - { "p64octets", PPORT_2819_OFF(ether_stats_pkts64octets) }, - { "p65to127octets", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, - { "p128to255octets", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, - { "p256to511octets", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, - { "p512to1023octets", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, - { "p1024to1518octets", - PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, - { "p1519to2047octets", - PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, - { "p2048to4095octets", - PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, - { "p4096to8191octets", - PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, - { "p8192to10239octets", - PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, + { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, + { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, + { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, }; static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { - { "rx_octets", PPORT_PER_PRIO_OFF(rx_octets) }, - { "rx_frames", PPORT_PER_PRIO_OFF(rx_frames) }, - { "tx_octets", PPORT_PER_PRIO_OFF(tx_octets) }, - { "tx_frames", PPORT_PER_PRIO_OFF(tx_frames) }, + { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, }; static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { - { "rx_pause", PPORT_PER_PRIO_OFF(rx_pause) }, - { "rx_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, - { "tx_pause", PPORT_PER_PRIO_OFF(tx_pause) }, - { "tx_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, - { "rx_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, + { "rx_prio%d_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_prio%d_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_prio%d_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; struct mlx5e_rq_stats { u64 packets; u64 bytes; - u64 csum_sw; - u64 csum_inner; + u64 csum_complete; + u64 csum_unnecessary_inner; u64 csum_none; u64 lro_packets; u64 lro_bytes; @@ -292,19 +262,19 @@ struct mlx5e_rq_stats { }; static const struct counter_desc rq_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, }; struct mlx5e_sq_stats { @@ -315,28 +285,28 @@ struct mlx5e_sq_stats { u64 tso_bytes; u64 tso_inner_packets; u64 tso_inner_bytes; - u64 csum_offload_inner; + u64 csum_partial_inner; u64 nop; /* less likely accessed in data path */ - u64 csum_offload_none; + u64 csum_none; u64 stopped; u64 wake; u64 dropped; }; static const struct counter_desc sq_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, nop) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, stopped) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, wake) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, }; #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index b000ddc29553..5a750b9cd006 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -192,12 +192,12 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) if (skb->encapsulation) { eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM; - sq->stats.csum_offload_inner++; + sq->stats.csum_partial_inner++; } else { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; } } else - sq->stats.csum_offload_none++; + sq->stats.csum_none++; if (sq->cc != sq->prev_cc) { sq->prev_cc = sq->cc; From 3f4c68cfde30caa1f6d8368fd19590671411ade2 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Mon, 27 Jun 2016 15:30:02 +0530 Subject: [PATCH 0983/1050] net: thunderx: Fix link status reporting Check for SMU RX local/remote faults along with SPU LINK status. Otherwise at times link is UP at our end but DOWN at link partner's side. Also due to an issue in BGX it's rarely seen that initialization doesn't happen properly and SMU RX reports faults with everything fine at SPU. This patch tries to reinitialize LMAC to fix it. Also fixed LMAC disable sequence to properly bring down link. Signed-off-by: Sunil Goutham Signed-off-by: Tao Wang Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 91 ++++++++++++------- .../net/ethernet/cavium/thunder/thunder_bgx.h | 2 + 2 files changed, 62 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 3ed21988626b..63a39ac97d53 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -551,7 +551,9 @@ static int bgx_xaui_check_link(struct lmac *lmac) } /* Clear rcvflt bit (latching high) and read it back */ - bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT); + if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) + bgx_reg_modify(bgx, lmacid, + BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT); if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { dev_err(&bgx->pdev->dev, "Receive fault, retry training\n"); if (bgx->use_training) { @@ -570,13 +572,6 @@ static int bgx_xaui_check_link(struct lmac *lmac) return -1; } - /* Wait for MAC RX to be ready */ - if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_RX_CTL, - SMU_RX_CTL_STATUS, true)) { - dev_err(&bgx->pdev->dev, "SMU RX link not okay\n"); - return -1; - } - /* Wait for BGX RX to be idle */ if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_RX_IDLE, false)) { dev_err(&bgx->pdev->dev, "SMU RX not idle\n"); @@ -589,29 +584,30 @@ static int bgx_xaui_check_link(struct lmac *lmac) return -1; } - if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { - dev_err(&bgx->pdev->dev, "Receive fault\n"); - return -1; - } - - /* Receive link is latching low. Force it high and verify it */ - bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK); - if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_STATUS1, - SPU_STATUS1_RCV_LNK, false)) { - dev_err(&bgx->pdev->dev, "SPU receive link down\n"); - return -1; - } - + /* Clear receive packet disable */ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL); cfg &= ~SPU_MISC_CTL_RX_DIS; bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg); - return 0; + + /* Check for MAC RX faults */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL); + /* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */ + cfg &= SMU_RX_CTL_STATUS; + if (!cfg) + return 0; + + /* Rx local/remote fault seen. + * Do lmac reinit to see if condition recovers + */ + bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type); + + return -1; } static void bgx_poll_for_link(struct work_struct *work) { struct lmac *lmac; - u64 link; + u64 spu_link, smu_link; lmac = container_of(work, struct lmac, dwork.work); @@ -621,8 +617,11 @@ static void bgx_poll_for_link(struct work_struct *work) bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK, false); - link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1); - if (link & SPU_STATUS1_RCV_LNK) { + spu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1); + smu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SMUX_RX_CTL); + + if ((spu_link & SPU_STATUS1_RCV_LNK) && + !(smu_link & SMU_RX_CTL_STATUS)) { lmac->link_up = 1; if (lmac->bgx->lmac_type == BGX_MODE_XLAUI) lmac->last_speed = 40000; @@ -636,9 +635,15 @@ static void bgx_poll_for_link(struct work_struct *work) } if (lmac->last_link != lmac->link_up) { + if (lmac->link_up) { + if (bgx_xaui_check_link(lmac)) { + /* Errors, clear link_up state */ + lmac->link_up = 0; + lmac->last_speed = SPEED_UNKNOWN; + lmac->last_duplex = DUPLEX_UNKNOWN; + } + } lmac->last_link = lmac->link_up; - if (lmac->link_up) - bgx_xaui_check_link(lmac); } queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2); @@ -710,7 +715,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) { struct lmac *lmac; - u64 cmrx_cfg; + u64 cfg; lmac = &bgx->lmac[lmacid]; if (lmac->check_link) { @@ -719,9 +724,33 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) destroy_workqueue(lmac->check_link); } - cmrx_cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); - cmrx_cfg &= ~(1 << 15); - bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg); + /* Disable packet reception */ + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cfg &= ~CMR_PKT_RX_EN; + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + + /* Give chance for Rx/Tx FIFO to get drained */ + bgx_poll_reg(bgx, lmacid, BGX_CMRX_RX_FIFO_LEN, (u64)0x1FFF, true); + bgx_poll_reg(bgx, lmacid, BGX_CMRX_TX_FIFO_LEN, (u64)0x3FFF, true); + + /* Disable packet transmission */ + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cfg &= ~CMR_PKT_TX_EN; + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + + /* Disable serdes lanes */ + if (!lmac->is_sgmii) + bgx_reg_modify(bgx, lmacid, + BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER); + else + bgx_reg_modify(bgx, lmacid, + BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_PWR_DN); + + /* Disable LMAC */ + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cfg &= ~CMR_EN; + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + bgx_flush_dmac_addrs(bgx, lmacid); if ((bgx->lmac_type != BGX_MODE_XFI) && diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 149e179363a1..42010d2e5ddf 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -41,6 +41,7 @@ #define BGX_CMRX_RX_STAT10 0xC0 #define BGX_CMRX_RX_BP_DROP 0xC8 #define BGX_CMRX_RX_DMAC_CTL 0x0E8 +#define BGX_CMRX_RX_FIFO_LEN 0x108 #define BGX_CMR_RX_DMACX_CAM 0x200 #define RX_DMACX_CAM_EN BIT_ULL(48) #define RX_DMACX_CAM_LMACID(x) (x << 49) @@ -50,6 +51,7 @@ #define BGX_CMR_CHAN_MSK_AND 0x450 #define BGX_CMR_BIST_STATUS 0x460 #define BGX_CMR_RX_LMACS 0x468 +#define BGX_CMRX_TX_FIFO_LEN 0x518 #define BGX_CMRX_TX_STAT0 0x600 #define BGX_CMRX_TX_STAT1 0x608 #define BGX_CMRX_TX_STAT2 0x610 From 3e29adba567306504e99b8363edf2d47c19dbe1b Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Mon, 27 Jun 2016 15:30:03 +0530 Subject: [PATCH 0984/1050] net: thunderx: Fix TL4 configuration for secondary Qsets TL4 calculation for a given SQ of secondary Qsets is incorrect and goes out of bounds and also for some SQ's TL4 chosen will transmit data via a different BGX interface and not same as primary Qset's interface. This patch fixes this issue. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 95f17f8cadac..16ed20357c5c 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -499,6 +499,7 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, u32 rr_quantum; u8 sq_idx = sq->sq_num; u8 pqs_vnic; + int svf; if (sq->sqs_mode) pqs_vnic = nic->pqs_vf[vnic]; @@ -511,10 +512,19 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, /* 24 bytes for FCS, IPG and preamble */ rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4); - tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX); + if (!sq->sqs_mode) { + tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX); + } else { + for (svf = 0; svf < MAX_SQS_PER_VF; svf++) { + if (nic->vf_sqs[pqs_vnic][svf] == vnic) + break; + } + tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC); + tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF); + tl4 += (svf * NIC_TL4_PER_LMAC); + tl4 += (bgx * NIC_TL4_PER_BGX); + } tl4 += sq_idx; - if (sq->sqs_mode) - tl4 += vnic * 8; tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3); nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 | From 42482b4546336ecd93acdd95e435bafd7a4c581c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 23 Jun 2016 14:50:35 -0700 Subject: [PATCH 0985/1050] drm/i915: Add more Kabylake PCI IDs. The spec has been updated adding new PCI IDs. Signed-off-by: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Link: http://patchwork.freedesktop.org/patch/msgid/1466718636-19675-1-git-send-email-rodrigo.vivi@intel.com (cherry picked from commit 33d9391d3020e069dca98fa87a604c037beb2b9e) Signed-off-by: Jani Nikula --- include/drm/i915_pciids.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 9094599a1150..87dde1c48fbf 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -309,6 +309,7 @@ INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \ INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \ INTEL_VGA_DEVICE(0x5902, info), /* DT GT1 */ \ + INTEL_VGA_DEVICE(0x5908, info), /* Halo GT1 */ \ INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \ INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */ @@ -322,7 +323,9 @@ INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */ #define INTEL_KBL_GT3_IDS(info) \ + INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x5927, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \ INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */ From 6b9dc6a474fd4e025c615ed4582735360005727c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 23 Jun 2016 14:50:36 -0700 Subject: [PATCH 0986/1050] drm/i915: Removing PCI IDs that are no longer listed as Kabylake. This is unusual. Usually IDs listed on early stages of platform definition are kept there as reserved for later use. However these IDs here are not listed anymore in any of steppings and devices IDs tables for Kabylake on configurations overview section of BSpec. So it is better removing them before they become used in any other future platform. Signed-off-by: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Link: http://patchwork.freedesktop.org/patch/msgid/1466718636-19675-2-git-send-email-rodrigo.vivi@intel.com (cherry picked from commit a922eb8d4581c883c37ce6e12dca9ff2cb1ea723) Signed-off-by: Jani Nikula --- include/drm/i915_pciids.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 87dde1c48fbf..33466bfc6440 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -325,15 +325,10 @@ #define INTEL_KBL_GT3_IDS(info) \ INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x5927, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \ - INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */ + INTEL_VGA_DEVICE(0x5927, info) /* ULT GT3 */ #define INTEL_KBL_GT4_IDS(info) \ - INTEL_VGA_DEVICE(0x5932, info), /* DT GT4 */ \ - INTEL_VGA_DEVICE(0x593B, info), /* Halo GT4 */ \ - INTEL_VGA_DEVICE(0x593A, info), /* SRV GT4 */ \ - INTEL_VGA_DEVICE(0x593D, info) /* WKS GT4 */ + INTEL_VGA_DEVICE(0x593B, info) /* Halo GT4 */ #define INTEL_KBL_IDS(info) \ INTEL_KBL_GT1_IDS(info), \ From 96183182ad05d1ce31b9048921c12bf4ad621eaf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 27 Jun 2016 20:48:53 +0800 Subject: [PATCH 0987/1050] ibmvnic: fix to use list_for_each_safe() when delete items Since we will remove items off the list using list_del() we need to use a safe version of the list_for_each() macro aptly named list_for_each_safe(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 864cb21351a4..ecdb6854a898 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2121,7 +2121,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; - struct ibmvnic_error_buff *error_buff; + struct ibmvnic_error_buff *error_buff, *tmp; unsigned long flags; bool found = false; int i; @@ -2133,7 +2133,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq, } spin_lock_irqsave(&adapter->error_list_lock, flags); - list_for_each_entry(error_buff, &adapter->errors, list) + list_for_each_entry_safe(error_buff, tmp, &adapter->errors, list) if (error_buff->error_id == crq->request_error_rsp.error_id) { found = true; list_del(&error_buff->list); @@ -3141,14 +3141,14 @@ static void handle_request_ras_comp_num_rsp(union ibmvnic_crq *crq, static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) { - struct ibmvnic_inflight_cmd *inflight_cmd; + struct ibmvnic_inflight_cmd *inflight_cmd, *tmp1; struct device *dev = &adapter->vdev->dev; - struct ibmvnic_error_buff *error_buff; + struct ibmvnic_error_buff *error_buff, *tmp2; unsigned long flags; unsigned long flags2; spin_lock_irqsave(&adapter->inflight_lock, flags); - list_for_each_entry(inflight_cmd, &adapter->inflight, list) { + list_for_each_entry_safe(inflight_cmd, tmp1, &adapter->inflight, list) { switch (inflight_cmd->crq.generic.cmd) { case LOGIN: dma_unmap_single(dev, adapter->login_buf_token, @@ -3165,8 +3165,8 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) break; case REQUEST_ERROR_INFO: spin_lock_irqsave(&adapter->error_list_lock, flags2); - list_for_each_entry(error_buff, &adapter->errors, - list) { + list_for_each_entry_safe(error_buff, tmp2, + &adapter->errors, list) { dma_unmap_single(dev, error_buff->dma, error_buff->len, DMA_FROM_DEVICE); From a3d2e9f8eb1487f4191ff08ce2d3d63702c65a90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jun 2016 17:38:50 +0200 Subject: [PATCH 0988/1050] tcp: do not send too big packets at retransmit time Arjun reported a bug in TCP stack and bisected it to a recent commit. In case where we process SACK, we can coalesce multiple skbs into fat ones (tcp_shift_skb_data()), to lower write queue overhead, because we do not expect to retransmit these packets. However, SACK reneging can happen, forcing the sender to retransmit all these packets. If skb->len is above 64KB, we then send buggy IP packets that could hang TSO engine on cxgb4. Neal suggested to use tcp_tso_autosize() instead of tp->gso_segs so that we cook packets of optimal size vs TCP/pacing. Thanks to Arjun for reporting the bug and running the tests ! Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Eric Dumazet Reported-by: Arjun V Tested-by: Arjun V Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8bd9911fdd16..e00e972c4e6a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2751,7 +2751,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; struct sk_buff *hole = NULL; - u32 last_lost; + u32 max_segs, last_lost; int mib_idx; int fwd_rexmitting = 0; @@ -2771,6 +2771,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } + max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; int segs; @@ -2784,6 +2785,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) return; + /* In case tcp_shift_skb_data() have aggregated large skbs, + * we need to make sure not sending too bigs TSO packets + */ + segs = min_t(int, segs, max_segs); if (fwd_rexmitting) { begin_fwd: From 565ce8f32ac4a233b474f401e1d3e7e1de0a31fd Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Jun 2016 18:34:42 +0200 Subject: [PATCH 0989/1050] net: bridge: fix vlan stats continue counter I made a dumb off-by-one mistake when I added the vlan stats counter dumping code. The increment should happen before the check, not after otherwise we miss one entry when we continue dumping. Fixes: a60c090361ea ("bridge: netlink: export per-vlan stats") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a5343c7232bf..85e89f693589 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1273,7 +1273,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, struct bridge_vlan_xstats vxi; struct br_vlan_stats stats; - if (vl_idx++ < *prividx) + if (++vl_idx < *prividx) continue; memset(&vxi, 0, sizeof(vxi)); vxi.vid = v->vid; From ceb56070359b7329b5678b5d95a376fcb24767be Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 27 Jun 2016 21:38:11 +0200 Subject: [PATCH 0990/1050] bpf, perf: delay release of BPF prog after grace period Commit dead9f29ddcc ("perf: Fix race in BPF program unregister") moved destruction of BPF program from free_event_rcu() callback to __free_event(), which is problematic if used with tail calls: if prog A is attached as trace event directly, but at the same time present in a tail call map used by another trace event program elsewhere, then we need to delay destruction via RCU grace period since it can still be in use by the program doing the tail call (the prog first needs to be dropped from the tail call map, then trace event with prog A attached destroyed, so we get immediate destruction). Fixes: dead9f29ddcc ("perf: Fix race in BPF program unregister") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Jann Horn Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++++ kernel/events/core.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8269cafc6eb1..0de4de6dd43e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -264,6 +264,10 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } + +static inline void bpf_prog_put_rcu(struct bpf_prog *prog) +{ +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 274450efea90..d00c47b67a97 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7531,7 +7531,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event) prog = event->tp_event->prog; if (prog) { event->tp_event->prog = NULL; - bpf_prog_put(prog); + bpf_prog_put_rcu(prog); } } From 5b4d10f5e0369ed79434593b7cd8e85eebbe473f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Jun 2016 23:50:29 +0300 Subject: [PATCH 0991/1050] qlcnic: use the correct ring in qlcnic_83xx_process_rcv_ring_diag() There is a static checker warning here "warn: mask and shift to zero" and the code sets "ring" to zero every time. From looking at how QLCNIC_FETCH_RING_ID() is used in qlcnic_83xx_process_rcv_ring() the qlcnic_83xx_hndl() should be removed. Fixes: 4be41e92f7c6 ('qlcnic: 83xx data path routines') Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 7bd6f25b4625..607bb7d4514d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -2220,7 +2220,7 @@ void qlcnic_83xx_process_rcv_ring_diag(struct qlcnic_host_sds_ring *sds_ring) if (!opcode) return; - ring = QLCNIC_FETCH_RING_ID(qlcnic_83xx_hndl(sts_data[0])); + ring = QLCNIC_FETCH_RING_ID(sts_data[0]); qlcnic_83xx_process_rcv_diag(adapter, ring, sts_data); desc = &sds_ring->desc_head[consumer]; desc->status_desc_data[0] = cpu_to_le64(STATUS_OWNER_PHANTOM); From c041778c966c92c964033f1cdfee60a9f2b5e465 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 29 Jun 2016 10:36:39 +0200 Subject: [PATCH 0992/1050] cfg80211: fix proto in ieee80211_data_to_8023 for frames without LLC header The PDU length of incoming LLC frames is set to the total skb payload size in __ieee80211_data_to_8023() of net/wireless/util.c which incorrectly includes the length of the IEEE 802.11 header. The resulting LLC frame header has a too large PDU length, causing the llc_fixup_skb() function of net/llc/llc_input.c to reject the incoming skb, effectively breaking STP. Solve the problem by properly substracting the IEEE 802.11 frame header size from the PDU length, allowing the LLC processor to pick up the incoming control messages. Special thanks to Gerry Rozema for tracking down the regression and proposing a suitable patch. Fixes: 2d1c304cb2d5 ("cfg80211: add function for 802.3 conversion with separate output buffer") Cc: stable@vger.kernel.org Reported-by: Gerry Rozema Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 4e809e978b7d..2443ee30ba5b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -509,7 +509,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, * replace EtherType */ hdrlen += ETH_ALEN + 2; else - tmp.h_proto = htons(skb->len); + tmp.h_proto = htons(skb->len - hdrlen); pskb_pull(skb, hdrlen); From 889ad4566604610804df984e1a3dd5e2c66256e5 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 28 Jun 2016 20:41:31 -0700 Subject: [PATCH 0993/1050] e1000e: keep VLAN interfaces functional after rxvlan off I've got a bug report about an e1000e interface, where a VLAN interface is set up on top of it: $ ip link add link ens1f0 name ens1f0.99 type vlan id 99 $ ip link set ens1f0 up $ ip link set ens1f0.99 up $ ip addr add 192.168.99.92 dev ens1f0.99 At this point, I can ping another host on vlan 99, ip 192.168.99.91. However, if I do the following: $ ethtool -K ens1f0 rxvlan off Then no traffic passes on ens1f0.99. It comes back if I toggle rxvlan on again. I'm not sure if this is actually intended behavior, or if there's a lack of software VLAN stripping fallback, or what, but things continue to work if I simply don't call e1000e_vlan_strip_disable() if there are active VLANs (plagiarizing a function from the e1000 driver here) on the interface. Also slipped a related-ish fix to the kerneldoc text for e1000e_vlan_strip_disable here... Signed-off-by: Jarod Wilson Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e1000e/netdev.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 75e60897b7e7..73f745205a1c 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -154,6 +154,16 @@ void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val) writel(val, hw->hw_addr + reg); } +static bool e1000e_vlan_used(struct e1000_adapter *adapter) +{ + u16 vid; + + for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) + return true; + + return false; +} + /** * e1000_regdump - register printout routine * @hw: pointer to the HW structure @@ -2789,7 +2799,7 @@ static void e1000e_vlan_filter_enable(struct e1000_adapter *adapter) } /** - * e1000e_vlan_strip_enable - helper to disable HW VLAN stripping + * e1000e_vlan_strip_disable - helper to disable HW VLAN stripping * @adapter: board private structure to initialize **/ static void e1000e_vlan_strip_disable(struct e1000_adapter *adapter) @@ -3443,7 +3453,8 @@ static void e1000e_set_rx_mode(struct net_device *netdev) ew32(RCTL, rctl); - if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX || + e1000e_vlan_used(adapter)) e1000e_vlan_strip_enable(adapter); else e1000e_vlan_strip_disable(adapter); From b560f03ddfb072bca65e9440ff0dc4f9b1d1f056 Mon Sep 17 00:00:00 2001 From: David Barroso Date: Tue, 28 Jun 2016 11:16:43 +0300 Subject: [PATCH 0994/1050] neigh: Explicitly declare RCU-bh read side critical section in neigh_xmit() neigh_xmit() expects to be called inside an RCU-bh read side critical section, and while one of its two current callers gets this right, the other one doesn't. More specifically, neigh_xmit() has two callers, mpls_forward() and mpls_output(), and while both callers call neigh_xmit() under rcu_read_lock(), this provides sufficient protection for neigh_xmit() only in the case of mpls_forward(), as that is always called from softirq context and therefore doesn't need explicit BH protection, while mpls_output() can be called from process context with softirqs enabled. When mpls_output() is called from process context, with softirqs enabled, we can be preempted by a softirq at any time, and RCU-bh considers the completion of a softirq as signaling the end of any pending read-side critical sections, so if we do get a softirq while we are in the part of neigh_xmit() that expects to be run inside an RCU-bh read side critical section, we can end up with an unexpected RCU grace period running right in the middle of that critical section, making things go boom. This patch fixes this impedance mismatch in the callee, by making neigh_xmit() always take rcu_read_{,un}lock_bh() around the code that expects to be treated as an RCU-bh read side critical section, as this seems a safer option than fixing it in the callers. Fixes: 4fd3d7d9e868f ("neigh: Add helper function neigh_xmit") Signed-off-by: David Barroso Signed-off-by: Lennert Buytenhek Acked-by: David Ahern Acked-by: Robert Shearman Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 29dd8cc22bbf..510cd62fcb99 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2469,13 +2469,17 @@ int neigh_xmit(int index, struct net_device *dev, tbl = neigh_tables[index]; if (!tbl) goto out; + rcu_read_lock_bh(); neigh = __neigh_lookup_noref(tbl, addr, dev); if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); - if (IS_ERR(neigh)) + if (IS_ERR(neigh)) { + rcu_read_unlock_bh(); goto out_kfree_skb; + } err = neigh->output(neigh, skb); + rcu_read_unlock_bh(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), From 34c7bb4705a0a2d344b0c82eaf3d3bfa2bc9da45 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 28 Jun 2016 07:46:03 -0400 Subject: [PATCH 0995/1050] qed: Protect the doorbell BAR with the write barriers. SPQ doorbell is currently protected with the compilation barrier. Under the stress scenarios, we may get into a state where (due to the weak ordering) several ramrod doorbells were written to the BAR with an out-of-order producer values. Need to change the barrier type to a write barrier to make sure that the write buffer is flushed after each doorbell. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 67d9893774d8..b122f6013b6c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -213,19 +213,15 @@ static int qed_spq_hw_post(struct qed_hwfn *p_hwfn, SET_FIELD(db.params, CORE_DB_DATA_AGG_VAL_SEL, DQ_XCM_CORE_SPQ_PROD_CMD); db.agg_flags = DQ_XCM_CORE_DQ_CF_CMD; - - /* validate producer is up to-date */ - rmb(); - db.spq_prod = cpu_to_le16(qed_chain_get_prod_idx(p_chain)); - /* do not reorder */ - barrier(); + /* make sure the SPQE is updated before the doorbell */ + wmb(); DOORBELL(p_hwfn, qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY), *(u32 *)&db); /* make sure doorbell is rang */ - mmiowb(); + wmb(); DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Doorbelled [0x%08x, CID 0x%08x] with Flags: %02x agg_params: %02x, prod: %04x\n", From d913d3a763a6f66a862a6eafcf6da89a7905832a Mon Sep 17 00:00:00 2001 From: Samuel Gauthier Date: Tue, 28 Jun 2016 17:22:26 +0200 Subject: [PATCH 0996/1050] openvswitch: fix conntrack netlink event delivery Only the first and last netlink message for a particular conntrack are actually sent. The first message is sent through nf_conntrack_confirm when the conntrack is committed. The last one is sent when the conntrack is destroyed on timeout. The other conntrack state change messages are not advertised. When the conntrack subsystem is used from netfilter, nf_conntrack_confirm is called for each packet, from the postrouting hook, which in turn calls nf_ct_deliver_cached_events to send the state change netlink messages. This commit fixes the problem by calling nf_ct_deliver_cached_events in the non-commit case as well. Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") CC: Joe Stringer CC: Justin Pettit CC: Andy Zhou CC: Thomas Graf Signed-off-by: Samuel Gauthier Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3d5feede962d..d84312584ee4 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -818,8 +818,18 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, */ state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; __ovs_ct_update_key(key, state, &info->zone, exp->master); - } else - return __ovs_ct_lookup(net, key, info, skb); + } else { + struct nf_conn *ct; + int err; + + err = __ovs_ct_lookup(net, key, info, skb); + if (err) + return err; + + ct = (struct nf_conn *)skb->nfct; + if (ct) + nf_ct_deliver_cached_events(ct); + } return 0; } From 03bea60409328de54e4ff7ec41672e12a9cb0908 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 29 Jun 2016 16:03:55 +0200 Subject: [PATCH 0997/1050] ovl: get_write_access() in truncate When truncating a file we should check write access on the underlying inode. And we should do so on the lower file as well (before copy-up) for consistency. Original patch and test case by Aihua Zhang. - - >o >o - - test.c - - >o >o - - #include #include #include int main(int argc, char *argv[]) { int ret; ret = truncate(argv[0], 4096); if (ret != -1) { fprintf(stderr, "truncate(argv[0]) should have failed\n"); return 1; } if (errno != ETXTBSY) { perror("truncate(argv[0])"); return 1; } return 0; } - - >o >o - - >o >o - - >o >o - - Reported-by: Aihua Zhang Signed-off-by: Miklos Szeredi Cc: --- fs/overlayfs/inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 8514d692042b..c831c2e5f803 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -59,16 +59,37 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (err) goto out; + if (attr->ia_valid & ATTR_SIZE) { + struct inode *realinode = d_inode(ovl_dentry_real(dentry)); + + err = -ETXTBSY; + if (atomic_read(&realinode->i_writecount) < 0) + goto out_drop_write; + } + err = ovl_copy_up(dentry); if (!err) { + struct inode *winode = NULL; + upperdentry = ovl_dentry_upper(dentry); + if (attr->ia_valid & ATTR_SIZE) { + winode = d_inode(upperdentry); + err = get_write_access(winode); + if (err) + goto out_drop_write; + } + inode_lock(upperdentry->d_inode); err = notify_change(upperdentry, attr, NULL); if (!err) ovl_copyattr(upperdentry->d_inode, dentry->d_inode); inode_unlock(upperdentry->d_inode); + + if (winode) + put_write_access(winode); } +out_drop_write: ovl_drop_write(dentry); out: return err; From 3a8bd717ee3e0508fc0dd517b97e950989e15f8c Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 27 Jun 2016 14:46:47 +0800 Subject: [PATCH 0998/1050] drm/amd/powerplay: Workaround for Memory EDC Error on Polaris10. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index d23dcce6c03c..d15584c84595 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -2924,6 +2924,31 @@ static int polaris10_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) return 0; } +int polaris10_patch_voltage_workaround(struct pp_hwmgr *hwmgr) +{ + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table = + table_info->vdd_dep_on_mclk; + struct phm_ppt_v1_voltage_lookup_table *lookup_table = + table_info->vddc_lookup_table; + uint32_t i; + + if (hwmgr->chip_id == CHIP_POLARIS10 && hwmgr->hw_revision == 0xC7) { + if (lookup_table->entries[dep_mclk_table->entries[dep_mclk_table->count-1].vddInd].us_vdd >= 1000) + return 0; + + for (i = 0; i < lookup_table->count; i++) { + if (lookup_table->entries[i].us_vdd < 0xff01 && lookup_table->entries[i].us_vdd >= 1000) { + dep_mclk_table->entries[dep_mclk_table->count-1].vddInd = (uint8_t) i; + return 0; + } + } + } + return 0; +} + + int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) { struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); @@ -3001,6 +3026,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) polaris10_set_features_platform_caps(hwmgr); + polaris10_patch_voltage_workaround(hwmgr); polaris10_init_dpm_defaults(hwmgr); /* Get leakage voltage based on leakage ID. */ From 0636e0d666e0238fa22348172c20a49f42a94395 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 27 Jun 2016 17:30:24 +0800 Subject: [PATCH 0999/1050] drm/amd/powerplay: fix issue uvd dpm can't enabled on Polaris11. 1. Populate correct value of VDDCI voltage for SMC SAMU, VCE, and UVD levels depending on whether VDDCi control is SVI2 or GPIO. 2. Populate SMC ACPI minimum voltage using VBIOS boot SCLK and MCLK When static voltage is configured as VDDCI, driver still tries to program a voltage for MM minVoltage using VDDC-VDDCI delta requirement. minVoltage should be set as boot up voltage. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 101 +++++++++++------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index d15584c84595..ec2a7ada346a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -1423,22 +1423,19 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; - if (!data->sclk_dpm_key_disabled) { - /* Get MinVoltage and Frequency from DPM0, - * already converted to SMC_UL */ - sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value; - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_sclk, - table->ACPILevel.SclkFrequency, - &table->ACPILevel.MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDC voltage value " - "in Clock Dependency Table", ); - } else { - sclk_frequency = data->vbios_boot_state.sclk_bootup_value; - table->ACPILevel.MinVoltage = - data->vbios_boot_state.vddc_bootup_value * VOLTAGE_SCALE; - } + + /* Get MinVoltage and Frequency from DPM0, + * already converted to SMC_UL */ + sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value; + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, + sclk_frequency, + &table->ACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDC voltage value " + "in Clock Dependency Table", + ); + result = polaris10_calculate_sclk_params(hwmgr, sclk_frequency, &(table->ACPILevel.SclkSetting)); PP_ASSERT_WITH_CODE(result == 0, "Error retrieving Engine Clock dividers from VBIOS.", return result); @@ -1463,24 +1460,18 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac); CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate); - if (!data->mclk_dpm_key_disabled) { - /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ - table->MemoryACPILevel.MclkFrequency = - data->dpm_table.mclk_table.dpm_levels[0].value; - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_mclk, - table->MemoryACPILevel.MclkFrequency, - &table->MemoryACPILevel.MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDCI voltage value " - "in Clock Dependency Table", - ); - } else { - table->MemoryACPILevel.MclkFrequency = - data->vbios_boot_state.mclk_bootup_value; - table->MemoryACPILevel.MinVoltage = - data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE; - } + + /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ + table->MemoryACPILevel.MclkFrequency = + data->dpm_table.mclk_table.dpm_levels[0].value; + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, + table->MemoryACPILevel.MclkFrequency, + &table->MemoryACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDCI voltage value " + "in Clock Dependency Table", + ); us_mvdd = 0; if ((POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control) || @@ -1525,6 +1516,7 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = table_info->mm_dep_table; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t vddci; table->VceLevelCount = (uint8_t)(mm_table->count); table->VceBootLevel = 0; @@ -1534,9 +1526,18 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr, table->VceLevel[count].MinVoltage = 0; table->VceLevel[count].MinVoltage |= (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + + if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->VceLevel[count].MinVoltage |= - ((mm_table->entries[count].vddc - data->vddc_vddci_delta) * - VOLTAGE_SCALE) << VDDCI_SHIFT; + (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT; /*retrieve divider value for VBIOS */ @@ -1565,6 +1566,7 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = table_info->mm_dep_table; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t vddci; table->SamuBootLevel = 0; table->SamuLevelCount = (uint8_t)(mm_table->count); @@ -1575,8 +1577,16 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr, table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT; + + if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; /* retrieve divider value for VBIOS */ @@ -1659,6 +1669,7 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = table_info->mm_dep_table; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t vddci; table->UvdLevelCount = (uint8_t)(mm_table->count); table->UvdBootLevel = 0; @@ -1669,8 +1680,16 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - table->UvdLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT; + + if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT; /* retrieve divider value for VBIOS */ @@ -1691,8 +1710,8 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage); - } + return result; } From d9d533c1483c4daf76e7e720c35896a430563ff8 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Tue, 28 Jun 2016 13:28:50 +0800 Subject: [PATCH 1000/1050] drm/amdgpu: add ACLK_CNTL setting for polaris10 This is a temporary workaround for early boards. Signed-off-by: Ken Wang Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1a5cbaff1e34..b2ebd4fef6cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -47,6 +47,8 @@ #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" +#include "smu/smu_7_1_3_d.h" + #define GFX8_NUM_GFX_RINGS 1 #define GFX8_NUM_COMPUTE_RINGS 8 @@ -693,6 +695,7 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) amdgpu_program_register_sequence(adev, polaris10_golden_common_all, (const u32)ARRAY_SIZE(polaris10_golden_common_all)); + WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); break; case CHIP_CARRIZO: amdgpu_program_register_sequence(adev, From a7f14a184e0e8e94becfc3f9608f6b0f9c339572 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 28 Jun 2016 16:22:07 +0800 Subject: [PATCH 1001/1050] drm/amd/powerplay: workaround for UVD clock issue workaround issue that when uvd dpm disabled, uvd clock remain high on polaris10. Manually turn off the clocks. Signed-off-by: Rex Zhu Reviewed-by: Ken Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e19520c4b4b6..d9c88d13f8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1106,6 +1106,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) if (fences == 0 && handles == 0) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, false); + /* just work around for uvd clock remain high even + * when uvd dpm disabled on Polaris10 */ + if (adev->asic_type == CHIP_POLARIS10) + amdgpu_asic_set_uvd_clocks(adev, 0, 0); } else { amdgpu_asic_set_uvd_clocks(adev, 0, 0); } From 91ff811f32763ea3135e832f7c1aeafc85ae1c98 Mon Sep 17 00:00:00 2001 From: Venkat Reddy Talla Date: Wed, 29 Jun 2016 15:31:27 +0530 Subject: [PATCH 1002/1050] regulator: max77620: check for valid regulator info SD4 regulator is not registered with regulator core framework in probe as there is no support in MAX77620 PMIC, removing SD4 entry from MAX77620 regulator information list and checking for valid regulator information data before configuring FPS source and FPS power up/down period to avoid NULL pointer exception if regulator not registered with core. Signed-off-by: Venkat Reddy Talla Signed-off-by: Mark Brown --- drivers/regulator/max77620-regulator.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c index 321e804aeab0..a1b49a6d538f 100644 --- a/drivers/regulator/max77620-regulator.c +++ b/drivers/regulator/max77620-regulator.c @@ -123,6 +123,9 @@ static int max77620_regulator_set_fps_src(struct max77620_regulator *pmic, unsigned int val; int ret; + if (!rinfo) + return 0; + switch (fps_src) { case MAX77620_FPS_SRC_0: case MAX77620_FPS_SRC_1: @@ -171,6 +174,9 @@ static int max77620_regulator_set_fps_slots(struct max77620_regulator *pmic, int pd = rpdata->active_fps_pd_slot; int ret = 0; + if (!rinfo) + return 0; + if (is_suspend) { pu = rpdata->suspend_fps_pu_slot; pd = rpdata->suspend_fps_pd_slot; @@ -680,7 +686,6 @@ static struct max77620_regulator_info max77620_regs_info[MAX77620_NUM_REGS] = { RAIL_SD(SD1, sd1, "in-sd1", SD1, 600000, 1550000, 12500, 0x22, SD1), RAIL_SD(SD2, sd2, "in-sd2", SDX, 600000, 3787500, 12500, 0xFF, NONE), RAIL_SD(SD3, sd3, "in-sd3", SDX, 600000, 3787500, 12500, 0xFF, NONE), - RAIL_SD(SD4, sd4, "in-sd4", SDX, 600000, 3787500, 12500, 0xFF, NONE), RAIL_LDO(LDO0, ldo0, "in-ldo0-1", N, 800000, 2375000, 25000), RAIL_LDO(LDO1, ldo1, "in-ldo0-1", N, 800000, 2375000, 25000), From 1bcbf42d2732b3fdaa8559b0dfc91567769e23c8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Jun 2016 11:19:32 -0700 Subject: [PATCH 1003/1050] nfit: fix format interface code byte order Per JEDEC Annex L Release 3 the SPD data is: Bits 9~5 00 000 = Function Undefined 00 001 = Byte addressable energy backed 00 010 = Block addressed 00 011 = Byte addressable, no energy backed All other codes reserved Bits 4~0 0 0000 = Proprietary interface 0 0001 = Standard interface 1 All other codes reserved; see Definitions of Functions ...and per the ACPI 6.1 spec: byte0: Bits 4~0 (0 or 1) byte1: Bits 9~5 (1, 2, or 3) ...so a format interface code displayed as 0x301 should be stored in the nfit as (0x1, 0x3), little-endian. Cc: Toshi Kani Cc: Rafael J. Wysocki Cc: Robert Moore Cc: Robert Elliott Link: https://bugzilla.kernel.org/show_bug.cgi?id=121161 Fixes: 30ec5fd464d5 ("nfit: fix format interface code byte order per ACPI6.1") Fixes: 5ad9a7fde07a ("acpi/nfit: Update nfit driver to comply with ACPI 6.1") Reported-by: Kristin Jacque Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 6 +++--- drivers/acpi/nfit.h | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 32579a7b71d5..ac6ddcc080d4 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -928,7 +928,7 @@ static ssize_t format_show(struct device *dev, { struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); - return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->code)); + return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code)); } static DEVICE_ATTR_RO(format); @@ -961,8 +961,8 @@ static ssize_t format1_show(struct device *dev, continue; if (nfit_dcr->dcr->code == dcr->code) continue; - rc = sprintf(buf, "%#x\n", - be16_to_cpu(nfit_dcr->dcr->code)); + rc = sprintf(buf, "0x%04x\n", + le16_to_cpu(nfit_dcr->dcr->code)); break; } if (rc != ENXIO) diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 11cb38348aef..02b9ea1e8d2e 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -53,12 +53,12 @@ enum nfit_uuids { }; /* - * Region format interface codes are stored as an array of bytes in the - * NFIT DIMM Control Region structure + * Region format interface codes are stored with the interface as the + * LSB and the function as the MSB. */ -#define NFIT_FIC_BYTE cpu_to_be16(0x101) /* byte-addressable energy backed */ -#define NFIT_FIC_BLK cpu_to_be16(0x201) /* block-addressable non-energy backed */ -#define NFIT_FIC_BYTEN cpu_to_be16(0x301) /* byte-addressable non-energy backed */ +#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */ +#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */ +#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */ enum { NFIT_BLK_READ_FLUSH = 1, From 3c35f6edc09b239a60de87a5aeb78563fc372704 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:49 +0100 Subject: [PATCH 1004/1050] reset: Reorder inline reset_control_get*() wrappers We're about to split the current API into two, where consumers will be forced to be explicit when requesting reset lines. The choice will be to either the call the *_exclusive or *_shared variant depending on whether they can actually tolorate not being asserted when that request is made. The new API will look like this once reorded and complete: reset_control_get_exclusive() reset_control_get_shared() reset_control_get_optional_exclusive() reset_control_get_optional_shared() of_reset_control_get_exclusive() of_reset_control_get_shared() of_reset_control_get_exclusive_by_index() of_reset_control_get_shared_by_index() devm_reset_control_get_exclusive() devm_reset_control_get_shared() devm_reset_control_get_optional_exclusive() devm_reset_control_get_optional_shared() devm_reset_control_get_exclusive_by_index() devm_reset_control_get_shared_by_index() Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index ec0306ce7b92..33eaf11dabe2 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -107,12 +107,6 @@ static inline struct reset_control *__must_check reset_control_get( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } -static inline struct reset_control *reset_control_get_optional( - struct device *dev, const char *id) -{ - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); -} - /** * reset_control_get_shared - Lookup and obtain a shared reference to a * reset controller. @@ -141,6 +135,12 @@ static inline struct reset_control *reset_control_get_shared( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); } +static inline struct reset_control *reset_control_get_optional( + struct device *dev, const char *id) +{ + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); +} + /** * of_reset_control_get - Lookup and obtain an exclusive reference to a * reset controller. @@ -191,6 +191,21 @@ static inline struct reset_control *__must_check devm_reset_control_get( return __devm_reset_control_get(dev, id, 0, 0); } +/** + * devm_reset_control_get_shared - resource managed reset_control_get_shared() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_shared(). For reset controllers returned from + * this function, reset_control_put() is called automatically on driver detach. + * See reset_control_get_shared() for more information. + */ +static inline struct reset_control *devm_reset_control_get_shared( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, 1); +} + static inline struct reset_control *devm_reset_control_get_optional( struct device *dev, const char *id) { @@ -212,21 +227,6 @@ static inline struct reset_control *devm_reset_control_get_by_index( return __devm_reset_control_get(dev, NULL, index, 0); } -/** - * devm_reset_control_get_shared - resource managed reset_control_get_shared() - * @dev: device to be reset by the controller - * @id: reset line name - * - * Managed reset_control_get_shared(). For reset controllers returned from - * this function, reset_control_put() is called automatically on driver detach. - * See reset_control_get_shared() for more information. - */ -static inline struct reset_control *devm_reset_control_get_shared( - struct device *dev, const char *id) -{ - return __devm_reset_control_get(dev, id, 0, 1); -} - /** * devm_reset_control_get_shared_by_index - resource managed * reset_control_get_shared From a53e35db70d1638002e40d495d096181dbb0fe16 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:50 +0100 Subject: [PATCH 1005/1050] reset: Ensure drivers are explicit when requesting reset lines Phasing out generic reset line requests enables us to make some better decisions on when and how to (de)assert said lines. If an 'exclusive' line is requested, we know a device *requires* a reset and that it's preferable to act upon a request right away. However, if a 'shared' reset line is requested, we can reasonably assume sure that placing a device into reset isn't a hard requirement, but probably a measure to save power and is thus able to cope with not being asserted if another device is still in use. In order allow gentle adoption and not to forcing all consumers to move to the API immediately, causing administration headache between subsystems, this patch adds some temporary stand-in shim-calls. This will ease the burden at merge time and allow subsystems to migrate over to the new API in a more realistic time-frame. Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 106 ++++++++++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 24 deletions(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index 33eaf11dabe2..9cf4cf39bf1d 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -84,8 +84,8 @@ static inline struct reset_control *__devm_reset_control_get( #endif /* CONFIG_RESET_CONTROLLER */ /** - * reset_control_get - Lookup and obtain an exclusive reference to a - * reset controller. + * reset_control_get_exclusive - Lookup and obtain an exclusive reference + * to a reset controller. * @dev: device to be reset by the controller * @id: reset line name * @@ -98,8 +98,8 @@ static inline struct reset_control *__devm_reset_control_get( * * Use of id names is optional. */ -static inline struct reset_control *__must_check reset_control_get( - struct device *dev, const char *id) +static inline struct reset_control * +__must_check reset_control_get_exclusive(struct device *dev, const char *id) { #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); @@ -135,15 +135,15 @@ static inline struct reset_control *reset_control_get_shared( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); } -static inline struct reset_control *reset_control_get_optional( +static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } /** - * of_reset_control_get - Lookup and obtain an exclusive reference to a - * reset controller. + * of_reset_control_get_exclusive - Lookup and obtain an exclusive reference + * to a reset controller. * @node: device to be reset by the controller * @id: reset line name * @@ -151,15 +151,16 @@ static inline struct reset_control *reset_control_get_optional( * * Use of id names is optional. */ -static inline struct reset_control *of_reset_control_get( +static inline struct reset_control *of_reset_control_get_exclusive( struct device_node *node, const char *id) { return __of_reset_control_get(node, id, 0, 0); } /** - * of_reset_control_get_by_index - Lookup and obtain an exclusive reference to - * a reset controller by index. + * of_reset_control_get_exclusive_by_index - Lookup and obtain an exclusive + * reference to a reset controller + * by index. * @node: device to be reset by the controller * @index: index of the reset controller * @@ -167,23 +168,27 @@ static inline struct reset_control *of_reset_control_get( * in whatever order. Returns a struct reset_control or IS_ERR() condition * containing errno. */ -static inline struct reset_control *of_reset_control_get_by_index( +static inline struct reset_control *of_reset_control_get_exclusive_by_index( struct device_node *node, int index) { return __of_reset_control_get(node, NULL, index, 0); } /** - * devm_reset_control_get - resource managed reset_control_get() + * devm_reset_control_get_exclusive - resource managed + * reset_control_get_exclusive() * @dev: device to be reset by the controller * @id: reset line name * - * Managed reset_control_get(). For reset controllers returned from this - * function, reset_control_put() is called automatically on driver detach. - * See reset_control_get() for more information. + * Managed reset_control_get_exclusive(). For reset controllers returned + * from this function, reset_control_put() is called automatically on driver + * detach. + * + * See reset_control_get_exclusive() for more information. */ -static inline struct reset_control *__must_check devm_reset_control_get( - struct device *dev, const char *id) +static inline struct reset_control * +__must_check devm_reset_control_get_exclusive(struct device *dev, + const char *id) { #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); @@ -206,23 +211,26 @@ static inline struct reset_control *devm_reset_control_get_shared( return __devm_reset_control_get(dev, id, 0, 1); } -static inline struct reset_control *devm_reset_control_get_optional( +static inline struct reset_control *devm_reset_control_get_optional_exclusive( struct device *dev, const char *id) { return __devm_reset_control_get(dev, id, 0, 0); } /** - * devm_reset_control_get_by_index - resource managed reset_control_get + * devm_reset_control_get_exclusive_by_index - resource managed + * reset_control_get_exclusive() * @dev: device to be reset by the controller * @index: index of the reset controller * - * Managed reset_control_get(). For reset controllers returned from this - * function, reset_control_put() is called automatically on driver detach. - * See reset_control_get() for more information. + * Managed reset_control_get_exclusive(). For reset controllers returned from + * this function, reset_control_put() is called automatically on driver + * detach. + * + * See reset_control_get_exclusive() for more information. */ -static inline struct reset_control *devm_reset_control_get_by_index( - struct device *dev, int index) +static inline struct reset_control * +devm_reset_control_get_exclusive_by_index(struct device *dev, int index) { return __devm_reset_control_get(dev, NULL, index, 0); } @@ -243,4 +251,54 @@ static inline struct reset_control *devm_reset_control_get_shared_by_index( return __devm_reset_control_get(dev, NULL, index, 1); } +/* + * TEMPORARY calls to use during transition: + * + * of_reset_control_get() => of_reset_control_get_exclusive() + * + * These inline function calls will be removed once all consumers + * have been moved over to the new explicit API. + */ +static inline struct reset_control *reset_control_get( + struct device *dev, const char *id) +{ + return reset_control_get_exclusive(dev, id); +} + +static inline struct reset_control *reset_control_get_optional( + struct device *dev, const char *id) +{ + return reset_control_get_optional_exclusive(dev, id); +} + +static inline struct reset_control *of_reset_control_get( + struct device_node *node, const char *id) +{ + return of_reset_control_get_exclusive(node, id); +} + +static inline struct reset_control *of_reset_control_get_by_index( + struct device_node *node, int index) +{ + return of_reset_control_get_exclusive_by_index(node, index); +} + +static inline struct reset_control *devm_reset_control_get( + struct device *dev, const char *id) +{ + return devm_reset_control_get_exclusive(dev, id); +} + +static inline struct reset_control *devm_reset_control_get_optional( + struct device *dev, const char *id) +{ + return devm_reset_control_get_optional_exclusive(dev, id); + +} + +static inline struct reset_control *devm_reset_control_get_by_index( + struct device *dev, int index) +{ + return devm_reset_control_get_exclusive_by_index(dev, index); +} #endif From 40faee8ee471ae526344cd5c62ff520d356de841 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:51 +0100 Subject: [PATCH 1006/1050] reset: Supply *_shared variant calls when using of_* API Consumers need to be able to specify whether they are requesting an 'exclusive' or 'shared' reset line no matter which API (of_*, devm_*, etc) they are using. This change allows users of the of_* API in particular to specify that their request is for a 'shared' line. Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 53 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/include/linux/reset.h b/include/linux/reset.h index 9cf4cf39bf1d..fd69240907c8 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -157,6 +157,31 @@ static inline struct reset_control *of_reset_control_get_exclusive( return __of_reset_control_get(node, id, 0, 0); } +/** + * of_reset_control_get_shared - Lookup and obtain an shared reference + * to a reset controller. + * @node: device to be reset by the controller + * @id: reset line name + * + * When a reset-control is shared, the behavior of reset_control_assert / + * deassert is changed, the reset-core will keep track of a deassert_count + * and only (re-)assert the reset after reset_control_assert has been called + * as many times as reset_control_deassert was called. Also see the remark + * about shared reset-controls in the reset_control_assert docs. + * + * Calling reset_control_assert without first calling reset_control_deassert + * is not allowed on a shared reset control. Calling reset_control_reset is + * also not allowed on a shared reset control. + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * Use of id names is optional. + */ +static inline struct reset_control *of_reset_control_get_shared( + struct device_node *node, const char *id) +{ + return __of_reset_control_get(node, id, 0, 1); +} + /** * of_reset_control_get_exclusive_by_index - Lookup and obtain an exclusive * reference to a reset controller @@ -174,6 +199,34 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index( return __of_reset_control_get(node, NULL, index, 0); } +/** + * of_reset_control_get_shared_by_index - Lookup and obtain an shared + * reference to a reset controller + * by index. + * @node: device to be reset by the controller + * @index: index of the reset controller + * + * When a reset-control is shared, the behavior of reset_control_assert / + * deassert is changed, the reset-core will keep track of a deassert_count + * and only (re-)assert the reset after reset_control_assert has been called + * as many times as reset_control_deassert was called. Also see the remark + * about shared reset-controls in the reset_control_assert docs. + * + * Calling reset_control_assert without first calling reset_control_deassert + * is not allowed on a shared reset control. Calling reset_control_reset is + * also not allowed on a shared reset control. + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * This is to be used to perform a list of resets for a device or power domain + * in whatever order. Returns a struct reset_control or IS_ERR() condition + * containing errno. + */ +static inline struct reset_control *of_reset_control_get_shared_by_index( + struct device_node *node, int index) +{ + return __of_reset_control_get(node, NULL, index, 1); +} + /** * devm_reset_control_get_exclusive - resource managed * reset_control_get_exclusive() From c33d61a0c400c50f378ef03a386e646abca292ca Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:52 +0100 Subject: [PATCH 1007/1050] reset: Supply *_shared variant calls when using *_optional APIs Consumers need to be able to specify whether they are requesting an 'exclusive' or 'shared' reset line no matter which API (of_*, devm_*, etc) they are using. This change allows users of the optional_* API in particular to specify that their request is for a 'shared' line. Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/reset.h b/include/linux/reset.h index fd69240907c8..c358106611af 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -141,6 +141,12 @@ static inline struct reset_control *reset_control_get_optional_exclusive( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } +static inline struct reset_control *reset_control_get_optional_shared( + struct device *dev, const char *id) +{ + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); +} + /** * of_reset_control_get_exclusive - Lookup and obtain an exclusive reference * to a reset controller. @@ -270,6 +276,12 @@ static inline struct reset_control *devm_reset_control_get_optional_exclusive( return __devm_reset_control_get(dev, id, 0, 0); } +static inline struct reset_control *devm_reset_control_get_optional_shared( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, 1); +} + /** * devm_reset_control_get_exclusive_by_index - resource managed * reset_control_get_exclusive() From 0bcc0eab363aa80f79769324bf3f2ab7781840f2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:53 +0100 Subject: [PATCH 1008/1050] reset: TRIVIAL: Add line break at same place for similar APIs Standardise the way inline functions: devm_reset_control_get_shared_by_index devm_reset_control_get_exclusive_by_index ... are formatted. Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index c358106611af..45a4abeb6acb 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -310,8 +310,8 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index) * this function, reset_control_put() is called automatically on driver detach. * See reset_control_get_shared() for more information. */ -static inline struct reset_control *devm_reset_control_get_shared_by_index( - struct device *dev, int index) +static inline struct reset_control * +devm_reset_control_get_shared_by_index(struct device *dev, int index) { return __devm_reset_control_get(dev, NULL, index, 1); } From f5ee3f2b5ad6881a8194b9f684ce11640d1062f7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 18:08:54 +0100 Subject: [PATCH 1009/1050] usb: host: ohci-st: Inform the reset framework that our reset line may be shared On the STiH410 B2120 development board the ST EHCI IP shares its reset line with the OHCI IP. New functionality in the reset subsystems forces consumers to be explicit when requesting shared/exclusive reset lines. Acked-by: Alan Stern Signed-off-by: Lee Jones --- drivers/usb/host/ohci-st.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ohci-st.c b/drivers/usb/host/ohci-st.c index acf2eb2a5676..02816a1515a1 100644 --- a/drivers/usb/host/ohci-st.c +++ b/drivers/usb/host/ohci-st.c @@ -188,13 +188,15 @@ static int st_ohci_platform_probe(struct platform_device *dev) priv->clk48 = NULL; } - priv->pwr = devm_reset_control_get_optional(&dev->dev, "power"); + priv->pwr = + devm_reset_control_get_optional_shared(&dev->dev, "power"); if (IS_ERR(priv->pwr)) { err = PTR_ERR(priv->pwr); goto err_put_clks; } - priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset"); + priv->rst = + devm_reset_control_get_optional_shared(&dev->dev, "softreset"); if (IS_ERR(priv->rst)) { err = PTR_ERR(priv->rst); goto err_put_clks; From 19599304625b74c95bff318c735928eec668b1ca Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 18:08:53 +0100 Subject: [PATCH 1010/1050] usb: host: ehci-st: Inform the reset framework that our reset line may be shared On the STiH410 B2120 development board the ST EHCI IP shares its reset line with the OHCI IP. New functionality in the reset subsystems forces consumers to be explicit when requesting shared/exclusive reset lines. Acked-by: Peter Griffin Acked-by: Alan Stern Signed-off-by: Lee Jones --- drivers/usb/host/ehci-st.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ehci-st.c b/drivers/usb/host/ehci-st.c index a94ed677d937..be4a2788fc58 100644 --- a/drivers/usb/host/ehci-st.c +++ b/drivers/usb/host/ehci-st.c @@ -206,7 +206,8 @@ static int st_ehci_platform_probe(struct platform_device *dev) priv->clk48 = NULL; } - priv->pwr = devm_reset_control_get_optional(&dev->dev, "power"); + priv->pwr = + devm_reset_control_get_optional_shared(&dev->dev, "power"); if (IS_ERR(priv->pwr)) { err = PTR_ERR(priv->pwr); if (err == -EPROBE_DEFER) @@ -214,7 +215,8 @@ static int st_ehci_platform_probe(struct platform_device *dev) priv->pwr = NULL; } - priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset"); + priv->rst = + devm_reset_control_get_optional_shared(&dev->dev, "softreset"); if (IS_ERR(priv->rst)) { err = PTR_ERR(priv->rst); if (err == -EPROBE_DEFER) From 002f17bc54ff4005260d8e6e6700de97f5b25679 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 28 Jun 2016 09:23:58 +0100 Subject: [PATCH 1011/1050] usb: dwc3: st: Inform the reset framework that our reset line may be shared On the STiH410 B2120 development board the MiPHY28lp shares its reset line with the Synopsys DWC3 SuperSpeed (SS) USB 3.0 Dual-Role-Device (DRD). New functionality in the reset subsystems forces consumers to be explicit when requesting shared/exclusive reset lines. Acked-by: Felipe Balbi Signed-off-by: Lee Jones --- drivers/usb/dwc3/dwc3-st.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index 5c0adb9c6fb2..b204617ea4e6 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -237,7 +237,8 @@ static int st_dwc3_probe(struct platform_device *pdev) /* Manage PowerDown */ reset_control_deassert(dwc3_data->rstc_pwrdn); - dwc3_data->rstc_rst = devm_reset_control_get(dev, "softreset"); + dwc3_data->rstc_rst = + devm_reset_control_get_shared(dev, "softreset"); if (IS_ERR(dwc3_data->rstc_rst)) { dev_err(&pdev->dev, "could not get reset controller\n"); ret = PTR_ERR(dwc3_data->rstc_rst); From 9278e707f4e187df2b4d9eeb2bc78a1724fbe4ac Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 28 Jun 2016 09:32:12 +0100 Subject: [PATCH 1012/1050] phy: phy-stih407-usb: Inform the reset framework that our reset line may be shared On the STiH410 B2120 development board the ports on the Generic PHY share their reset lines with each other. New functionality in the reset subsystems forces consumers to be explicit when requesting shared/exclusive reset lines. Signed-off-by: Lee Jones --- drivers/phy/phy-stih407-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/phy-stih407-usb.c b/drivers/phy/phy-stih407-usb.c index 1d5ae5f8ef69..53cf8d1b4116 100644 --- a/drivers/phy/phy-stih407-usb.c +++ b/drivers/phy/phy-stih407-usb.c @@ -105,7 +105,7 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev) phy_dev->dev = dev; dev_set_drvdata(dev, phy_dev); - phy_dev->rstc = devm_reset_control_get(dev, "global"); + phy_dev->rstc = devm_reset_control_get_shared(dev, "global"); if (IS_ERR(phy_dev->rstc)) { dev_err(dev, "failed to ctrl picoPHY reset\n"); return PTR_ERR(phy_dev->rstc); From 82d8eb40bab10082050be945c8e7096df8e9f989 Mon Sep 17 00:00:00 2001 From: Rhyland Klein Date: Thu, 12 May 2016 13:45:04 -0400 Subject: [PATCH 1013/1050] mfd: max77620: Fix FPS switch statements When configuring FPS during probe, assuming a DT node is present for FPS, the code can run into a problem with the switch statements in max77620_config_fps() and max77620_get_fps_period_reg_value(). Namely, in the case of chip->chip_id == MAX77620, it will set fps_[mix|max]_period but then fall through to the default switch case and return -EINVAL. Returning this from max77620_config_fps() will cause probe to fail. Signed-off-by: Rhyland Klein Reviewed-by: Laxman Dewangan Reviewed-by: Thierry Reding Tested-by: Thierry Reding Tested-by: Alexandre Courbot Signed-off-by: Lee Jones --- drivers/mfd/max77620.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c index 199d261990be..f32fbb8e8129 100644 --- a/drivers/mfd/max77620.c +++ b/drivers/mfd/max77620.c @@ -203,6 +203,7 @@ static int max77620_get_fps_period_reg_value(struct max77620_chip *chip, break; case MAX77620: fps_min_period = MAX77620_FPS_PERIOD_MIN_US; + break; default: return -EINVAL; } @@ -236,6 +237,7 @@ static int max77620_config_fps(struct max77620_chip *chip, break; case MAX77620: fps_max_period = MAX77620_FPS_PERIOD_MAX_US; + break; default: return -EINVAL; } From ba4a1c28a92df55b9263e838068b92eaa80c7850 Mon Sep 17 00:00:00 2001 From: Steve Twiss Date: Mon, 27 Jun 2016 16:06:36 +0100 Subject: [PATCH 1014/1050] mfd: da9053: Fix compiler warning message for uninitialised variable Fix compiler warning caused by an uninitialised variable inside da9052_group_write() function. Defaulting the value to zero covers the trivial case. Signed-off-by: Steve Twiss Reported-by: Geert Uytterhoeven Signed-off-by: Lee Jones --- include/linux/mfd/da9052/da9052.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index c18a4c19d6fc..ce9230af09c2 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -171,7 +171,7 @@ static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg, static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg, unsigned reg_cnt, unsigned char *val) { - int ret; + int ret = 0; int i; for (i = 0; i < reg_cnt; i++) { From bfa37087aa04e45f56c41142dfceecb79b8e6ef9 Mon Sep 17 00:00:00 2001 From: Darren Stevens Date: Wed, 29 Jun 2016 21:06:28 +0100 Subject: [PATCH 1015/1050] powerpc: Initialise pci_io_base as early as possible Commit d6a9996e84ac ("powerpc/mm: vmalloc abstraction in preparation for radix") turned kernel memory and IO addresses from #defined constants to variables initialised at runtime. On PA6T (pasemi) systems the setup_arch() machine call initialises the onboard PCI-e root-ports, and uses pci_io_base to do this, which is now before its value has been set, resulting in a panic early in boot before console IO is initialised. Move the pci_io_base initialisation to the same place as vmalloc ranges are set (hash__early_init_mmu()/radix__early_init_mmu()) - this is the earliest possible place we can initialise it. Fixes: d6a9996e84ac ("powerpc/mm: vmalloc abstraction in preparation for radix") Reported-by: Christian Zigotzky Signed-off-by: Darren Stevens Reviewed-by: Aneesh Kumar K.V [mpe: Add #ifdef CONFIG_PCI, massage change log slightly] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + arch/powerpc/kernel/pci_64.c | 1 - arch/powerpc/mm/hash_utils_64.c | 4 ++++ arch/powerpc/mm/pgtable-radix.c | 5 +++++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 88a5ecaa157b..ab84c89c9e98 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size; #define KERN_VIRT_SIZE __kernel_virt_size extern struct page *vmemmap; extern unsigned long ioremap_bot; +extern unsigned long pci_io_base; #endif /* __ASSEMBLY__ */ #include diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df52bd67..a5ae49a2dcc4 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -47,7 +47,6 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); - pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. If we need it,g * later, we may move that initialization to each ppc_md */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5b22ba0b58bc..2971ea18c768 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -922,6 +922,10 @@ void __init hash__early_init_mmu(void) vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index e58707deef5c..7931e1496f0d 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -328,6 +328,11 @@ void __init radix__early_init_mmu(void) __vmalloc_end = RADIX_VMALLOC_END; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; + +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* * For now radix also use the same frag size */ From cab103274352721b77fc5923a631fc63350bef8e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 13 Jun 2016 23:42:00 +0000 Subject: [PATCH 1016/1050] drm/i915: Fix missing unlock on error in i915_ppgtt_info() Add the missing unlock before return from function i915_ppgtt_info() in the error handling case. Fixes: 1d2ac403ae3b(drm: Protect dev->filelist with its own mutex) Signed-off-by: Wei Yongjun Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1465861320-26221-1-git-send-email-weiyj_lk@163.com (cherry picked from commit b0212486909de4f239ca9f20d032de1b1f2dc52e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 32690332d441..103546834b60 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2365,16 +2365,16 @@ static int i915_ppgtt_info(struct seq_file *m, void *data) task = get_pid_task(file->pid, PIDTYPE_PID); if (!task) { ret = -ESRCH; - goto out_put; + goto out_unlock; } seq_printf(m, "\nproc: %s\n", task->comm); put_task_struct(task); idr_for_each(&file_priv->context_idr, per_file_ctx, (void *)(unsigned long)m); } +out_unlock: mutex_unlock(&dev->filelist_mutex); -out_put: intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); From 5c672ab3f0ee0f78f7acad183f34db0f8781a200 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 30 Jun 2016 13:10:49 +0200 Subject: [PATCH 1017/1050] fuse: serialize dirops by default Negotiate with userspace filesystems whether they support parallel readdir and lookup. Disable parallelism by default for fear of breaking fuse filesystems. Signed-off-by: Miklos Szeredi Fixes: 9902af79c01a ("parallel lookups: actual switch to rwsem") Fixes: d9b3dbdcfd62 ("fuse: switch to ->iterate_shared()") --- fs/fuse/dir.c | 4 ++++ fs/fuse/fuse_i.h | 9 +++++++++ fs/fuse/inode.c | 19 ++++++++++++++++++- include/uapi/linux/fuse.h | 7 ++++++- 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ccd4971cc6c1..264f07c7754e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -341,8 +341,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, struct dentry *newent; bool outarg_valid = true; + fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); + fuse_unlock_inode(dir); if (err == -ENOENT) { outarg_valid = false; err = 0; @@ -1341,7 +1343,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx) fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, FUSE_READDIR); } + fuse_lock_inode(inode); fuse_request_send(fc, req); + fuse_unlock_inode(inode); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index eddbe02c4028..929c383432b0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -110,6 +110,9 @@ struct fuse_inode { /** Miscellaneous bits describing inode state */ unsigned long state; + + /** Lock for serializing lookup and readdir for back compatibility*/ + struct mutex mutex; }; /** FUSE inode state bits */ @@ -540,6 +543,9 @@ struct fuse_conn { /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; + /** allow parallel lookups and readdir (default is serialized) */ + unsigned parallel_dirops:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -956,4 +962,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, void fuse_set_initialized(struct fuse_conn *fc); +void fuse_unlock_inode(struct inode *inode); +void fuse_lock_inode(struct inode *inode); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1ce67668a8e1..9961d8432ce3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); init_waitqueue_head(&fi->page_waitq); + mutex_init(&fi->mutex); fi->forget = fuse_alloc_forget(); if (!fi->forget) { kmem_cache_free(fuse_inode_cachep, inode); @@ -117,6 +118,7 @@ static void fuse_destroy_inode(struct inode *inode) struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!list_empty(&fi->write_files)); BUG_ON(!list_empty(&fi->queued_writes)); + mutex_destroy(&fi->mutex); kfree(fi->forget); call_rcu(&inode->i_rcu, fuse_i_callback); } @@ -351,6 +353,18 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, return 0; } +void fuse_lock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_lock(&get_fuse_inode(inode)->mutex); +} + +void fuse_unlock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_unlock(&get_fuse_inode(inode)->mutex); +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -898,6 +912,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_dio = 1; if (arg->flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (arg->flags & FUSE_PARALLEL_DIROPS) + fc->parallel_dirops = 1; if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { @@ -928,7 +944,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | - FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT; + FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | + FUSE_PARALLEL_DIROPS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5974fae54e12..27e17363263a 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -105,6 +105,9 @@ * * 7.24 * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support + * + * 7.25 + * - add FUSE_PARALLEL_DIROPS */ #ifndef _LINUX_FUSE_H @@ -140,7 +143,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 24 +#define FUSE_KERNEL_MINOR_VERSION 25 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -234,6 +237,7 @@ struct fuse_file_lock { * FUSE_ASYNC_DIO: asynchronous direct I/O submission * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens + * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -253,6 +257,7 @@ struct fuse_file_lock { #define FUSE_ASYNC_DIO (1 << 15) #define FUSE_WRITEBACK_CACHE (1 << 16) #define FUSE_NO_OPEN_SUPPORT (1 << 17) +#define FUSE_PARALLEL_DIROPS (1 << 18) /** * CUSE INIT request/reply flags From 54794580f5949253520265e46c903878ab222d84 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 29 Jun 2016 04:27:38 -0400 Subject: [PATCH 1018/1050] ACPI,PCI,IRQ: correct operator precedence The omitted parenthesis prevents the addition operation when acpi_penalize_isa_irq function is called. Fixes: 103544d86976 (ACPI,PCI,IRQ: reduce resource requirements) Signed-off-by: Sinan Kaya Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 8fc7323ed3e8..4ed4061813e6 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -839,7 +839,7 @@ void acpi_penalize_isa_irq(int irq, int active) { if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty))) acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) + - active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING; + (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING); } bool acpi_isa_irq_available(int irq) From cb7d224f82e41d82518e7f9ea271d215d4d08e6e Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 30 Jun 2016 10:39:32 -0400 Subject: [PATCH 1019/1050] lockd: unregister notifier blocks if the service fails to come up completely If the lockd service fails to start up then we need to be sure that the notifier blocks are not registered, otherwise a subsequent start of the service could cause the same notifier to be registered twice, leading to soft lockups. Signed-off-by: Scott Mayhew Cc: stable@vger.kernel.org Fixes: 0751ddf77b6a "lockd: Register callbacks on the inetaddr_chain..." Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 154a107cd376..fc4084ef4736 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -335,12 +335,17 @@ static struct notifier_block lockd_inet6addr_notifier = { }; #endif -static void lockd_svc_exit_thread(void) +static void lockd_unregister_notifiers(void) { unregister_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&lockd_inet6addr_notifier); #endif +} + +static void lockd_svc_exit_thread(void) +{ + lockd_unregister_notifiers(); svc_exit_thread(nlmsvc_rqst); } @@ -462,7 +467,7 @@ int lockd_up(struct net *net) * Note: svc_serv structures have an initial use count of 1, * so we exit through here on both success and failure. */ -err_net: +err_put: svc_destroy(serv); err_create: mutex_unlock(&nlmsvc_mutex); @@ -470,7 +475,9 @@ err_create: err_start: lockd_down_net(serv, net); - goto err_net; +err_net: + lockd_unregister_notifiers(); + goto err_put; } EXPORT_SYMBOL_GPL(lockd_up); From b403f0e37a11f84f7ceaf40b0075499e5bcfd220 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 29 Jun 2016 10:54:23 +0200 Subject: [PATCH 1020/1050] 9p: use file_dentry() v9fs may be used as lower layer of overlayfs and accessing f_path.dentry can lead to a crash. In this case it's a NULL pointer dereference in p9_fid_create(). Fix by replacing direct access of file->f_path.dentry with the file_dentry() accessor, which will always return a native object. Reported-by: Alessio Igor Bogani Signed-off-by: Miklos Szeredi Tested-by: Alessio Igor Bogani Fixes: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay") Cc: Signed-off-by: Al Viro --- fs/9p/vfs_file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b84c291ba1eb..d7b78d531e63 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) v9fs_proto_dotu(v9ses)); fid = file->private_data; if (!fid) { - fid = v9fs_fid_clone(file->f_path.dentry); + fid = v9fs_fid_clone(file_dentry(file)); if (IS_ERR(fid)) return PTR_ERR(fid); @@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) * because we want write after unlink usecase * to work. */ - fid = v9fs_writeback_fid(file->f_path.dentry); + fid = v9fs_writeback_fid(file_dentry(file)); if (IS_ERR(fid)) { err = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); @@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) * because we want write after unlink usecase * to work. */ - fid = v9fs_writeback_fid(filp->f_path.dentry); + fid = v9fs_writeback_fid(file_dentry(filp)); if (IS_ERR(fid)) { retval = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); From e06b933e6ded42384164d28a2060b7f89243b895 Mon Sep 17 00:00:00 2001 From: Andrey Ulanov Date: Fri, 15 Apr 2016 14:24:41 -0700 Subject: [PATCH 1021/1050] namespace: update event counter when umounting a deleted dentry - m_start() in fs/namespace.c expects that ns->event is incremented each time a mount added or removed from ns->list. - umount_tree() removes items from the list but does not increment event counter, expecting that it's done before the function is called. - There are some codepaths that call umount_tree() without updating "event" counter. e.g. from __detach_mounts(). - When this happens m_start may reuse a cached mount structure that no longer belongs to ns->list (i.e. use after free which usually leads to infinite loop). This change fixes the above problem by incrementing global event counter before invoking umount_tree(). Change-Id: I622c8e84dcb9fb63542372c5dbf0178ee86bb589 Cc: stable@vger.kernel.org Signed-off-by: Andrey Ulanov Signed-off-by: Al Viro --- fs/namespace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/namespace.c b/fs/namespace.c index 4fb1691b4355..298618b88bba 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1562,6 +1562,7 @@ void __detach_mounts(struct dentry *dentry) goto out_unlock; lock_mount_hash(); + event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { From 8293c8a3bb068bd2d2dfe00b6b0000a8fc5c860a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 3 Jun 2016 11:44:28 +0100 Subject: [PATCH 1022/1050] phy: miphy28lp: Inform the reset framework that our reset line may be shared On the STiH410 B2120 development board the MiPHY28lp shares its reset line with the Synopsys DWC3 SuperSpeed (SS) USB 3.0 Dual-Role-Device (DRD). New functionality in the reset subsystems forces consumers to be explicit when requesting shared/exclusive reset lines. Acked-by: Kishon Vijay Abraham I Signed-off-by: Lee Jones --- drivers/phy/phy-miphy28lp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/phy-miphy28lp.c b/drivers/phy/phy-miphy28lp.c index 3acd2a1808df..213e2e15339c 100644 --- a/drivers/phy/phy-miphy28lp.c +++ b/drivers/phy/phy-miphy28lp.c @@ -1143,7 +1143,8 @@ static int miphy28lp_probe_resets(struct device_node *node, struct miphy28lp_dev *miphy_dev = miphy_phy->phydev; int err; - miphy_phy->miphy_rst = of_reset_control_get(node, "miphy-sw-rst"); + miphy_phy->miphy_rst = + of_reset_control_get_shared(node, "miphy-sw-rst"); if (IS_ERR(miphy_phy->miphy_rst)) { dev_err(miphy_dev->dev, From f5f35830fb84364a24794fe6c7101f85318481d2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 28 Jun 2016 09:33:55 +0100 Subject: [PATCH 1023/1050] phy: phy-stih407-usb: Use explicit reset_control_get_exclusive() API We're making all reset line users specify whether their lines are shared with other IP or they operate them exclusively. In this case the line is exclusively used only by this IP, so use the *_exclusive() API accordingly. Acked-by: Kishon Vijay Abraham I Signed-off-by: Lee Jones --- drivers/phy/phy-stih407-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/phy-stih407-usb.c b/drivers/phy/phy-stih407-usb.c index 53cf8d1b4116..b1f44ab669fb 100644 --- a/drivers/phy/phy-stih407-usb.c +++ b/drivers/phy/phy-stih407-usb.c @@ -111,7 +111,7 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev) return PTR_ERR(phy_dev->rstc); } - phy_dev->rstport = devm_reset_control_get(dev, "port"); + phy_dev->rstport = devm_reset_control_get_exclusive(dev, "port"); if (IS_ERR(phy_dev->rstport)) { dev_err(dev, "failed to ctrl picoPHY reset\n"); return PTR_ERR(phy_dev->rstport); From 5baaf3b9efe127d239038de9219d381f4d882b26 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 28 Jun 2016 09:24:40 +0100 Subject: [PATCH 1024/1050] usb: dwc3: st: Use explicit reset_control_get_exclusive() API We're making all reset line users specify whether their lines are shared with other IP or they operate them exclusively. In this case the line is exclusively used only by this IP, so use the *_exclusive() API accordingly. Acked-by: Felipe Balbi Signed-off-by: Lee Jones --- drivers/usb/dwc3/dwc3-st.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index b204617ea4e6..e77bacb8358a 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -227,7 +227,8 @@ static int st_dwc3_probe(struct platform_device *pdev) dev_vdbg(&pdev->dev, "glue-logic addr 0x%p, syscfg-reg offset 0x%x\n", dwc3_data->glue_base, dwc3_data->syscfg_reg_off); - dwc3_data->rstc_pwrdn = devm_reset_control_get(dev, "powerdown"); + dwc3_data->rstc_pwrdn = + devm_reset_control_get_exclusive(dev, "powerdown"); if (IS_ERR(dwc3_data->rstc_pwrdn)) { dev_err(&pdev->dev, "could not get power controller\n"); ret = PTR_ERR(dwc3_data->rstc_pwrdn); From 6343a2120862f7023006c8091ad95c1f16a32077 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 1 Jul 2016 14:56:07 +0200 Subject: [PATCH 1025/1050] locks: use file_inode() (Another one for the f_path debacle.) ltp fcntl33 testcase caused an Oops in selinux_file_send_sigiotask. The reason is that generic_add_lease() used filp->f_path.dentry->inode while all the others use file_inode(). This makes a difference for files opened on overlayfs since the former will point to the overlay inode the latter to the underlying inode. So generic_add_lease() added the lease to the overlay inode and generic_delete_lease() removed it from the underlying inode. When the file was released the lease remained on the overlay inode's lock list, resulting in use after free. Reported-by: Eryu Guan Fixes: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay") Cc: Signed-off-by: Miklos Szeredi Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/locks.c b/fs/locks.c index 7c5f91be9b65..ee1b15f6fc13 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1628,7 +1628,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr { struct file_lock *fl, *my_fl = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; From 6d037de90a1fd7b4879b48d4dd5c4839b271be98 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 1 Jul 2016 15:01:01 +0200 Subject: [PATCH 1026/1050] MIPS: Fix possible corruption of cache mode by mprotect. The following testcase may result in a page table entries with a invalid CCA field being generated: static void *bindstack; static int sysrqfd; static void protect_low(int protect) { mprotect(bindstack, BINDSTACK_SIZE, protect); } static void sigbus_handler(int signal, siginfo_t * info, void *context) { void *addr = info->si_addr; write(sysrqfd, "x", 1); printf("sigbus, fault address %p (should not happen, but might)\n", addr); abort(); } static void run_bind_test(void) { unsigned int *p = bindstack; p[0] = 0xf001f001; write(sysrqfd, "x", 1); /* Set trap on access to p[0] */ protect_low(PROT_NONE); write(sysrqfd, "x", 1); /* Clear trap on access to p[0] */ protect_low(PROT_READ | PROT_WRITE | PROT_EXEC); write(sysrqfd, "x", 1); /* Check the contents of p[0] */ if (p[0] != 0xf001f001) { write(sysrqfd, "x", 1); /* Reached, but shouldn't be */ printf("badness, shouldn't happen but does\n"); abort(); } } int main(void) { struct sigaction sa; sysrqfd = open("/proc/sysrq-trigger", O_WRONLY); if (sigprocmask(SIG_BLOCK, NULL, &sa.sa_mask)) { perror("sigprocmask"); return 0; } sa.sa_sigaction = sigbus_handler; sa.sa_flags = SA_SIGINFO | SA_NODEFER | SA_RESTART; if (sigaction(SIGBUS, &sa, NULL)) { perror("sigaction"); return 0; } bindstack = mmap(NULL, BINDSTACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (bindstack == MAP_FAILED) { perror("mmap bindstack"); return 0; } printf("bindstack: %p\n", bindstack); run_bind_test(); printf("done\n"); return 0; } There are multiple ingredients for this: 1) PAGE_NONE is defined to _CACHE_CACHABLE_NONCOHERENT, which is CCA 3 on all platforms except SB1 where it's CCA 5. 2) _page_cachable_default must have bits set which are not set _CACHE_CACHABLE_NONCOHERENT. 3) Either the defective version of pte_modify for XPA or the standard version must be in used. However pte_modify for the 36 bit address space support is no affected. In that case additional bits in the final CCA mode may generate an invalid value for the CCA field. On the R10000 system where this was tracked down for example a CCA 7 has been observed, which is Uncached Accelerated. Fixed by: 1) Using the proper CCA mode for PAGE_NONE just like for all the other PAGE_* pte/pmd bits. 2) Fix the two affected variants of pte_modify. Further code inspection also shows the same issue to exist in pmd_modify which would affect huge page systems. Issue in pte_modify tracked down by Alastair Bridgewater, PAGE_NONE and pmd_modify issue found by me. The history of this goes back beyond Linus' git history. Chris Dearman's commit 351336929ccf222ae38ff0cb7a8dd5fd5c6236a0 ("[MIPS] Allow setting of the cache attribute at run time.") missed the opportunity to fix this but it was originally introduced in lmo commit d523832cf12007b3242e50bb77d0c9e63e0b6518 ("Missing from last commit.") and 32cc38229ac7538f2346918a09e75413e8861f87 ("New configuration option CONFIG_MIPS_UNCACHED.") Signed-off-by: Ralf Baechle Reported-by: Alastair Bridgewater --- arch/mips/include/asm/pgtable.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index a6b611f1da43..f53816744d60 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -24,7 +24,7 @@ struct mm_struct; struct vm_area_struct; #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \ - _CACHE_CACHABLE_NONCOHERENT) + _page_cachable_default) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ _page_cachable_default) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \ @@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte.pte_low &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK); pte.pte_high &= (_PFN_MASK | _CACHE_MASK); pte.pte_low |= pgprot_val(newprot) & ~_PFNX_MASK; - pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK; + pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK); return pte; } #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) @@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #else static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } #endif @@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot); + pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK); return pmd; } From e7c0b5991dd1be7b6f6dc2b54a15a0f47b64b007 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 1 Jul 2016 10:02:44 -0400 Subject: [PATCH 1027/1050] ovl: warn instead of error if d_type is not supported overlay needs underlying fs to support d_type. Recently I put in a patch in to detect this condition and started failing mount if underlying fs did not support d_type. But this breaks existing configurations over kernel upgrade. Those who are running docker (partially broken configuration) with xfs not supporting d_type, are surprised that after kernel upgrade docker does not run anymore. https://github.com/docker/docker/issues/22937#issuecomment-229881315 So instead of erroring out, detect broken configuration and warn about it. This should allow existing docker setups to continue working after kernel upgrade. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi Fixes: 45aebeaf4f67 ("ovl: Ensure upper filesystem supports d_type") Cc: 4.6 --- fs/overlayfs/super.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ce02f46029da..9a7693d5f8ff 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1082,11 +1082,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err < 0) goto out_put_workdir; - if (!err) { - pr_err("overlayfs: upper fs needs to support d_type.\n"); - err = -EINVAL; - goto out_put_workdir; - } + /* + * We allowed this configuration and don't want to + * break users over kernel upgrade. So warn instead + * of erroring out. + */ + if (!err) + pr_warn("overlayfs: upper fs needs to support d_type.\n"); } } From a99cde438de0c4c0cecc1d1af1a55a75b10bfdef Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Jul 2016 23:01:00 -0700 Subject: [PATCH 1028/1050] Linux 4.7-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2d24babe6f8b..0d504893df6e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From e675447bda51c1ea72d1ac9132ce3bed974f1da3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:15 +0000 Subject: [PATCH 1029/1050] timers: Make 'pinned' a timer property We want to move the timer migration logic from a 'push' to a 'pull' model. Under the current 'push' model pinned timers are handled via a runtime API variant: mod_timer_pinned(). The 'pull' model requires us to store the pinned attribute of a timer in the timer_list structure itself, as a new TIMER_PINNED bit in timer->flags. This flag must be set at initialization time and the timer APIs recognize the flag. This patch: - Implements the new flag and associated new-style initialization methods - makes mod_timer() recognize new-style pinned timers, - and adds some migration helper facility to allow step by step conversion of old-style to new-style pinned timers. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.049338558@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timer.h | 25 ++++++++++++++++++++++--- kernel/time/timer.c | 10 +++++----- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 20ac746f3eb3..046d6cf26498 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -62,7 +62,8 @@ struct timer_list { #define TIMER_MIGRATING 0x00080000 #define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING) #define TIMER_DEFERRABLE 0x00100000 -#define TIMER_IRQSAFE 0x00200000 +#define TIMER_PINNED 0x00200000 +#define TIMER_IRQSAFE 0x00400000 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ @@ -78,9 +79,15 @@ struct timer_list { #define TIMER_INITIALIZER(_function, _expires, _data) \ __TIMER_INITIALIZER((_function), (_expires), (_data), 0) +#define TIMER_PINNED_INITIALIZER(_function, _expires, _data) \ + __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_PINNED) + #define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) \ __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE) +#define TIMER_PINNED_DEFERRED_INITIALIZER(_function, _expires, _data) \ + __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE | TIMER_PINNED) + #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ TIMER_INITIALIZER(_function, _expires, _data) @@ -124,8 +131,12 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define init_timer(timer) \ __init_timer((timer), 0) +#define init_timer_pinned(timer) \ + __init_timer((timer), TIMER_PINNED) #define init_timer_deferrable(timer) \ __init_timer((timer), TIMER_DEFERRABLE) +#define init_timer_pinned_deferrable(timer) \ + __init_timer((timer), TIMER_DEFERRABLE | TIMER_PINNED) #define init_timer_on_stack(timer) \ __init_timer_on_stack((timer), 0) @@ -145,12 +156,20 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define setup_timer(timer, fn, data) \ __setup_timer((timer), (fn), (data), 0) +#define setup_pinned_timer(timer, fn, data) \ + __setup_timer((timer), (fn), (data), TIMER_PINNED) #define setup_deferrable_timer(timer, fn, data) \ __setup_timer((timer), (fn), (data), TIMER_DEFERRABLE) +#define setup_pinned_deferrable_timer(timer, fn, data) \ + __setup_timer((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) #define setup_timer_on_stack(timer, fn, data) \ __setup_timer_on_stack((timer), (fn), (data), 0) +#define setup_pinned_timer_on_stack(timer, fn, data) \ + __setup_timer_on_stack((timer), (fn), (data), TIMER_PINNED) #define setup_deferrable_timer_on_stack(timer, fn, data) \ __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE) +#define setup_pinned_deferrable_timer_on_stack(timer, fn, data) \ + __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) /** * timer_pending - is a timer pending? @@ -175,8 +194,8 @@ extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); extern void set_timer_slack(struct timer_list *time, int slack_hz); -#define TIMER_NOT_PINNED 0 -#define TIMER_PINNED 1 +#define MOD_TIMER_NOT_PINNED 0 +#define MOD_TIMER_PINNED 1 /* * The jiffies value which is added to now, when there is no timer * in the timer wheel: diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 3a95f9728778..693f6d14058e 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -782,7 +782,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, debug_activate(timer, expires); - new_base = get_target_base(base, pinned); + new_base = get_target_base(base, pinned || timer->flags & TIMER_PINNED); if (base != new_base) { /* @@ -825,7 +825,7 @@ out_unlock: */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { - return __mod_timer(timer, expires, true, TIMER_NOT_PINNED); + return __mod_timer(timer, expires, true, MOD_TIMER_NOT_PINNED); } EXPORT_SYMBOL(mod_timer_pending); @@ -900,7 +900,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires) if (timer_pending(timer) && timer->expires == expires) return 1; - return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); + return __mod_timer(timer, expires, false, MOD_TIMER_NOT_PINNED); } EXPORT_SYMBOL(mod_timer); @@ -928,7 +928,7 @@ int mod_timer_pinned(struct timer_list *timer, unsigned long expires) if (timer->expires == expires && timer_pending(timer)) return 1; - return __mod_timer(timer, expires, false, TIMER_PINNED); + return __mod_timer(timer, expires, false, MOD_TIMER_PINNED); } EXPORT_SYMBOL(mod_timer_pinned); @@ -1512,7 +1512,7 @@ signed long __sched schedule_timeout(signed long timeout) expire = timeout + jiffies; setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); - __mod_timer(&timer, expire, false, TIMER_NOT_PINNED); + __mod_timer(&timer, expire, false, MOD_TIMER_NOT_PINNED); schedule(); del_singleshot_timer_sync(&timer); From 920a4a70c55058a9997f2e35bf41503acf87c301 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:16 +0000 Subject: [PATCH 1030/1050] timers, x86/apic/uv: Initialize the UV heartbeat timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.133837204@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 29003154fafd..7a50519e6afc 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -919,7 +919,7 @@ static void uv_heartbeat(unsigned long ignored) uv_set_scir_bits(bits); /* enable next timer period */ - mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL); + mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); } static void uv_heartbeat_enable(int cpu) @@ -928,7 +928,7 @@ static void uv_heartbeat_enable(int cpu) struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); - setup_timer(timer, uv_heartbeat, cpu); + setup_pinned_timer(timer, uv_heartbeat, cpu); timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; add_timer_on(timer, cpu); uv_cpu_scir_info(cpu)->enabled = 1; From f9c287ba3861714a1959accf14e815c44291bec4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:17 +0000 Subject: [PATCH 1031/1050] timers, x86/mce: Initialize MCE restart timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.215783439@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 92e5e37d97bf..b80a6361a9e1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1309,7 +1309,7 @@ static void __restart_timer(struct timer_list *t, unsigned long interval) if (timer_pending(t)) { if (time_before(when, t->expires)) - mod_timer_pinned(t, when); + mod_timer(t, when); } else { t->expires = round_jiffies(when); add_timer_on(t, smp_processor_id()); @@ -1735,7 +1735,7 @@ static void __mcheck_cpu_init_timer(void) struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); - setup_timer(t, mce_timer_fn, cpu); + setup_pinned_timer(t, mce_timer_fn, cpu); mce_start_timer(cpu, t); } From 7bc54b652f13119f64e87dd96bb792efbfc5a786 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:18 +0000 Subject: [PATCH 1032/1050] timers, cpufreq/powernv: Initialize the gpstate timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.297014487@linutronix.de Signed-off-by: Ingo Molnar --- drivers/cpufreq/powernv-cpufreq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 54c45368e3f1..6bd715b7f11c 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -530,8 +530,7 @@ static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) else timer_interval = GPSTATE_TIMER_INTERVAL; - mod_timer_pinned(&gpstates->timer, jiffies + - msecs_to_jiffies(timer_interval)); + mod_timer(&gpstates->timer, jiffies + msecs_to_jiffies(timer_interval)); } /** @@ -699,7 +698,7 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->driver_data = gpstates; /* initialize timer */ - init_timer_deferrable(&gpstates->timer); + init_timer_pinned_deferrable(&gpstates->timer); gpstates->timer.data = (unsigned long)policy; gpstates->timer.function = gpstate_timer_handler; gpstates->timer.expires = jiffies + From 25df57605edbe92b6f4215226bd40e1a465ef6b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:19 +0000 Subject: [PATCH 1033/1050] timers, driver/net/ethernet/tile: Initialize the egress timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.376394205@linutronix.de Signed-off-by: Ingo Molnar --- drivers/net/ethernet/tile/tilepro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 922a443e3415..4ef605a90247 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -588,7 +588,7 @@ static bool tile_net_lepp_free_comps(struct net_device *dev, bool all) static void tile_net_schedule_egress_timer(struct tile_net_cpu *info) { if (!info->egress_timer_scheduled) { - mod_timer_pinned(&info->egress_timer, jiffies + 1); + mod_timer(&info->egress_timer, jiffies + 1); info->egress_timer_scheduled = true; } } @@ -1004,7 +1004,7 @@ static void tile_net_register(void *dev_ptr) BUG(); /* Initialize the egress timer. */ - init_timer(&info->egress_timer); + init_timer_pinned(&info->egress_timer); info->egress_timer.data = (long)info; info->egress_timer.function = tile_net_handle_egress_timer; From 01bab536b1fe982fcba8144e2ee11ac889a22f0e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:21 +0000 Subject: [PATCH 1034/1050] timers, drivers/tty/metag_da: Initialize the poll timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.456452642@linutronix.de Signed-off-by: Ingo Molnar --- drivers/tty/metag_da.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/metag_da.c b/drivers/tty/metag_da.c index 9325262289f9..25ccef2fe748 100644 --- a/drivers/tty/metag_da.c +++ b/drivers/tty/metag_da.c @@ -323,12 +323,12 @@ static void dashtty_timer(unsigned long ignored) if (channel >= 0) fetch_data(channel); - mod_timer_pinned(&poll_timer, jiffies + DA_TTY_POLL); + mod_timer(&poll_timer, jiffies + DA_TTY_POLL); } static void add_poll_timer(struct timer_list *poll_timer) { - setup_timer(poll_timer, dashtty_timer, 0); + setup_pinned_timer(poll_timer, dashtty_timer, 0); poll_timer->expires = jiffies + DA_TTY_POLL; /* From 853f90d49bbabcf4e01c615402c1bea1871d7646 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:22 +0000 Subject: [PATCH 1035/1050] timers, drivers/tty/mips_ejtag: Initialize the poll timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.537448301@linutronix.de Signed-off-by: Ingo Molnar --- drivers/tty/mips_ejtag_fdc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/mips_ejtag_fdc.c b/drivers/tty/mips_ejtag_fdc.c index a119176a1855..234123b0c642 100644 --- a/drivers/tty/mips_ejtag_fdc.c +++ b/drivers/tty/mips_ejtag_fdc.c @@ -689,7 +689,7 @@ static void mips_ejtag_fdc_tty_timer(unsigned long opaque) mips_ejtag_fdc_handle(priv); if (!priv->removing) - mod_timer_pinned(&priv->poll_timer, jiffies + FDC_TTY_POLL); + mod_timer(&priv->poll_timer, jiffies + FDC_TTY_POLL); } /* TTY Port operations */ @@ -1002,7 +1002,7 @@ static int mips_ejtag_fdc_tty_probe(struct mips_cdmm_device *dev) raw_spin_unlock_irq(&priv->lock); } else { /* If we didn't get an usable IRQ, poll instead */ - setup_timer(&priv->poll_timer, mips_ejtag_fdc_tty_timer, + setup_pinned_timer(&priv->poll_timer, mips_ejtag_fdc_tty_timer, (unsigned long)priv); priv->poll_timer.expires = jiffies + FDC_TTY_POLL; /* From f3438bc7813c439a06104ba074301c8bdd64ac8b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:23 +0000 Subject: [PATCH 1036/1050] timers, net/ipv4/inet: Initialize connection request timers as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.617891430@linutronix.de Signed-off-by: Ingo Molnar --- net/ipv4/inet_connection_sock.c | 7 ++++--- net/ipv4/inet_timewait_sock.c | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fa8c39804bdb..61a9deec2993 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -603,7 +603,7 @@ static void reqsk_timer_handler(unsigned long data) if (req->num_timeout++ == 0) atomic_dec(&queue->young); timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); - mod_timer_pinned(&req->rsk_timer, jiffies + timeo); + mod_timer(&req->rsk_timer, jiffies + timeo); return; } drop: @@ -617,8 +617,9 @@ static void reqsk_queue_hash_req(struct request_sock *req, req->num_timeout = 0; req->sk = NULL; - setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); - mod_timer_pinned(&req->rsk_timer, jiffies + timeout); + setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler, + (unsigned long)req); + mod_timer(&req->rsk_timer, jiffies + timeout); inet_ehash_insert(req_to_sk(req), NULL); /* before letting lookups find us, make sure all req fields diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 206581674806..ddcd56c08d14 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -188,7 +188,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, tw->tw_prot = sk->sk_prot_creator; atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); twsk_net_set(tw, sock_net(sk)); - setup_timer(&tw->tw_timer, tw_timer_handler, (unsigned long)tw); + setup_pinned_timer(&tw->tw_timer, tw_timer_handler, + (unsigned long)tw); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this @@ -248,7 +249,7 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) tw->tw_kill = timeo <= 4*HZ; if (!rearm) { - BUG_ON(mod_timer_pinned(&tw->tw_timer, jiffies + timeo)); + BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo)); atomic_inc(&tw->tw_dr->tw_count); } else { mod_timer_pending(&tw->tw_timer, jiffies + timeo); From 177ec0a0a531695210b277d734b2f92ee5796303 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:24 +0000 Subject: [PATCH 1037/1050] timers: Remove the deprecated mod_timer_pinned() API We switched all users to initialize the timers as pinned and call mod_timer(). Remove the now unused timer API function. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.706205231@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timer.h | 3 --- kernel/time/timer.c | 39 +++++---------------------------------- 2 files changed, 5 insertions(+), 37 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 046d6cf26498..a8f6c70eb414 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -190,12 +190,9 @@ extern void add_timer_on(struct timer_list *timer, int cpu); extern int del_timer(struct timer_list * timer); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); -extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); extern void set_timer_slack(struct timer_list *time, int slack_hz); -#define MOD_TIMER_NOT_PINNED 0 -#define MOD_TIMER_PINNED 1 /* * The jiffies value which is added to now, when there is no timer * in the timer wheel: diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 693f6d14058e..ba49c1cf80f5 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -764,8 +764,7 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, } static inline int -__mod_timer(struct timer_list *timer, unsigned long expires, - bool pending_only, int pinned) +__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) { struct tvec_base *base, *new_base; unsigned long flags; @@ -782,7 +781,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, debug_activate(timer, expires); - new_base = get_target_base(base, pinned || timer->flags & TIMER_PINNED); + new_base = get_target_base(base, timer->flags & TIMER_PINNED); if (base != new_base) { /* @@ -825,7 +824,7 @@ out_unlock: */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { - return __mod_timer(timer, expires, true, MOD_TIMER_NOT_PINNED); + return __mod_timer(timer, expires, true); } EXPORT_SYMBOL(mod_timer_pending); @@ -900,38 +899,10 @@ int mod_timer(struct timer_list *timer, unsigned long expires) if (timer_pending(timer) && timer->expires == expires) return 1; - return __mod_timer(timer, expires, false, MOD_TIMER_NOT_PINNED); + return __mod_timer(timer, expires, false); } EXPORT_SYMBOL(mod_timer); -/** - * mod_timer_pinned - modify a timer's timeout - * @timer: the timer to be modified - * @expires: new timeout in jiffies - * - * mod_timer_pinned() is a way to update the expire field of an - * active timer (if the timer is inactive it will be activated) - * and to ensure that the timer is scheduled on the current CPU. - * - * Note that this does not prevent the timer from being migrated - * when the current CPU goes offline. If this is a problem for - * you, use CPU-hotplug notifiers to handle it correctly, for - * example, cancelling the timer when the corresponding CPU goes - * offline. - * - * mod_timer_pinned(timer, expires) is equivalent to: - * - * del_timer(timer); timer->expires = expires; add_timer(timer); - */ -int mod_timer_pinned(struct timer_list *timer, unsigned long expires) -{ - if (timer->expires == expires && timer_pending(timer)) - return 1; - - return __mod_timer(timer, expires, false, MOD_TIMER_PINNED); -} -EXPORT_SYMBOL(mod_timer_pinned); - /** * add_timer - start a timer * @timer: the timer to be added @@ -1512,7 +1483,7 @@ signed long __sched schedule_timeout(signed long timeout) expire = timeout + jiffies; setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); - __mod_timer(&timer, expire, false, MOD_TIMER_NOT_PINNED); + __mod_timer(&timer, expire, false); schedule(); del_singleshot_timer_sync(&timer); From 2b1ecc3d1a6b10f8fbac7f83d80db30b5a2c2791 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:25 +0000 Subject: [PATCH 1038/1050] signals: Use hrtimer for sigtimedwait() We've converted most timeout related syscalls to hrtimers, but sigtimedwait() did not get this treatment. Convert it so we get a reasonable accuracy and remove the user space exposure to the timer wheel properties. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Al Viro Cc: Arjan van de Ven Cc: Chris Mason Cc: Cyril Hrubis Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.787164909@linutronix.de Signed-off-by: Ingo Molnar --- kernel/signal.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 96e9bc40667f..af21afc00d08 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2751,23 +2751,18 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) * @ts: upper bound on process time suspension */ int do_sigtimedwait(const sigset_t *which, siginfo_t *info, - const struct timespec *ts) + const struct timespec *ts) { + ktime_t *to = NULL, timeout = { .tv64 = KTIME_MAX }; struct task_struct *tsk = current; - long timeout = MAX_SCHEDULE_TIMEOUT; sigset_t mask = *which; - int sig; + int sig, ret = 0; if (ts) { if (!timespec_valid(ts)) return -EINVAL; - timeout = timespec_to_jiffies(ts); - /* - * We can be close to the next tick, add another one - * to ensure we will wait at least the time asked for. - */ - if (ts->tv_sec || ts->tv_nsec) - timeout++; + timeout = timespec_to_ktime(*ts); + to = &timeout; } /* @@ -2778,7 +2773,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info, spin_lock_irq(&tsk->sighand->siglock); sig = dequeue_signal(tsk, &mask, info); - if (!sig && timeout) { + if (!sig && timeout.tv64) { /* * None ready, temporarily unblock those we're interested * while we are sleeping in so that we'll be awakened when @@ -2790,8 +2785,9 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info, recalc_sigpending(); spin_unlock_irq(&tsk->sighand->siglock); - timeout = freezable_schedule_timeout_interruptible(timeout); - + __set_current_state(TASK_INTERRUPTIBLE); + ret = freezable_schedule_hrtimeout_range(to, tsk->timer_slack_ns, + HRTIMER_MODE_REL); spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, &tsk->real_blocked); sigemptyset(&tsk->real_blocked); @@ -2801,7 +2797,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info, if (sig) return sig; - return timeout ? -EINTR : -EAGAIN; + return ret ? -EINTR : -EAGAIN; } /** From 15dba1e37b5cfd7fab9bc84e0f05f35c918f4eef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:27 +0000 Subject: [PATCH 1039/1050] hlist: Add hlist_is_singular_node() helper Required to figure out whether the entry is the only one in the hlist. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.867631372@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/list.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/list.h b/include/linux/list.h index 5356f4d661a7..5183138aa932 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -678,6 +678,16 @@ static inline bool hlist_fake(struct hlist_node *h) return h->pprev == &h->next; } +/* + * Check whether the node is the only node of the head without + * accessing head: + */ +static inline bool +hlist_is_singular_node(struct hlist_node *n, struct hlist_head *h) +{ + return !n->next && n->pprev == &h->first; +} + /* * Move a list from one list head to another. Fixup the pprev * reference of the first entry if it exists. From 494af3ed7848de08640d98ee5aff57a45c137c3c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:28 +0000 Subject: [PATCH 1040/1050] timers: Give a few structs and members proper names Some of the names in the internal implementation of the timer code are not longer correct and others are simply too long to type. Clean it up before we switch the wheel implementation over to the new scheme. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.948752516@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 118 ++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ba49c1cf80f5..f259a3ef4577 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -77,10 +77,10 @@ struct tvec_root { struct hlist_head vec[TVR_SIZE]; }; -struct tvec_base { +struct timer_base { spinlock_t lock; struct timer_list *running_timer; - unsigned long timer_jiffies; + unsigned long clk; unsigned long next_timer; unsigned long active_timers; unsigned long all_timers; @@ -95,7 +95,7 @@ struct tvec_base { } ____cacheline_aligned; -static DEFINE_PER_CPU(struct tvec_base, tvec_bases); +static DEFINE_PER_CPU(struct timer_base, timer_bases); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) unsigned int sysctl_timer_migration = 1; @@ -106,15 +106,15 @@ void timers_update_migration(bool update_nohz) unsigned int cpu; /* Avoid the loop, if nothing to update */ - if (this_cpu_read(tvec_bases.migration_enabled) == on) + if (this_cpu_read(timer_bases.migration_enabled) == on) return; for_each_possible_cpu(cpu) { - per_cpu(tvec_bases.migration_enabled, cpu) = on; + per_cpu(timer_bases.migration_enabled, cpu) = on; per_cpu(hrtimer_bases.migration_enabled, cpu) = on; if (!update_nohz) continue; - per_cpu(tvec_bases.nohz_active, cpu) = true; + per_cpu(timer_bases.nohz_active, cpu) = true; per_cpu(hrtimer_bases.nohz_active, cpu) = true; } } @@ -134,18 +134,18 @@ int timer_migration_handler(struct ctl_table *table, int write, return ret; } -static inline struct tvec_base *get_target_base(struct tvec_base *base, +static inline struct timer_base *get_target_base(struct timer_base *base, int pinned) { if (pinned || !base->migration_enabled) - return this_cpu_ptr(&tvec_bases); - return per_cpu_ptr(&tvec_bases, get_nohz_timer_target()); + return this_cpu_ptr(&timer_bases); + return per_cpu_ptr(&timer_bases, get_nohz_timer_target()); } #else -static inline struct tvec_base *get_target_base(struct tvec_base *base, +static inline struct timer_base *get_target_base(struct timer_base *base, int pinned) { - return this_cpu_ptr(&tvec_bases); + return this_cpu_ptr(&timer_bases); } #endif @@ -371,10 +371,10 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) EXPORT_SYMBOL_GPL(set_timer_slack); static void -__internal_add_timer(struct tvec_base *base, struct timer_list *timer) +__internal_add_timer(struct timer_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; - unsigned long idx = expires - base->timer_jiffies; + unsigned long idx = expires - base->clk; struct hlist_head *vec; if (idx < TVR_SIZE) { @@ -394,7 +394,7 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer) * Can happen if you add a timer with expires == jiffies, * or you set a timer to go off in the past */ - vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); + vec = base->tv1.vec + (base->clk & TVR_MASK); } else { int i; /* If the timeout is larger than MAX_TVAL (on 64-bit @@ -403,7 +403,7 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer) */ if (idx > MAX_TVAL) { idx = MAX_TVAL; - expires = idx + base->timer_jiffies; + expires = idx + base->clk; } i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; vec = base->tv5.vec + i; @@ -412,11 +412,11 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer) hlist_add_head(&timer->entry, vec); } -static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) +static void internal_add_timer(struct timer_base *base, struct timer_list *timer) { /* Advance base->jiffies, if the base is empty */ if (!base->all_timers++) - base->timer_jiffies = jiffies; + base->clk = jiffies; __internal_add_timer(base, timer); /* @@ -707,7 +707,7 @@ static inline void detach_timer(struct timer_list *timer, bool clear_pending) } static inline void -detach_expired_timer(struct timer_list *timer, struct tvec_base *base) +detach_expired_timer(struct timer_list *timer, struct timer_base *base) { detach_timer(timer, true); if (!(timer->flags & TIMER_DEFERRABLE)) @@ -715,7 +715,7 @@ detach_expired_timer(struct timer_list *timer, struct tvec_base *base) base->all_timers--; } -static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, +static int detach_if_pending(struct timer_list *timer, struct timer_base *base, bool clear_pending) { if (!timer_pending(timer)) @@ -725,16 +725,16 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, if (!(timer->flags & TIMER_DEFERRABLE)) { base->active_timers--; if (timer->expires == base->next_timer) - base->next_timer = base->timer_jiffies; + base->next_timer = base->clk; } /* If this was the last timer, advance base->jiffies */ if (!--base->all_timers) - base->timer_jiffies = jiffies; + base->clk = jiffies; return 1; } /* - * We are using hashed locking: holding per_cpu(tvec_bases).lock + * We are using hashed locking: holding per_cpu(timer_bases).lock * means that all timers which are tied to this base via timer->base are * locked, and the base itself is locked too. * @@ -744,16 +744,16 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, * When the timer's base is locked and removed from the list, the * TIMER_MIGRATING flag is set, FIXME */ -static struct tvec_base *lock_timer_base(struct timer_list *timer, +static struct timer_base *lock_timer_base(struct timer_list *timer, unsigned long *flags) __acquires(timer->base->lock) { for (;;) { u32 tf = timer->flags; - struct tvec_base *base; + struct timer_base *base; if (!(tf & TIMER_MIGRATING)) { - base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK); + base = per_cpu_ptr(&timer_bases, tf & TIMER_CPUMASK); spin_lock_irqsave(&base->lock, *flags); if (timer->flags == tf) return base; @@ -766,7 +766,7 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, static inline int __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) { - struct tvec_base *base, *new_base; + struct timer_base *base, *new_base; unsigned long flags; int ret = 0; @@ -933,8 +933,8 @@ EXPORT_SYMBOL(add_timer); */ void add_timer_on(struct timer_list *timer, int cpu) { - struct tvec_base *new_base = per_cpu_ptr(&tvec_bases, cpu); - struct tvec_base *base; + struct timer_base *new_base = per_cpu_ptr(&timer_bases, cpu); + struct timer_base *base; unsigned long flags; timer_stats_timer_set_start_info(timer); @@ -975,7 +975,7 @@ EXPORT_SYMBOL_GPL(add_timer_on); */ int del_timer(struct timer_list *timer) { - struct tvec_base *base; + struct timer_base *base; unsigned long flags; int ret = 0; @@ -1001,7 +1001,7 @@ EXPORT_SYMBOL(del_timer); */ int try_to_del_timer_sync(struct timer_list *timer) { - struct tvec_base *base; + struct timer_base *base; unsigned long flags; int ret = -1; @@ -1085,7 +1085,7 @@ int del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(del_timer_sync); #endif -static int cascade(struct tvec_base *base, struct tvec *tv, int index) +static int cascade(struct timer_base *base, struct tvec *tv, int index) { /* cascade all the timers from tv up one level */ struct timer_list *timer; @@ -1149,7 +1149,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), } } -#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) +#define INDEX(N) ((base->clk >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) /** * __run_timers - run all expired timers (if any) on this CPU. @@ -1158,23 +1158,23 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), * This function cascades all vectors and executes all expired timer * vectors. */ -static inline void __run_timers(struct tvec_base *base) +static inline void __run_timers(struct timer_base *base) { struct timer_list *timer; spin_lock_irq(&base->lock); - while (time_after_eq(jiffies, base->timer_jiffies)) { + while (time_after_eq(jiffies, base->clk)) { struct hlist_head work_list; struct hlist_head *head = &work_list; int index; if (!base->all_timers) { - base->timer_jiffies = jiffies; + base->clk = jiffies; break; } - index = base->timer_jiffies & TVR_MASK; + index = base->clk & TVR_MASK; /* * Cascade timers: @@ -1184,7 +1184,7 @@ static inline void __run_timers(struct tvec_base *base) (!cascade(base, &base->tv3, INDEX(1))) && !cascade(base, &base->tv4, INDEX(2))) cascade(base, &base->tv5, INDEX(3)); - ++base->timer_jiffies; + ++base->clk; hlist_move_list(base->tv1.vec + index, head); while (!hlist_empty(head)) { void (*fn)(unsigned long); @@ -1222,16 +1222,16 @@ static inline void __run_timers(struct tvec_base *base) * is used on S/390 to stop all activity when a CPU is idle. * This function needs to be called with interrupts disabled. */ -static unsigned long __next_timer_interrupt(struct tvec_base *base) +static unsigned long __next_timer_interrupt(struct timer_base *base) { - unsigned long timer_jiffies = base->timer_jiffies; - unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA; + unsigned long clk = base->clk; + unsigned long expires = clk + NEXT_TIMER_MAX_DELTA; int index, slot, array, found = 0; struct timer_list *nte; struct tvec *varray[4]; /* Look for timer events in tv1. */ - index = slot = timer_jiffies & TVR_MASK; + index = slot = clk & TVR_MASK; do { hlist_for_each_entry(nte, base->tv1.vec + slot, entry) { if (nte->flags & TIMER_DEFERRABLE) @@ -1250,8 +1250,8 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base) cascade: /* Calculate the next cascade event */ if (index) - timer_jiffies += TVR_SIZE - index; - timer_jiffies >>= TVR_BITS; + clk += TVR_SIZE - index; + clk >>= TVR_BITS; /* Check tv2-tv5. */ varray[0] = &base->tv2; @@ -1262,7 +1262,7 @@ cascade: for (array = 0; array < 4; array++) { struct tvec *varp = varray[array]; - index = slot = timer_jiffies & TVN_MASK; + index = slot = clk & TVN_MASK; do { hlist_for_each_entry(nte, varp->vec + slot, entry) { if (nte->flags & TIMER_DEFERRABLE) @@ -1286,8 +1286,8 @@ cascade: } while (slot != index); if (index) - timer_jiffies += TVN_SIZE - index; - timer_jiffies >>= TVN_BITS; + clk += TVN_SIZE - index; + clk >>= TVN_BITS; } return expires; } @@ -1335,7 +1335,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) */ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) { - struct tvec_base *base = this_cpu_ptr(&tvec_bases); + struct timer_base *base = this_cpu_ptr(&timer_bases); u64 expires = KTIME_MAX; unsigned long nextevt; @@ -1348,7 +1348,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) spin_lock(&base->lock); if (base->active_timers) { - if (time_before_eq(base->next_timer, base->timer_jiffies)) + if (time_before_eq(base->next_timer, base->clk)) base->next_timer = __next_timer_interrupt(base); nextevt = base->next_timer; if (time_before_eq(nextevt, basej)) @@ -1387,9 +1387,9 @@ void update_process_times(int user_tick) */ static void run_timer_softirq(struct softirq_action *h) { - struct tvec_base *base = this_cpu_ptr(&tvec_bases); + struct timer_base *base = this_cpu_ptr(&timer_bases); - if (time_after_eq(jiffies, base->timer_jiffies)) + if (time_after_eq(jiffies, base->clk)) __run_timers(base); } @@ -1534,7 +1534,7 @@ signed long __sched schedule_timeout_idle(signed long timeout) EXPORT_SYMBOL(schedule_timeout_idle); #ifdef CONFIG_HOTPLUG_CPU -static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head) +static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head) { struct timer_list *timer; int cpu = new_base->cpu; @@ -1550,13 +1550,13 @@ static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *he static void migrate_timers(int cpu) { - struct tvec_base *old_base; - struct tvec_base *new_base; + struct timer_base *old_base; + struct timer_base *new_base; int i; BUG_ON(cpu_online(cpu)); - old_base = per_cpu_ptr(&tvec_bases, cpu); - new_base = get_cpu_ptr(&tvec_bases); + old_base = per_cpu_ptr(&timer_bases, cpu); + new_base = get_cpu_ptr(&timer_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. @@ -1580,7 +1580,7 @@ static void migrate_timers(int cpu) spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); - put_cpu_ptr(&tvec_bases); + put_cpu_ptr(&timer_bases); } static int timer_cpu_notify(struct notifier_block *self, @@ -1608,13 +1608,13 @@ static inline void timer_register_cpu_notifier(void) { } static void __init init_timer_cpu(int cpu) { - struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu); + struct timer_base *base = per_cpu_ptr(&timer_bases, cpu); base->cpu = cpu; spin_lock_init(&base->lock); - base->timer_jiffies = jiffies; - base->next_timer = base->timer_jiffies; + base->clk = jiffies; + base->next_timer = base->clk; } static void __init init_timer_cpus(void) From b0d6e2dcb284f1f4dcb4b92760f49eeaf5fc0bc7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:29 +0000 Subject: [PATCH 1041/1050] timers: Reduce the CPU index space to 256k We want to store the array index in the flags space. 256k CPUs should be enough for a while. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.030144293@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timer.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index a8f6c70eb414..989f33d16ebf 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -58,12 +58,12 @@ struct timer_list { * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! */ -#define TIMER_CPUMASK 0x0007FFFF -#define TIMER_MIGRATING 0x00080000 +#define TIMER_CPUMASK 0x0003FFFF +#define TIMER_MIGRATING 0x00040000 #define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING) -#define TIMER_DEFERRABLE 0x00100000 -#define TIMER_PINNED 0x00200000 -#define TIMER_IRQSAFE 0x00400000 +#define TIMER_DEFERRABLE 0x00080000 +#define TIMER_PINNED 0x00100000 +#define TIMER_IRQSAFE 0x00200000 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ From 500462a9de657f86edaa102f8ab6bff7f7e43fc2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:30 +0000 Subject: [PATCH 1042/1050] timers: Switch to a non-cascading wheel The current timer wheel has some drawbacks: 1) Cascading: Cascading can be an unbound operation and is completely pointless in most cases because the vast majority of the timer wheel timers are canceled or rearmed before expiration. (They are used as timeout safeguards, not as real timers to measure time.) 2) No fast lookup of the next expiring timer: In NOHZ scenarios the first timer soft interrupt after a long NOHZ period must fast forward the base time to the current value of jiffies. As we have no way to find the next expiring timer fast, the code loops linearly and increments the base time one by one and checks for expired timers in each step. This causes unbound overhead spikes exactly in the moment when we should wake up as fast as possible. After a thorough analysis of real world data gathered on laptops, workstations, webservers and other machines (thanks Chris!) I came to the conclusion that the current 'classic' timer wheel implementation can be modified to address the above issues. The vast majority of timer wheel timers is canceled or rearmed before expiry. Most of them are timeouts for networking and other I/O tasks. The nature of timeouts is to catch the exception from normal operation (TCP ack timed out, disk does not respond, etc.). For these kinds of timeouts the accuracy of the timeout is not really a concern. Timeouts are very often approximate worst-case values and in case the timeout fires, we already waited for a long time and performance is down the drain already. The few timers which actually expire can be split into two categories: 1) Short expiry times which expect halfways accurate expiry 2) Long term expiry times are inaccurate today already due to the batching which is done for NOHZ automatically and also via the set_timer_slack() API. So for long term expiry timers we can avoid the cascading property and just leave them in the less granular outer wheels until expiry or cancelation. Timers which are armed with a timeout larger than the wheel capacity are no longer cascaded. We expire them with the longest possible timeout (6+ days). We have not observed such timeouts in our data collection, but at least we handle them, applying the rule of the least surprise. To avoid extending the wheel levels for HZ=1000 so we can accomodate the longest observed timeouts (5 days in the network conntrack code) we reduce the first level granularity on HZ=1000 to 4ms, which effectively is the same as the HZ=250 behaviour. From our data analysis there is nothing which relies on that 1ms granularity and as a side effect we get better batching and timer locality for the networking code as well. Contrary to the classic wheel the granularity of the next wheel is not the capacity of the first wheel. The granularities of the wheels are in the currently chosen setting 8 times the granularity of the previous wheel. So for HZ=250 we end up with the following granularity levels: Level Offset Granularity Range 0 0 4 ms 0 ms - 252 ms 1 64 32 ms 256 ms - 2044 ms (256ms - ~2s) 2 128 256 ms 2048 ms - 16380 ms (~2s - ~16s) 3 192 2048 ms (~2s) 16384 ms - 131068 ms (~16s - ~2m) 4 256 16384 ms (~16s) 131072 ms - 1048572 ms (~2m - ~17m) 5 320 131072 ms (~2m) 1048576 ms - 8388604 ms (~17m - ~2h) 6 384 1048576 ms (~17m) 8388608 ms - 67108863 ms (~2h - ~18h) 7 448 8388608 ms (~2h) 67108864 ms - 536870911 ms (~18h - ~6d) That's a worst case inaccuracy of 12.5% for the timers which are queued at the beginning of a level. So the new wheel concept addresses the old issues: 1) Cascading is avoided completely 2) By keeping the timers in the bucket until expiry/cancelation we can track the buckets which have timers enqueued in a bucket bitmap and therefore can look up the next expiring timer very fast and O(1). A further benefit of the concept is that the slack calculation which is done on every timer start is no longer necessary because the granularity levels provide natural batching already. Our extensive testing with various loads did not show any performance degradation vs. the current wheel implementation. This patch does not address the 'fast lookup' issue as we wanted to make sure that there is no regression introduced by the wheel redesign. The optimizations are in follow up patches. This patch contains fixes from Anna-Maria Gleixner and Richard Cochran. Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.108621834@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timer.h | 2 + kernel/time/timer.c | 819 ++++++++++++++++++++++++------------------ 2 files changed, 464 insertions(+), 357 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 989f33d16ebf..5869ab9848fe 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -64,6 +64,8 @@ struct timer_list { #define TIMER_DEFERRABLE 0x00080000 #define TIMER_PINNED 0x00100000 #define TIMER_IRQSAFE 0x00200000 +#define TIMER_ARRAYSHIFT 22 +#define TIMER_ARRAYMASK 0xFFC00000 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f259a3ef4577..86e95b72665d 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -59,43 +59,151 @@ __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); /* - * per-CPU timer vector definitions: + * The timer wheel has LVL_DEPTH array levels. Each level provides an array of + * LVL_SIZE buckets. Each level is driven by its own clock and therefor each + * level has a different granularity. + * + * The level granularity is: LVL_CLK_DIV ^ lvl + * The level clock frequency is: HZ / (LVL_CLK_DIV ^ level) + * + * The array level of a newly armed timer depends on the relative expiry + * time. The farther the expiry time is away the higher the array level and + * therefor the granularity becomes. + * + * Contrary to the original timer wheel implementation, which aims for 'exact' + * expiry of the timers, this implementation removes the need for recascading + * the timers into the lower array levels. The previous 'classic' timer wheel + * implementation of the kernel already violated the 'exact' expiry by adding + * slack to the expiry time to provide batched expiration. The granularity + * levels provide implicit batching. + * + * This is an optimization of the original timer wheel implementation for the + * majority of the timer wheel use cases: timeouts. The vast majority of + * timeout timers (networking, disk I/O ...) are canceled before expiry. If + * the timeout expires it indicates that normal operation is disturbed, so it + * does not matter much whether the timeout comes with a slight delay. + * + * The only exception to this are networking timers with a small expiry + * time. They rely on the granularity. Those fit into the first wheel level, + * which has HZ granularity. + * + * We don't have cascading anymore. timers with a expiry time above the + * capacity of the last wheel level are force expired at the maximum timeout + * value of the last wheel level. From data sampling we know that the maximum + * value observed is 5 days (network connection tracking), so this should not + * be an issue. + * + * The currently chosen array constants values are a good compromise between + * array size and granularity. + * + * This results in the following granularity and range levels: + * + * HZ 1000 steps + * Level Offset Granularity Range + * 0 0 1 ms 0 ms - 63 ms + * 1 64 8 ms 64 ms - 511 ms + * 2 128 64 ms 512 ms - 4095 ms (512ms - ~4s) + * 3 192 512 ms 4096 ms - 32767 ms (~4s - ~32s) + * 4 256 4096 ms (~4s) 32768 ms - 262143 ms (~32s - ~4m) + * 5 320 32768 ms (~32s) 262144 ms - 2097151 ms (~4m - ~34m) + * 6 384 262144 ms (~4m) 2097152 ms - 16777215 ms (~34m - ~4h) + * 7 448 2097152 ms (~34m) 16777216 ms - 134217727 ms (~4h - ~1d) + * 8 512 16777216 ms (~4h) 134217728 ms - 1073741822 ms (~1d - ~12d) + * + * HZ 300 + * Level Offset Granularity Range + * 0 0 3 ms 0 ms - 210 ms + * 1 64 26 ms 213 ms - 1703 ms (213ms - ~1s) + * 2 128 213 ms 1706 ms - 13650 ms (~1s - ~13s) + * 3 192 1706 ms (~1s) 13653 ms - 109223 ms (~13s - ~1m) + * 4 256 13653 ms (~13s) 109226 ms - 873810 ms (~1m - ~14m) + * 5 320 109226 ms (~1m) 873813 ms - 6990503 ms (~14m - ~1h) + * 6 384 873813 ms (~14m) 6990506 ms - 55924050 ms (~1h - ~15h) + * 7 448 6990506 ms (~1h) 55924053 ms - 447392423 ms (~15h - ~5d) + * 8 512 55924053 ms (~15h) 447392426 ms - 3579139406 ms (~5d - ~41d) + * + * HZ 250 + * Level Offset Granularity Range + * 0 0 4 ms 0 ms - 255 ms + * 1 64 32 ms 256 ms - 2047 ms (256ms - ~2s) + * 2 128 256 ms 2048 ms - 16383 ms (~2s - ~16s) + * 3 192 2048 ms (~2s) 16384 ms - 131071 ms (~16s - ~2m) + * 4 256 16384 ms (~16s) 131072 ms - 1048575 ms (~2m - ~17m) + * 5 320 131072 ms (~2m) 1048576 ms - 8388607 ms (~17m - ~2h) + * 6 384 1048576 ms (~17m) 8388608 ms - 67108863 ms (~2h - ~18h) + * 7 448 8388608 ms (~2h) 67108864 ms - 536870911 ms (~18h - ~6d) + * 8 512 67108864 ms (~18h) 536870912 ms - 4294967288 ms (~6d - ~49d) + * + * HZ 100 + * Level Offset Granularity Range + * 0 0 10 ms 0 ms - 630 ms + * 1 64 80 ms 640 ms - 5110 ms (640ms - ~5s) + * 2 128 640 ms 5120 ms - 40950 ms (~5s - ~40s) + * 3 192 5120 ms (~5s) 40960 ms - 327670 ms (~40s - ~5m) + * 4 256 40960 ms (~40s) 327680 ms - 2621430 ms (~5m - ~43m) + * 5 320 327680 ms (~5m) 2621440 ms - 20971510 ms (~43m - ~5h) + * 6 384 2621440 ms (~43m) 20971520 ms - 167772150 ms (~5h - ~1d) + * 7 448 20971520 ms (~5h) 167772160 ms - 1342177270 ms (~1d - ~15d) */ -#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) -#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) -#define TVN_SIZE (1 << TVN_BITS) -#define TVR_SIZE (1 << TVR_BITS) -#define TVN_MASK (TVN_SIZE - 1) -#define TVR_MASK (TVR_SIZE - 1) -#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1)) -struct tvec { - struct hlist_head vec[TVN_SIZE]; -}; +/* Clock divisor for the next level */ +#define LVL_CLK_SHIFT 3 +#define LVL_CLK_DIV (1UL << LVL_CLK_SHIFT) +#define LVL_CLK_MASK (LVL_CLK_DIV - 1) +#define LVL_SHIFT(n) ((n) * LVL_CLK_SHIFT) +#define LVL_GRAN(n) (1UL << LVL_SHIFT(n)) -struct tvec_root { - struct hlist_head vec[TVR_SIZE]; -}; +/* + * The time start value for each level to select the bucket at enqueue + * time. + */ +#define LVL_START(n) ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT)) + +/* Size of each clock level */ +#define LVL_BITS 6 +#define LVL_SIZE (1UL << LVL_BITS) +#define LVL_MASK (LVL_SIZE - 1) +#define LVL_OFFS(n) ((n) * LVL_SIZE) + +/* Level depth */ +#if HZ > 100 +# define LVL_DEPTH 9 +# else +# define LVL_DEPTH 8 +#endif + +/* The cutoff (max. capacity of the wheel) */ +#define WHEEL_TIMEOUT_CUTOFF (LVL_START(LVL_DEPTH)) +#define WHEEL_TIMEOUT_MAX (WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1)) + +/* + * The resulting wheel size. If NOHZ is configured we allocate two + * wheels so we have a separate storage for the deferrable timers. + */ +#define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH) + +#ifdef CONFIG_NO_HZ_COMMON +# define NR_BASES 2 +# define BASE_STD 0 +# define BASE_DEF 1 +#else +# define NR_BASES 1 +# define BASE_STD 0 +# define BASE_DEF 0 +#endif struct timer_base { - spinlock_t lock; - struct timer_list *running_timer; - unsigned long clk; - unsigned long next_timer; - unsigned long active_timers; - unsigned long all_timers; - int cpu; - bool migration_enabled; - bool nohz_active; - struct tvec_root tv1; - struct tvec tv2; - struct tvec tv3; - struct tvec tv4; - struct tvec tv5; + spinlock_t lock; + struct timer_list *running_timer; + unsigned long clk; + unsigned int cpu; + bool migration_enabled; + bool nohz_active; + DECLARE_BITMAP(pending_map, WHEEL_SIZE); + struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; - -static DEFINE_PER_CPU(struct timer_base, timer_bases); +static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) unsigned int sysctl_timer_migration = 1; @@ -106,15 +214,17 @@ void timers_update_migration(bool update_nohz) unsigned int cpu; /* Avoid the loop, if nothing to update */ - if (this_cpu_read(timer_bases.migration_enabled) == on) + if (this_cpu_read(timer_bases[BASE_STD].migration_enabled) == on) return; for_each_possible_cpu(cpu) { - per_cpu(timer_bases.migration_enabled, cpu) = on; + per_cpu(timer_bases[BASE_STD].migration_enabled, cpu) = on; + per_cpu(timer_bases[BASE_DEF].migration_enabled, cpu) = on; per_cpu(hrtimer_bases.migration_enabled, cpu) = on; if (!update_nohz) continue; - per_cpu(timer_bases.nohz_active, cpu) = true; + per_cpu(timer_bases[BASE_STD].nohz_active, cpu) = true; + per_cpu(timer_bases[BASE_DEF].nohz_active, cpu) = true; per_cpu(hrtimer_bases.nohz_active, cpu) = true; } } @@ -133,20 +243,6 @@ int timer_migration_handler(struct ctl_table *table, int write, mutex_unlock(&mutex); return ret; } - -static inline struct timer_base *get_target_base(struct timer_base *base, - int pinned) -{ - if (pinned || !base->migration_enabled) - return this_cpu_ptr(&timer_bases); - return per_cpu_ptr(&timer_bases, get_nohz_timer_target()); -} -#else -static inline struct timer_base *get_target_base(struct timer_base *base, - int pinned) -{ - return this_cpu_ptr(&timer_bases); -} #endif static unsigned long round_jiffies_common(unsigned long j, int cpu, @@ -370,78 +466,91 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); +static inline unsigned int timer_get_idx(struct timer_list *timer) +{ + return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT; +} + +static inline void timer_set_idx(struct timer_list *timer, unsigned int idx) +{ + timer->flags = (timer->flags & ~TIMER_ARRAYMASK) | + idx << TIMER_ARRAYSHIFT; +} + +/* + * Helper function to calculate the array index for a given expiry + * time. + */ +static inline unsigned calc_index(unsigned expires, unsigned lvl) +{ + expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl); + return LVL_OFFS(lvl) + (expires & LVL_MASK); +} + static void __internal_add_timer(struct timer_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; - unsigned long idx = expires - base->clk; + unsigned long delta = expires - base->clk; struct hlist_head *vec; + unsigned int idx; - if (idx < TVR_SIZE) { - int i = expires & TVR_MASK; - vec = base->tv1.vec + i; - } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { - int i = (expires >> TVR_BITS) & TVN_MASK; - vec = base->tv2.vec + i; - } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { - int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; - vec = base->tv3.vec + i; - } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { - int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; - vec = base->tv4.vec + i; - } else if ((signed long) idx < 0) { - /* - * Can happen if you add a timer with expires == jiffies, - * or you set a timer to go off in the past - */ - vec = base->tv1.vec + (base->clk & TVR_MASK); + if (delta < LVL_START(1)) { + idx = calc_index(expires, 0); + } else if (delta < LVL_START(2)) { + idx = calc_index(expires, 1); + } else if (delta < LVL_START(3)) { + idx = calc_index(expires, 2); + } else if (delta < LVL_START(4)) { + idx = calc_index(expires, 3); + } else if (delta < LVL_START(5)) { + idx = calc_index(expires, 4); + } else if (delta < LVL_START(6)) { + idx = calc_index(expires, 5); + } else if (delta < LVL_START(7)) { + idx = calc_index(expires, 6); + } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) { + idx = calc_index(expires, 7); + } else if ((long) delta < 0) { + idx = base->clk & LVL_MASK; } else { - int i; - /* If the timeout is larger than MAX_TVAL (on 64-bit - * architectures or with CONFIG_BASE_SMALL=1) then we - * use the maximum timeout. + /* + * Force expire obscene large timeouts to expire at the + * capacity limit of the wheel. */ - if (idx > MAX_TVAL) { - idx = MAX_TVAL; - expires = idx + base->clk; - } - i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - vec = base->tv5.vec + i; - } + if (expires >= WHEEL_TIMEOUT_CUTOFF) + expires = WHEEL_TIMEOUT_MAX; + idx = calc_index(expires, LVL_DEPTH - 1); + } + /* + * Enqueue the timer into the array bucket, mark it pending in + * the bitmap and store the index in the timer flags. + */ + vec = base->vectors + idx; hlist_add_head(&timer->entry, vec); + __set_bit(idx, base->pending_map); + timer_set_idx(timer, idx); } static void internal_add_timer(struct timer_base *base, struct timer_list *timer) { - /* Advance base->jiffies, if the base is empty */ - if (!base->all_timers++) - base->clk = jiffies; - __internal_add_timer(base, timer); - /* - * Update base->active_timers and base->next_timer - */ - if (!(timer->flags & TIMER_DEFERRABLE)) { - if (!base->active_timers++ || - time_before(timer->expires, base->next_timer)) - base->next_timer = timer->expires; - } /* * Check whether the other CPU is in dynticks mode and needs - * to be triggered to reevaluate the timer wheel. - * We are protected against the other CPU fiddling - * with the timer by holding the timer base lock. This also - * makes sure that a CPU on the way to stop its tick can not - * evaluate the timer wheel. + * to be triggered to reevaluate the timer wheel. We are + * protected against the other CPU fiddling with the timer by + * holding the timer base lock. This also makes sure that a + * CPU on the way to stop its tick can not evaluate the timer + * wheel. * * Spare the IPI for deferrable timers on idle targets though. * The next busy ticks will take care of it. Except full dynticks * require special care against races with idle_cpu(), lets deal * with that later. */ - if (base->nohz_active) { + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) { if (!(timer->flags & TIMER_DEFERRABLE) || tick_nohz_full_cpu(base->cpu)) wake_up_nohz_cpu(base->cpu); @@ -706,54 +815,87 @@ static inline void detach_timer(struct timer_list *timer, bool clear_pending) entry->next = LIST_POISON2; } -static inline void -detach_expired_timer(struct timer_list *timer, struct timer_base *base) -{ - detach_timer(timer, true); - if (!(timer->flags & TIMER_DEFERRABLE)) - base->active_timers--; - base->all_timers--; -} - static int detach_if_pending(struct timer_list *timer, struct timer_base *base, bool clear_pending) { + unsigned idx = timer_get_idx(timer); + if (!timer_pending(timer)) return 0; + if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) + __clear_bit(idx, base->pending_map); + detach_timer(timer, clear_pending); - if (!(timer->flags & TIMER_DEFERRABLE)) { - base->active_timers--; - if (timer->expires == base->next_timer) - base->next_timer = base->clk; - } - /* If this was the last timer, advance base->jiffies */ - if (!--base->all_timers) - base->clk = jiffies; return 1; } +static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) +{ + struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); + + /* + * If the timer is deferrable and nohz is active then we need to use + * the deferrable base. + */ + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && + (tflags & TIMER_DEFERRABLE)) + base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); + return base; +} + +static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) +{ + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + + /* + * If the timer is deferrable and nohz is active then we need to use + * the deferrable base. + */ + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && + (tflags & TIMER_DEFERRABLE)) + base = this_cpu_ptr(&timer_bases[BASE_DEF]); + return base; +} + +static inline struct timer_base *get_timer_base(u32 tflags) +{ + return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); +} + +static inline struct timer_base *get_target_base(struct timer_base *base, + unsigned tflags) +{ +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) + if ((tflags & TIMER_PINNED) || !base->migration_enabled) + return get_timer_this_cpu_base(tflags); + return get_timer_cpu_base(tflags, get_nohz_timer_target()); +#else + return get_timer_this_cpu_base(tflags); +#endif +} + /* - * We are using hashed locking: holding per_cpu(timer_bases).lock - * means that all timers which are tied to this base via timer->base are - * locked, and the base itself is locked too. + * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means + * that all timers which are tied to this base are locked, and the base itself + * is locked too. * * So __run_timers/migrate_timers can safely modify all timers which could - * be found on ->tvX lists. + * be found in the base->vectors array. * - * When the timer's base is locked and removed from the list, the - * TIMER_MIGRATING flag is set, FIXME + * When a timer is migrating then the TIMER_MIGRATING flag is set and we need + * to wait until the migration is done. */ static struct timer_base *lock_timer_base(struct timer_list *timer, - unsigned long *flags) + unsigned long *flags) __acquires(timer->base->lock) { for (;;) { - u32 tf = timer->flags; struct timer_base *base; + u32 tf = timer->flags; if (!(tf & TIMER_MIGRATING)) { - base = per_cpu_ptr(&timer_bases, tf & TIMER_CPUMASK); + base = get_timer_base(tf); spin_lock_irqsave(&base->lock, *flags); if (timer->flags == tf) return base; @@ -770,6 +912,27 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) unsigned long flags; int ret = 0; + /* + * TODO: Calculate the array bucket of the timer right here w/o + * holding the base lock. This allows to check not only + * timer->expires == expires below, but also whether the timer + * ends up in the same bucket. If we really need to requeue + * the timer then we check whether base->clk have + * advanced between here and locking the timer base. If + * jiffies advanced we have to recalc the array bucket with the + * lock held. + */ + + /* + * This is a common optimization triggered by the + * networking code - if the timer is re-modified + * to be the same thing then just return: + */ + if (timer_pending(timer)) { + if (timer->expires == expires) + return 1; + } + timer_stats_timer_set_start_info(timer); BUG_ON(!timer->function); @@ -781,15 +944,15 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) debug_activate(timer, expires); - new_base = get_target_base(base, timer->flags & TIMER_PINNED); + new_base = get_target_base(base, timer->flags); if (base != new_base) { /* - * We are trying to schedule the timer on the local CPU. + * We are trying to schedule the timer on the new base. * However we can't change timer's base while it is running, * otherwise del_timer_sync() can't detect that the timer's - * handler yet has not finished. This also guarantees that - * the timer is serialized wrt itself. + * handler yet has not finished. This also guarantees that the + * timer is serialized wrt itself. */ if (likely(base->running_timer != timer)) { /* See the comment in lock_timer_base() */ @@ -828,45 +991,6 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) } EXPORT_SYMBOL(mod_timer_pending); -/* - * Decide where to put the timer while taking the slack into account - * - * Algorithm: - * 1) calculate the maximum (absolute) time - * 2) calculate the highest bit where the expires and new max are different - * 3) use this bit to make a mask - * 4) use the bitmask to round down the maximum time, so that all last - * bits are zeros - */ -static inline -unsigned long apply_slack(struct timer_list *timer, unsigned long expires) -{ - unsigned long expires_limit, mask; - int bit; - - if (timer->slack >= 0) { - expires_limit = expires + timer->slack; - } else { - long delta = expires - jiffies; - - if (delta < 256) - return expires; - - expires_limit = expires + delta / 256; - } - mask = expires ^ expires_limit; - if (mask == 0) - return expires; - - bit = __fls(mask); - - mask = (1UL << bit) - 1; - - expires_limit = expires_limit & ~(mask); - - return expires_limit; -} - /** * mod_timer - modify a timer's timeout * @timer: the timer to be modified @@ -889,16 +1013,6 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) */ int mod_timer(struct timer_list *timer, unsigned long expires) { - expires = apply_slack(timer, expires); - - /* - * This is a common optimization triggered by the - * networking code - if the timer is re-modified - * to be the same thing then just return: - */ - if (timer_pending(timer) && timer->expires == expires) - return 1; - return __mod_timer(timer, expires, false); } EXPORT_SYMBOL(mod_timer); @@ -933,13 +1047,14 @@ EXPORT_SYMBOL(add_timer); */ void add_timer_on(struct timer_list *timer, int cpu) { - struct timer_base *new_base = per_cpu_ptr(&timer_bases, cpu); - struct timer_base *base; + struct timer_base *new_base, *base; unsigned long flags; timer_stats_timer_set_start_info(timer); BUG_ON(timer_pending(timer) || !timer->function); + new_base = get_timer_cpu_base(timer->flags, cpu); + /* * If @timer was on a different CPU, it should be migrated with the * old base locked to prevent other operations proceeding with the @@ -1085,27 +1200,6 @@ int del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(del_timer_sync); #endif -static int cascade(struct timer_base *base, struct tvec *tv, int index) -{ - /* cascade all the timers from tv up one level */ - struct timer_list *timer; - struct hlist_node *tmp; - struct hlist_head tv_list; - - hlist_move_list(tv->vec + index, &tv_list); - - /* - * We are removing _all_ timers from the list, so we - * don't have to detach them individually. - */ - hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) { - /* No accounting, while moving them */ - __internal_add_timer(base, timer); - } - - return index; -} - static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), unsigned long data) { @@ -1149,68 +1243,80 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), } } -#define INDEX(N) ((base->clk >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) +static void expire_timers(struct timer_base *base, struct hlist_head *head) +{ + while (!hlist_empty(head)) { + struct timer_list *timer; + void (*fn)(unsigned long); + unsigned long data; + + timer = hlist_entry(head->first, struct timer_list, entry); + timer_stats_account_timer(timer); + + base->running_timer = timer; + detach_timer(timer, true); + + fn = timer->function; + data = timer->data; + + if (timer->flags & TIMER_IRQSAFE) { + spin_unlock(&base->lock); + call_timer_fn(timer, fn, data); + spin_lock(&base->lock); + } else { + spin_unlock_irq(&base->lock); + call_timer_fn(timer, fn, data); + spin_lock_irq(&base->lock); + } + } +} + +static int collect_expired_timers(struct timer_base *base, + struct hlist_head *heads) +{ + unsigned long clk = base->clk; + struct hlist_head *vec; + int i, levels = 0; + unsigned int idx; + + for (i = 0; i < LVL_DEPTH; i++) { + idx = (clk & LVL_MASK) + i * LVL_SIZE; + + if (__test_and_clear_bit(idx, base->pending_map)) { + vec = base->vectors + idx; + hlist_move_list(vec, heads++); + levels++; + } + /* Is it time to look at the next level? */ + if (clk & LVL_CLK_MASK) + break; + /* Shift clock for the next level granularity */ + clk >>= LVL_CLK_SHIFT; + } + return levels; +} /** * __run_timers - run all expired timers (if any) on this CPU. * @base: the timer vector to be processed. - * - * This function cascades all vectors and executes all expired timer - * vectors. */ static inline void __run_timers(struct timer_base *base) { - struct timer_list *timer; + struct hlist_head heads[LVL_DEPTH]; + int levels; + + if (!time_after_eq(jiffies, base->clk)) + return; spin_lock_irq(&base->lock); while (time_after_eq(jiffies, base->clk)) { - struct hlist_head work_list; - struct hlist_head *head = &work_list; - int index; - if (!base->all_timers) { - base->clk = jiffies; - break; - } + levels = collect_expired_timers(base, heads); + base->clk++; - index = base->clk & TVR_MASK; - - /* - * Cascade timers: - */ - if (!index && - (!cascade(base, &base->tv2, INDEX(0))) && - (!cascade(base, &base->tv3, INDEX(1))) && - !cascade(base, &base->tv4, INDEX(2))) - cascade(base, &base->tv5, INDEX(3)); - ++base->clk; - hlist_move_list(base->tv1.vec + index, head); - while (!hlist_empty(head)) { - void (*fn)(unsigned long); - unsigned long data; - bool irqsafe; - - timer = hlist_entry(head->first, struct timer_list, entry); - fn = timer->function; - data = timer->data; - irqsafe = timer->flags & TIMER_IRQSAFE; - - timer_stats_account_timer(timer); - - base->running_timer = timer; - detach_expired_timer(timer, base); - - if (irqsafe) { - spin_unlock(&base->lock); - call_timer_fn(timer, fn, data); - spin_lock(&base->lock); - } else { - spin_unlock_irq(&base->lock); - call_timer_fn(timer, fn, data); - spin_lock_irq(&base->lock); - } - } + while (levels--) + expire_timers(base, heads + levels); } base->running_timer = NULL; spin_unlock_irq(&base->lock); @@ -1218,78 +1324,87 @@ static inline void __run_timers(struct timer_base *base) #ifdef CONFIG_NO_HZ_COMMON /* - * Find out when the next timer event is due to happen. This - * is used on S/390 to stop all activity when a CPU is idle. - * This function needs to be called with interrupts disabled. + * Find the next pending bucket of a level. Search from @offset + @clk upwards + * and if nothing there, search from start of the level (@offset) up to + * @offset + clk. + */ +static int next_pending_bucket(struct timer_base *base, unsigned offset, + unsigned clk) +{ + unsigned pos, start = offset + clk; + unsigned end = offset + LVL_SIZE; + + pos = find_next_bit(base->pending_map, end, start); + if (pos < end) + return pos - start; + + pos = find_next_bit(base->pending_map, start, offset); + return pos < start ? pos + LVL_SIZE - start : -1; +} + +/* + * Search the first expiring timer in the various clock levels. */ static unsigned long __next_timer_interrupt(struct timer_base *base) { - unsigned long clk = base->clk; - unsigned long expires = clk + NEXT_TIMER_MAX_DELTA; - int index, slot, array, found = 0; - struct timer_list *nte; - struct tvec *varray[4]; + unsigned long clk, next, adj; + unsigned lvl, offset = 0; - /* Look for timer events in tv1. */ - index = slot = clk & TVR_MASK; - do { - hlist_for_each_entry(nte, base->tv1.vec + slot, entry) { - if (nte->flags & TIMER_DEFERRABLE) - continue; + spin_lock(&base->lock); + next = base->clk + NEXT_TIMER_MAX_DELTA; + clk = base->clk; + for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { + int pos = next_pending_bucket(base, offset, clk & LVL_MASK); - found = 1; - expires = nte->expires; - /* Look at the cascade bucket(s)? */ - if (!index || slot < index) - goto cascade; - return expires; + if (pos >= 0) { + unsigned long tmp = clk + (unsigned long) pos; + + tmp <<= LVL_SHIFT(lvl); + if (time_before(tmp, next)) + next = tmp; } - slot = (slot + 1) & TVR_MASK; - } while (slot != index); - -cascade: - /* Calculate the next cascade event */ - if (index) - clk += TVR_SIZE - index; - clk >>= TVR_BITS; - - /* Check tv2-tv5. */ - varray[0] = &base->tv2; - varray[1] = &base->tv3; - varray[2] = &base->tv4; - varray[3] = &base->tv5; - - for (array = 0; array < 4; array++) { - struct tvec *varp = varray[array]; - - index = slot = clk & TVN_MASK; - do { - hlist_for_each_entry(nte, varp->vec + slot, entry) { - if (nte->flags & TIMER_DEFERRABLE) - continue; - - found = 1; - if (time_before(nte->expires, expires)) - expires = nte->expires; - } - /* - * Do we still search for the first timer or are - * we looking up the cascade buckets ? - */ - if (found) { - /* Look at the cascade bucket(s)? */ - if (!index || slot < index) - break; - return expires; - } - slot = (slot + 1) & TVN_MASK; - } while (slot != index); - - if (index) - clk += TVN_SIZE - index; - clk >>= TVN_BITS; + /* + * Clock for the next level. If the current level clock lower + * bits are zero, we look at the next level as is. If not we + * need to advance it by one because that's going to be the + * next expiring bucket in that level. base->clk is the next + * expiring jiffie. So in case of: + * + * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 + * 0 0 0 0 0 0 + * + * we have to look at all levels @index 0. With + * + * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 + * 0 0 0 0 0 2 + * + * LVL0 has the next expiring bucket @index 2. The upper + * levels have the next expiring bucket @index 1. + * + * In case that the propagation wraps the next level the same + * rules apply: + * + * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 + * 0 0 0 0 F 2 + * + * So after looking at LVL0 we get: + * + * LVL5 LVL4 LVL3 LVL2 LVL1 + * 0 0 0 1 0 + * + * So no propagation from LVL1 to LVL2 because that happened + * with the add already, but then we need to propagate further + * from LVL2 to LVL3. + * + * So the simple check whether the lower bits of the current + * level are 0 or not is sufficient for all cases. + */ + adj = clk & LVL_CLK_MASK ? 1 : 0; + clk >>= LVL_CLK_SHIFT; + clk += adj; } - return expires; + spin_unlock(&base->lock); + return next; } /* @@ -1335,7 +1450,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) */ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) { - struct timer_base *base = this_cpu_ptr(&timer_bases); + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); u64 expires = KTIME_MAX; unsigned long nextevt; @@ -1346,17 +1461,11 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (cpu_is_offline(smp_processor_id())) return expires; - spin_lock(&base->lock); - if (base->active_timers) { - if (time_before_eq(base->next_timer, base->clk)) - base->next_timer = __next_timer_interrupt(base); - nextevt = base->next_timer; - if (time_before_eq(nextevt, basej)) - expires = basem; - else - expires = basem + (nextevt - basej) * TICK_NSEC; - } - spin_unlock(&base->lock); + nextevt = __next_timer_interrupt(base); + if (time_before_eq(nextevt, basej)) + expires = basem; + else + expires = basem + (nextevt - basej) * TICK_NSEC; return cmp_next_hrtimer_event(basem, expires); } @@ -1387,10 +1496,11 @@ void update_process_times(int user_tick) */ static void run_timer_softirq(struct softirq_action *h) { - struct timer_base *base = this_cpu_ptr(&timer_bases); + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); - if (time_after_eq(jiffies, base->clk)) - __run_timers(base); + __run_timers(base); + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) + __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); } /* @@ -1541,7 +1651,6 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h while (!hlist_empty(head)) { timer = hlist_entry(head->first, struct timer_list, entry); - /* We ignore the accounting on the dying cpu */ detach_timer(timer, false); timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; internal_add_timer(new_base, timer); @@ -1552,35 +1661,29 @@ static void migrate_timers(int cpu) { struct timer_base *old_base; struct timer_base *new_base; - int i; + int b, i; BUG_ON(cpu_online(cpu)); - old_base = per_cpu_ptr(&timer_bases, cpu); - new_base = get_cpu_ptr(&timer_bases); - /* - * The caller is globally serialized and nobody else - * takes two locks at once, deadlock is not possible. - */ - spin_lock_irq(&new_base->lock); - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); - BUG_ON(old_base->running_timer); + for (b = 0; b < NR_BASES; b++) { + old_base = per_cpu_ptr(&timer_bases[b], cpu); + new_base = get_cpu_ptr(&timer_bases[b]); + /* + * The caller is globally serialized and nobody else + * takes two locks at once, deadlock is not possible. + */ + spin_lock_irq(&new_base->lock); + spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); - for (i = 0; i < TVR_SIZE; i++) - migrate_timer_list(new_base, old_base->tv1.vec + i); - for (i = 0; i < TVN_SIZE; i++) { - migrate_timer_list(new_base, old_base->tv2.vec + i); - migrate_timer_list(new_base, old_base->tv3.vec + i); - migrate_timer_list(new_base, old_base->tv4.vec + i); - migrate_timer_list(new_base, old_base->tv5.vec + i); + BUG_ON(old_base->running_timer); + + for (i = 0; i < WHEEL_SIZE; i++) + migrate_timer_list(new_base, old_base->vectors + i); + + spin_unlock(&old_base->lock); + spin_unlock_irq(&new_base->lock); + put_cpu_ptr(&timer_bases); } - - old_base->active_timers = 0; - old_base->all_timers = 0; - - spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); - put_cpu_ptr(&timer_bases); } static int timer_cpu_notify(struct notifier_block *self, @@ -1608,13 +1711,15 @@ static inline void timer_register_cpu_notifier(void) { } static void __init init_timer_cpu(int cpu) { - struct timer_base *base = per_cpu_ptr(&timer_bases, cpu); + struct timer_base *base; + int i; - base->cpu = cpu; - spin_lock_init(&base->lock); - - base->clk = jiffies; - base->next_timer = base->clk; + for (i = 0; i < NR_BASES; i++) { + base = per_cpu_ptr(&timer_bases[i], cpu); + base->cpu = cpu; + spin_lock_init(&base->lock); + base->clk = jiffies; + } } static void __init init_timer_cpus(void) From 53bf837b78d155b8e1110b3c25b4d0d6391b8ff3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:31 +0000 Subject: [PATCH 1043/1050] timers: Remove set_timer_slack() leftovers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now have implicit batching in the timer wheel. The slack API is no longer used, so remove it. Signed-off-by: Thomas Gleixner Cc: Alan Stern Cc: Andrew F. Davis Cc: Arjan van de Ven Cc: Chris Mason Cc: David S. Miller Cc: David Woodhouse Cc: Dmitry Eremin-Solenikov Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Greg Kroah-Hartman Cc: Jaehoon Chung Cc: Jens Axboe Cc: John Stultz Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Mathias Nyman Cc: Pali Rohár Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sebastian Reichel Cc: Ulf Hansson Cc: linux-block@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: linux-usb@vger.kernel.org Cc: netdev@vger.kernel.org Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.189813118@linutronix.de Signed-off-by: Ingo Molnar --- block/genhd.c | 5 ----- drivers/mmc/host/jz4740_mmc.c | 2 -- drivers/power/bq27xxx_battery.c | 5 +---- drivers/usb/host/ohci-hcd.c | 1 - drivers/usb/host/xhci.c | 2 -- include/linux/timer.h | 4 ---- kernel/time/timer.c | 19 ------------------- lib/random32.c | 1 - 8 files changed, 1 insertion(+), 38 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 9f42526b4d62..f06d7f3b075b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1523,12 +1523,7 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) if (--ev->block) goto out_unlock; - /* - * Not exactly a latency critical operation, set poll timer - * slack to 25% and kick event check. - */ intv = disk_events_poll_jiffies(disk); - set_timer_slack(&ev->dwork.timer, intv / 4); if (check_now) queue_delayed_work(system_freezable_power_efficient_wq, &ev->dwork, 0); diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 03ddf0ecf402..684087db170b 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -1068,8 +1068,6 @@ static int jz4740_mmc_probe(struct platform_device* pdev) jz4740_mmc_clock_disable(host); setup_timer(&host->timeout_timer, jz4740_mmc_timeout, (unsigned long)host); - /* It is not important when it times out, it just needs to timeout. */ - set_timer_slack(&host->timeout_timer, HZ); host->use_dma = true; if (host->use_dma && jz4740_mmc_acquire_dma_channels(host) != 0) diff --git a/drivers/power/bq27xxx_battery.c b/drivers/power/bq27xxx_battery.c index 45f6ebf88df6..e90b3f307e0f 100644 --- a/drivers/power/bq27xxx_battery.c +++ b/drivers/power/bq27xxx_battery.c @@ -735,11 +735,8 @@ static void bq27xxx_battery_poll(struct work_struct *work) bq27xxx_battery_update(di); - if (poll_interval > 0) { - /* The timer does not have to be accurate. */ - set_timer_slack(&di->work.timer, poll_interval * HZ / 4); + if (poll_interval > 0) schedule_delayed_work(&di->work, poll_interval * HZ); - } } /* diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 0449235d4f22..1700908b84ef 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -500,7 +500,6 @@ static int ohci_init (struct ohci_hcd *ohci) setup_timer(&ohci->io_watchdog, io_watchdog_func, (unsigned long) ohci); - set_timer_slack(&ohci->io_watchdog, msecs_to_jiffies(20)); ohci->hcca = dma_alloc_coherent (hcd->self.controller, sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f2f9518c53ab..a986fe763d87 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -490,8 +490,6 @@ static void compliance_mode_recovery_timer_init(struct xhci_hcd *xhci) xhci->comp_mode_recovery_timer.expires = jiffies + msecs_to_jiffies(COMP_MODE_RCVRY_MSECS); - set_timer_slack(&xhci->comp_mode_recovery_timer, - msecs_to_jiffies(COMP_MODE_RCVRY_MSECS)); add_timer(&xhci->comp_mode_recovery_timer); xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "Compliance mode recovery timer initialized"); diff --git a/include/linux/timer.h b/include/linux/timer.h index 5869ab9848fe..4419506b564e 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -19,7 +19,6 @@ struct timer_list { void (*function)(unsigned long); unsigned long data; u32 flags; - int slack; #ifdef CONFIG_TIMER_STATS int start_pid; @@ -73,7 +72,6 @@ struct timer_list { .expires = (_expires), \ .data = (_data), \ .flags = (_flags), \ - .slack = -1, \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ } @@ -193,8 +191,6 @@ extern int del_timer(struct timer_list * timer); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); -extern void set_timer_slack(struct timer_list *time, int slack_hz); - /* * The jiffies value which is added to now, when there is no timer * in the timer wheel: diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 86e95b72665d..a83e23d0bc25 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -447,24 +447,6 @@ unsigned long round_jiffies_up_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_up_relative); -/** - * set_timer_slack - set the allowed slack for a timer - * @timer: the timer to be modified - * @slack_hz: the amount of time (in jiffies) allowed for rounding - * - * Set the amount of time, in jiffies, that a certain timer has - * in terms of slack. By setting this value, the timer subsystem - * will schedule the actual timer somewhere between - * the time mod_timer() asks for, and that time plus the slack. - * - * By setting the slack to -1, a percentage of the delay is used - * instead. - */ -void set_timer_slack(struct timer_list *timer, int slack_hz) -{ - timer->slack = slack_hz; -} -EXPORT_SYMBOL_GPL(set_timer_slack); static inline unsigned int timer_get_idx(struct timer_list *timer) { @@ -775,7 +757,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags, { timer->entry.pprev = NULL; timer->flags = flags | raw_smp_processor_id(); - timer->slack = -1; #ifdef CONFIG_TIMER_STATS timer->start_site = NULL; timer->start_pid = -1; diff --git a/lib/random32.c b/lib/random32.c index 510d1ce7d4d2..69ed593aab07 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -233,7 +233,6 @@ static void __prandom_timer(unsigned long dontcare) static void __init __prandom_start_seed_timer(void) { - set_timer_slack(&seed_timer, HZ); seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC); add_timer(&seed_timer); } From 73420fea80c6c376d91a69defe64013baa0d7e95 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Mon, 4 Jul 2016 09:50:33 +0000 Subject: [PATCH 1044/1050] timers: Move __run_timers() function Move __run_timers() below __next_timer_interrupt() and next_pending_bucket() in preparation for __run_timers() NOHZ optimization. No functional change. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.271872665@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 52 ++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index a83e23d0bc25..c16c48de01c5 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1277,32 +1277,6 @@ static int collect_expired_timers(struct timer_base *base, return levels; } -/** - * __run_timers - run all expired timers (if any) on this CPU. - * @base: the timer vector to be processed. - */ -static inline void __run_timers(struct timer_base *base) -{ - struct hlist_head heads[LVL_DEPTH]; - int levels; - - if (!time_after_eq(jiffies, base->clk)) - return; - - spin_lock_irq(&base->lock); - - while (time_after_eq(jiffies, base->clk)) { - - levels = collect_expired_timers(base, heads); - base->clk++; - - while (levels--) - expire_timers(base, heads + levels); - } - base->running_timer = NULL; - spin_unlock_irq(&base->lock); -} - #ifdef CONFIG_NO_HZ_COMMON /* * Find the next pending bucket of a level. Search from @offset + @clk upwards @@ -1472,6 +1446,32 @@ void update_process_times(int user_tick) run_posix_cpu_timers(p); } +/** + * __run_timers - run all expired timers (if any) on this CPU. + * @base: the timer vector to be processed. + */ +static inline void __run_timers(struct timer_base *base) +{ + struct hlist_head heads[LVL_DEPTH]; + int levels; + + if (!time_after_eq(jiffies, base->clk)) + return; + + spin_lock_irq(&base->lock); + + while (time_after_eq(jiffies, base->clk)) { + + levels = collect_expired_timers(base, heads); + base->clk++; + + while (levels--) + expire_timers(base, heads + levels); + } + base->running_timer = NULL; + spin_unlock_irq(&base->lock); +} + /* * This function runs timers and the timer-tq in bottom half context. */ From 236968383cf5cd48835ff0d8a265e299e220d140 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Mon, 4 Jul 2016 09:50:34 +0000 Subject: [PATCH 1045/1050] timers: Optimize collect_expired_timers() for NOHZ After a NOHZ idle sleep the timer wheel must be forwarded to current jiffies. There might be expired timers so the current code loops and checks the expired buckets for timers. This can take quite some time for long NOHZ idle periods. The pending bitmask in the timer base allows us to do a quick search for the next expiring timer and therefore a fast forward of the base time which prevents pointless long lasting loops. For a 3 seconds idle sleep this reduces the catchup time from ~1ms to 5us. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.351296290@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 49 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index c16c48de01c5..658051c97a3c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1252,8 +1252,8 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) } } -static int collect_expired_timers(struct timer_base *base, - struct hlist_head *heads) +static int __collect_expired_timers(struct timer_base *base, + struct hlist_head *heads) { unsigned long clk = base->clk; struct hlist_head *vec; @@ -1279,9 +1279,9 @@ static int collect_expired_timers(struct timer_base *base, #ifdef CONFIG_NO_HZ_COMMON /* - * Find the next pending bucket of a level. Search from @offset + @clk upwards - * and if nothing there, search from start of the level (@offset) up to - * @offset + clk. + * Find the next pending bucket of a level. Search from level start (@offset) + * + @clk upwards and if nothing there, search from start of the level + * (@offset) up to @offset + clk. */ static int next_pending_bucket(struct timer_base *base, unsigned offset, unsigned clk) @@ -1298,14 +1298,14 @@ static int next_pending_bucket(struct timer_base *base, unsigned offset, } /* - * Search the first expiring timer in the various clock levels. + * Search the first expiring timer in the various clock levels. Caller must + * hold base->lock. */ static unsigned long __next_timer_interrupt(struct timer_base *base) { unsigned long clk, next, adj; unsigned lvl, offset = 0; - spin_lock(&base->lock); next = base->clk + NEXT_TIMER_MAX_DELTA; clk = base->clk; for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { @@ -1358,7 +1358,6 @@ static unsigned long __next_timer_interrupt(struct timer_base *base) clk >>= LVL_CLK_SHIFT; clk += adj; } - spin_unlock(&base->lock); return next; } @@ -1416,7 +1415,10 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (cpu_is_offline(smp_processor_id())) return expires; + spin_lock(&base->lock); nextevt = __next_timer_interrupt(base); + spin_unlock(&base->lock); + if (time_before_eq(nextevt, basej)) expires = basem; else @@ -1424,6 +1426,37 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) return cmp_next_hrtimer_event(basem, expires); } + +static int collect_expired_timers(struct timer_base *base, + struct hlist_head *heads) +{ + /* + * NOHZ optimization. After a long idle sleep we need to forward the + * base to current jiffies. Avoid a loop by searching the bitfield for + * the next expiring timer. + */ + if ((long)(jiffies - base->clk) > 2) { + unsigned long next = __next_timer_interrupt(base); + + /* + * If the next timer is ahead of time forward to current + * jiffies, otherwise forward to the next expiry time. + */ + if (time_after(next, jiffies)) { + /* The call site will increment clock! */ + base->clk = jiffies - 1; + return 0; + } + base->clk = next; + } + return __collect_expired_timers(base, heads); +} +#else +static inline int collect_expired_timers(struct timer_base *base, + struct hlist_head *heads) +{ + return __collect_expired_timers(base, heads); +} #endif /* From ff00673292bd42a3688b33de47252a6a3c3f424c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:35 +0000 Subject: [PATCH 1046/1050] timers/nohz: Remove pointless tick_nohz_kick_tick() function This was a failed attempt to optimize the timer expiry in idle, which was disabled and never revisited. Remove the cruft. Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.431073782@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 536ada80f6dd..69abc7bfe80f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1092,35 +1092,6 @@ static void tick_nohz_switch_to_nohz(void) tick_nohz_activate(ts, NOHZ_MODE_LOWRES); } -/* - * When NOHZ is enabled and the tick is stopped, we need to kick the - * tick timer from irq_enter() so that the jiffies update is kept - * alive during long running softirqs. That's ugly as hell, but - * correctness is key even if we need to fix the offending softirq in - * the first place. - * - * Note, this is different to tick_nohz_restart. We just kick the - * timer and do not touch the other magic bits which need to be done - * when idle is left. - */ -static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now) -{ -#if 0 - /* Switch back to 2.6.27 behaviour */ - ktime_t delta; - - /* - * Do not touch the tick device, when the next expiry is either - * already reached or less/equal than the tick period. - */ - delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); - if (delta.tv64 <= tick_period.tv64) - return; - - tick_nohz_restart(ts, now); -#endif -} - static inline void tick_nohz_irq_enter(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); @@ -1131,10 +1102,8 @@ static inline void tick_nohz_irq_enter(void) now = ktime_get(); if (ts->idle_active) tick_nohz_stop_idle(ts, now); - if (ts->tick_stopped) { + if (ts->tick_stopped) tick_nohz_update_jiffies(now); - tick_nohz_kick_tick(ts, now); - } } #else From a683f390b93f4d1292f849fc48d28e322046120f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:36 +0000 Subject: [PATCH 1047/1050] timers: Forward the wheel clock whenever possible The wheel clock is stale when a CPU goes into a long idle sleep. This has the side effect that timers which are queued end up in the outer wheel levels. That results in coarser granularity. To solve this, we keep track of the idle state and forward the wheel clock whenever possible. Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.512039360@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/tick-internal.h | 1 + kernel/time/tick-sched.c | 12 ++++ kernel/time/timer.c | 132 +++++++++++++++++++++++++++++------- 3 files changed, 122 insertions(+), 23 deletions(-) diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 966a5a6fdd0a..f738251000fe 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -164,3 +164,4 @@ static inline void timers_update_migration(bool update_nohz) { } DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem); +void timer_clear_idle(void); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 69abc7bfe80f..5d81f9aa30d2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -700,6 +700,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, delta = next_tick - basemono; if (delta <= (u64)TICK_NSEC) { tick.tv64 = 0; + + /* + * Tell the timer code that the base is not idle, i.e. undo + * the effect of get_next_timer_interrupt(): + */ + timer_clear_idle(); /* * We've not stopped the tick yet, and there's a timer in the * next period, so no point in stopping it either, bail. @@ -809,6 +815,12 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) tick_do_update_jiffies64(now); cpu_load_update_nohz_stop(); + /* + * Clear the timer idle flag, so we avoid IPIs on remote queueing and + * the clock forward checks in the enqueue path: + */ + timer_clear_idle(); + calc_load_exit_idle(); touch_softlockup_watchdog_sched(); /* diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 658051c97a3c..9339d71ee998 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -196,9 +196,11 @@ struct timer_base { spinlock_t lock; struct timer_list *running_timer; unsigned long clk; + unsigned long next_expiry; unsigned int cpu; bool migration_enabled; bool nohz_active; + bool is_idle; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; @@ -519,24 +521,37 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer { __internal_add_timer(base, timer); + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + return; + /* - * Check whether the other CPU is in dynticks mode and needs - * to be triggered to reevaluate the timer wheel. We are - * protected against the other CPU fiddling with the timer by - * holding the timer base lock. This also makes sure that a - * CPU on the way to stop its tick can not evaluate the timer - * wheel. - * - * Spare the IPI for deferrable timers on idle targets though. - * The next busy ticks will take care of it. Except full dynticks - * require special care against races with idle_cpu(), lets deal - * with that later. + * TODO: This wants some optimizing similar to the code below, but we + * will do that when we switch from push to pull for deferrable timers. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) { - if (!(timer->flags & TIMER_DEFERRABLE) || - tick_nohz_full_cpu(base->cpu)) + if (timer->flags & TIMER_DEFERRABLE) { + if (tick_nohz_full_cpu(base->cpu)) wake_up_nohz_cpu(base->cpu); + return; } + + /* + * We might have to IPI the remote CPU if the base is idle and the + * timer is not deferrable. If the other CPU is on the way to idle + * then it can't set base->is_idle as we hold the base lock: + */ + if (!base->is_idle) + return; + + /* Check whether this is the new first expiring timer: */ + if (time_after_eq(timer->expires, base->next_expiry)) + return; + + /* + * Set the next expiry time and kick the CPU so it can reevaluate the + * wheel: + */ + base->next_expiry = timer->expires; + wake_up_nohz_cpu(base->cpu); } #ifdef CONFIG_TIMER_STATS @@ -844,10 +859,11 @@ static inline struct timer_base *get_timer_base(u32 tflags) return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); } -static inline struct timer_base *get_target_base(struct timer_base *base, - unsigned tflags) +#ifdef CONFIG_NO_HZ_COMMON +static inline struct timer_base * +__get_target_base(struct timer_base *base, unsigned tflags) { -#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) +#ifdef CONFIG_SMP if ((tflags & TIMER_PINNED) || !base->migration_enabled) return get_timer_this_cpu_base(tflags); return get_timer_cpu_base(tflags, get_nohz_timer_target()); @@ -856,6 +872,43 @@ static inline struct timer_base *get_target_base(struct timer_base *base, #endif } +static inline void forward_timer_base(struct timer_base *base) +{ + /* + * We only forward the base when it's idle and we have a delta between + * base clock and jiffies. + */ + if (!base->is_idle || (long) (jiffies - base->clk) < 2) + return; + + /* + * If the next expiry value is > jiffies, then we fast forward to + * jiffies otherwise we forward to the next expiry value. + */ + if (time_after(base->next_expiry, jiffies)) + base->clk = jiffies; + else + base->clk = base->next_expiry; +} +#else +static inline struct timer_base * +__get_target_base(struct timer_base *base, unsigned tflags) +{ + return get_timer_this_cpu_base(tflags); +} + +static inline void forward_timer_base(struct timer_base *base) { } +#endif + +static inline struct timer_base * +get_target_base(struct timer_base *base, unsigned tflags) +{ + struct timer_base *target = __get_target_base(base, tflags); + + forward_timer_base(target); + return target; +} + /* * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means * that all timers which are tied to this base are locked, and the base itself @@ -1417,16 +1470,49 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) spin_lock(&base->lock); nextevt = __next_timer_interrupt(base); + base->next_expiry = nextevt; + /* + * We have a fresh next event. Check whether we can forward the base: + */ + if (time_after(nextevt, jiffies)) + base->clk = jiffies; + else if (time_after(nextevt, base->clk)) + base->clk = nextevt; + + if (time_before_eq(nextevt, basej)) { + expires = basem; + base->is_idle = false; + } else { + expires = basem + (nextevt - basej) * TICK_NSEC; + /* + * If we expect to sleep more than a tick, mark the base idle: + */ + if ((expires - basem) > TICK_NSEC) + base->is_idle = true; + } spin_unlock(&base->lock); - if (time_before_eq(nextevt, basej)) - expires = basem; - else - expires = basem + (nextevt - basej) * TICK_NSEC; - return cmp_next_hrtimer_event(basem, expires); } +/** + * timer_clear_idle - Clear the idle state of the timer base + * + * Called with interrupts disabled + */ +void timer_clear_idle(void) +{ + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + + /* + * We do this unlocked. The worst outcome is a remote enqueue sending + * a pointless IPI, but taking the lock would just make the window for + * sending the IPI a few instructions smaller for the cost of taking + * the lock in the exit from idle path. + */ + base->is_idle = false; +} + static int collect_expired_timers(struct timer_base *base, struct hlist_head *heads) { @@ -1440,7 +1526,7 @@ static int collect_expired_timers(struct timer_base *base, /* * If the next timer is ahead of time forward to current - * jiffies, otherwise forward to the next expiry time. + * jiffies, otherwise forward to the next expiry time: */ if (time_after(next, jiffies)) { /* The call site will increment clock! */ From 4e85876a9d2a977b4a07389da8c07edf76d10825 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:37 +0000 Subject: [PATCH 1048/1050] timers: Only wake softirq if necessary With the wheel forwading in place and with the HZ=1000 4ms folding we can avoid running the softirq at all. Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Chris Mason Cc: Frederic Weisbecker Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.607650550@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9339d71ee998..8d830f1f6a6a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1608,7 +1608,18 @@ static void run_timer_softirq(struct softirq_action *h) */ void run_local_timers(void) { + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + hrtimer_run_queues(); + /* Raise the softirq only if required. */ + if (time_before(jiffies, base->clk)) { + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + return; + /* CPU is awake, so check the deferrable base. */ + base++; + if (time_before(jiffies, base->clk)) + return; + } raise_softirq(TIMER_SOFTIRQ); } From ffdf047728f8f93df896b58049c7513856027141 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Mon, 4 Jul 2016 09:50:39 +0000 Subject: [PATCH 1049/1050] timers: Split out index calculation For further optimizations we need to seperate index calculation from queueing. No functional change. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.691159619@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 47 ++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 8d830f1f6a6a..8d7c23e55c85 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -471,12 +471,9 @@ static inline unsigned calc_index(unsigned expires, unsigned lvl) return LVL_OFFS(lvl) + (expires & LVL_MASK); } -static void -__internal_add_timer(struct timer_base *base, struct timer_list *timer) +static int calc_wheel_index(unsigned long expires, unsigned long clk) { - unsigned long expires = timer->expires; - unsigned long delta = expires - base->clk; - struct hlist_head *vec; + unsigned long delta = expires - clk; unsigned int idx; if (delta < LVL_START(1)) { @@ -496,7 +493,7 @@ __internal_add_timer(struct timer_base *base, struct timer_list *timer) } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) { idx = calc_index(expires, 7); } else if ((long) delta < 0) { - idx = base->clk & LVL_MASK; + idx = clk & LVL_MASK; } else { /* * Force expire obscene large timeouts to expire at the @@ -507,20 +504,33 @@ __internal_add_timer(struct timer_base *base, struct timer_list *timer) idx = calc_index(expires, LVL_DEPTH - 1); } - /* - * Enqueue the timer into the array bucket, mark it pending in - * the bitmap and store the index in the timer flags. - */ - vec = base->vectors + idx; - hlist_add_head(&timer->entry, vec); + return idx; +} + +/* + * Enqueue the timer into the hash bucket, mark it pending in + * the bitmap and store the index in the timer flags. + */ +static void enqueue_timer(struct timer_base *base, struct timer_list *timer, + unsigned int idx) +{ + hlist_add_head(&timer->entry, base->vectors + idx); __set_bit(idx, base->pending_map); timer_set_idx(timer, idx); } -static void internal_add_timer(struct timer_base *base, struct timer_list *timer) +static void +__internal_add_timer(struct timer_base *base, struct timer_list *timer) { - __internal_add_timer(base, timer); + unsigned int idx; + idx = calc_wheel_index(timer->expires, base->clk); + enqueue_timer(base, timer, idx); +} + +static void +trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) +{ if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) return; @@ -551,7 +561,14 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer * wheel: */ base->next_expiry = timer->expires; - wake_up_nohz_cpu(base->cpu); + wake_up_nohz_cpu(base->cpu); +} + +static void +internal_add_timer(struct timer_base *base, struct timer_list *timer) +{ + __internal_add_timer(base, timer); + trigger_dyntick_cpu(base, timer); } #ifdef CONFIG_TIMER_STATS From f00c0afdfa625165a609513bc74164d56752ec3e Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Mon, 4 Jul 2016 09:50:40 +0000 Subject: [PATCH 1050/1050] timers: Implement optimization for same expiry time in mod_timer() The existing optimization for same expiry time in mod_timer() checks whether the timer expiry time is the same as the new requested expiry time. In the old timer wheel implementation this does not take the slack batching into account, neither does the new implementation evaluate whether the new expiry time will requeue the timer to the same bucket. To optimize that, we can calculate the resulting bucket and check if the new expiry time is different from the current expiry time. This calculation happens outside the base lock held region. If the resulting bucket is the same we can avoid taking the base lock and requeueing the timer. If the timer needs to be requeued then we have to check under the base lock whether the base time has changed between the lockless calculation and taking the lock. If it has changed we need to recalculate under the lock. This optimization takes effect for timers which are enqueued into the less granular wheel levels (1 and above). With a simple test case the functionality has been verified: Before After Match: 5.5% 86.6% Requeue: 94.5% 13.4% Recalc: <0.01% In the non optimized case the timer is requeued in 94.5% of the cases. With the index optimization in place the requeue rate drops to 13.4%. The case where the lockless index calculation has to be redone is less than 0.01%. With a real world test case (networking) we observed the following changes: Before After Match: 97.8% 99.7% Requeue: 2.2% 0.3% Recalc: <0.001% That means two percent fewer lock/requeue/unlock operations done in one of the hot path use cases of timers. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.778527749@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 51 +++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 8d7c23e55c85..8f29abeb8c4d 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -960,28 +960,36 @@ static inline int __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) { struct timer_base *base, *new_base; - unsigned long flags; + unsigned int idx = UINT_MAX; + unsigned long clk = 0, flags; int ret = 0; /* - * TODO: Calculate the array bucket of the timer right here w/o - * holding the base lock. This allows to check not only - * timer->expires == expires below, but also whether the timer - * ends up in the same bucket. If we really need to requeue - * the timer then we check whether base->clk have - * advanced between here and locking the timer base. If - * jiffies advanced we have to recalc the array bucket with the - * lock held. - */ - - /* - * This is a common optimization triggered by the - * networking code - if the timer is re-modified - * to be the same thing then just return: + * This is a common optimization triggered by the networking code - if + * the timer is re-modified to have the same timeout or ends up in the + * same array bucket then just return: */ if (timer_pending(timer)) { if (timer->expires == expires) return 1; + /* + * Take the current timer_jiffies of base, but without holding + * the lock! + */ + base = get_timer_base(timer->flags); + clk = base->clk; + + idx = calc_wheel_index(expires, clk); + + /* + * Retrieve and compare the array index of the pending + * timer. If it matches set the expiry to the new value so a + * subsequent call will exit in the expires check above. + */ + if (idx == timer_get_idx(timer)) { + timer->expires = expires; + return 1; + } } timer_stats_timer_set_start_info(timer); @@ -1018,7 +1026,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } timer->expires = expires; - internal_add_timer(base, timer); + /* + * If 'idx' was calculated above and the base time did not advance + * between calculating 'idx' and taking the lock, only enqueue_timer() + * and trigger_dyntick_cpu() is required. Otherwise we need to + * (re)calculate the wheel index via internal_add_timer(). + */ + if (idx != UINT_MAX && clk == base->clk) { + enqueue_timer(base, timer, idx); + trigger_dyntick_cpu(base, timer); + } else { + internal_add_timer(base, timer); + } out_unlock: spin_unlock_irqrestore(&base->lock, flags);