From acc455cffa75070d55e74fc7802b49edbc080e92 Mon Sep 17 00:00:00 2001 From: chris hyser Date: Wed, 22 Apr 2015 12:28:31 -0400 Subject: [PATCH 001/481] sparc64: Setup sysfs to mark LDOM sockets, cores and threads correctly commit 5f4826a362405748bbf73957027b77993e61e1af Author: chris hyser Date: Tue Apr 21 10:31:38 2015 -0400 sparc64: Setup sysfs to mark LDOM sockets, cores and threads correctly The current sparc kernel has no representation for sockets though tools like lscpu can pull this from sysfs. This patch walks the machine description cache and socket hierarchy and marks sockets as well as cores and threads such that a representative sysfs is created by drivers/base/topology.c. Before this patch: $ lscpu Architecture: sparc64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Big Endian CPU(s): 1024 On-line CPU(s) list: 0-1023 Thread(s) per core: 8 Core(s) per socket: 1 <--- wrong Socket(s): 128 <--- wrong NUMA node(s): 4 NUMA node0 CPU(s): 0-255 NUMA node1 CPU(s): 256-511 NUMA node2 CPU(s): 512-767 NUMA node3 CPU(s): 768-1023 After this patch: $ lscpu Architecture: sparc64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Big Endian CPU(s): 1024 On-line CPU(s) list: 0-1023 Thread(s) per core: 8 Core(s) per socket: 32 Socket(s): 4 NUMA node(s): 4 NUMA node0 CPU(s): 0-255 NUMA node1 CPU(s): 256-511 NUMA node2 CPU(s): 512-767 NUMA node3 CPU(s): 768-1023 Most of this patch was done by Chris with updates by David. Signed-off-by: Chris Hyser Signed-off-by: David Ahern Signed-off-by: David S. Miller --- arch/sparc/include/asm/cpudata_64.h | 3 +- arch/sparc/include/asm/topology_64.h | 3 +- arch/sparc/kernel/mdesc.c | 138 +++++++++++++++++++++------ arch/sparc/kernel/smp_64.c | 13 +++ 4 files changed, 128 insertions(+), 29 deletions(-) diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index a6e424d185d0..a6cfdabb6054 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -24,7 +24,8 @@ typedef struct { unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - int core_id; + unsigned short sock_id; + unsigned short core_id; int proc_id; } cpuinfo_sparc; diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index ed8f071132e4..d1761df5cca6 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -40,11 +40,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #ifdef CONFIG_SMP #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) -#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; +extern cpumask_t cpu_core_sib_map[NR_CPUS]; static inline const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_core_map[cpu]; diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 26c80e18d7b1..6f80936e0eea 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -614,45 +614,68 @@ static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp) } } -static void mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) +static void find_back_node_value(struct mdesc_handle *hp, u64 node, + char *srch_val, + void (*func)(struct mdesc_handle *, u64, int), + u64 val, int depth) { - u64 a; + u64 arc; - mdesc_for_each_arc(a, 
hp, mp, MDESC_ARC_TYPE_BACK) { - u64 t = mdesc_arc_target(hp, a); - const char *name; - const u64 *id; + /* Since we have an estimate of recursion depth, do a sanity check. */ + if (depth == 0) + return; - name = mdesc_node_name(hp, t); - if (!strcmp(name, "cpu")) { - id = mdesc_get_property(hp, t, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } else { - u64 j; + mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) { + u64 n = mdesc_arc_target(hp, arc); + const char *name = mdesc_node_name(hp, n); - mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) { - u64 n = mdesc_arc_target(hp, j); - const char *n_name; + if (!strcmp(srch_val, name)) + (*func)(hp, n, val); - n_name = mdesc_node_name(hp, n); - if (strcmp(n_name, "cpu")) - continue; - - id = mdesc_get_property(hp, n, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } - } + find_back_node_value(hp, n, srch_val, func, val, depth-1); } } +static void __mark_core_id(struct mdesc_handle *hp, u64 node, + int core_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).core_id = core_id; +} + +static void __mark_sock_id(struct mdesc_handle *hp, u64 node, + int sock_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = sock_id; +} + +static void mark_core_ids(struct mdesc_handle *hp, u64 mp, + int core_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10); +} + +static void mark_sock_ids(struct mdesc_handle *hp, u64 mp, + int sock_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10); +} + static void set_core_ids(struct mdesc_handle *hp) { int idx; u64 mp; idx = 1; + + /* Identify unique cores by looking for cpus backpointed to by + * level 1 instruction caches. + */ mdesc_for_each_node_by_name(hp, mp, "cache") { const u64 *level; const char *type; @@ -667,11 +690,72 @@ static void set_core_ids(struct mdesc_handle *hp) continue; mark_core_ids(hp, mp, idx); - idx++; } } +static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) +{ + u64 mp; + int idx = 1; + int fnd = 0; + + /* Identify unique sockets by looking for cpus backpointed to by + * shared level n caches. + */ + mdesc_for_each_node_by_name(hp, mp, "cache") { + const u64 *cur_lvl; + + cur_lvl = mdesc_get_property(hp, mp, "level", NULL); + if (*cur_lvl != level) + continue; + + mark_sock_ids(hp, mp, idx); + idx++; + fnd = 1; + } + return fnd; +} + +static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp) +{ + int idx = 1; + + mdesc_for_each_node_by_name(hp, mp, "socket") { + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 t = mdesc_arc_target(hp, a); + const char *name; + const u64 *id; + + name = mdesc_node_name(hp, t); + if (strcmp(name, "cpu")) + continue; + + id = mdesc_get_property(hp, t, "id", NULL); + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = idx; + } + idx++; + } +} + +static void set_sock_ids(struct mdesc_handle *hp) +{ + u64 mp; + + /* If machine description exposes sockets data use it. + * Otherwise fallback to use shared L3 or L2 caches. 
+ */ + mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets"); + if (mp != MDESC_NODE_NULL) + return set_sock_ids_by_socket(hp, mp); + + if (!set_sock_ids_by_cache(hp, 3)) + set_sock_ids_by_cache(hp, 2); +} + static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) { u64 a; @@ -707,7 +791,6 @@ static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name) continue; mark_proc_ids(hp, mp, idx); - idx++; } } @@ -900,6 +983,7 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask) set_core_ids(hp); set_proc_ids(hp); + set_sock_ids(hp); mdesc_release(hp); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 61139d9924ca..19cd08d18672 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -60,8 +60,12 @@ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); +EXPORT_SYMBOL(cpu_core_sib_map); static cpumask_t smp_commenced_mask; @@ -1243,6 +1247,15 @@ void smp_fill_in_sib_core_maps(void) } } + for_each_present_cpu(i) { + unsigned int j; + + for_each_present_cpu(j) { + if (cpu_data(i).sock_id == cpu_data(j).sock_id) + cpumask_set_cpu(j, &cpu_core_sib_map[i]); + } + } + for_each_present_cpu(i) { unsigned int j; From a7d2bf25a4837e6514a2747380fd4539b63ee20c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 14 Apr 2015 17:35:12 +0300 Subject: [PATCH 002/481] HID: i2c-hid: Do not fail probing if gpiolib is not enabled Using GPIOs and gpiolib is optional. If the kernel is compiled without GPIO support the driver should not fail if it finds the interrupt using normal methods. However, commit a485923efbb8 ("HID: i2c-hid: Add support for ACPI GPIO interrupts") did not take into account that acpi_dev_add_driver_gpios() returns -ENXIO when !CONFIG_GPIOLIB. Fix this by checking the return value against -ENXIO and 0 and only in that case fail the probe. Reported-by: Gabriele Mazzotta Signed-off-by: Mika Westerberg Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index ab4dd952b6ba..92d6cdf02460 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -862,6 +862,7 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client, union acpi_object *obj; struct acpi_device *adev; acpi_handle handle; + int ret; handle = ACPI_HANDLE(&client->dev); if (!handle || acpi_bus_get_device(handle, &adev)) @@ -877,7 +878,9 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client, pdata->hid_descriptor_address = obj->integer.value; ACPI_FREE(obj); - return acpi_dev_add_driver_gpios(adev, i2c_hid_acpi_gpios); + /* GPIOs are optional */ + ret = acpi_dev_add_driver_gpios(adev, i2c_hid_acpi_gpios); + return ret < 0 && ret != -ENXIO ? ret : 0; } static const struct acpi_device_id i2c_hid_acpi_match[] = { From ca082355b542710fd07559ee05f6975c2768344b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 27 Apr 2015 20:36:28 +0900 Subject: [PATCH 003/481] clk: Use CONFIG_ARCH_EXYNOS instead of CONFIG_ARCH_EXYNOS5433 This patch removes the CONFIG_ARCH_EXYNOS5433 and then use only the CONFIG_ARCH_EXYNOS for ARM-64bit Exynos5433 SoC. 
Signed-off-by: Chanwoo Choi Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/samsung/Makefile b/drivers/clk/samsung/Makefile index 17e9af7fe81f..a17683b2cf27 100644 --- a/drivers/clk/samsung/Makefile +++ b/drivers/clk/samsung/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_SOC_EXYNOS5250) += clk-exynos5250.o obj-$(CONFIG_SOC_EXYNOS5260) += clk-exynos5260.o obj-$(CONFIG_SOC_EXYNOS5410) += clk-exynos5410.o obj-$(CONFIG_SOC_EXYNOS5420) += clk-exynos5420.o -obj-$(CONFIG_ARCH_EXYNOS5433) += clk-exynos5433.o +obj-$(CONFIG_ARCH_EXYNOS) += clk-exynos5433.o obj-$(CONFIG_SOC_EXYNOS5440) += clk-exynos5440.o obj-$(CONFIG_ARCH_EXYNOS) += clk-exynos-audss.o obj-$(CONFIG_ARCH_EXYNOS) += clk-exynos-clkout.o From a84d1f546c0ef11446d8db6fd1a2c8d24d107b0b Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Mon, 27 Apr 2015 20:36:29 +0900 Subject: [PATCH 004/481] clk: exynos5433: Fix wrong offset of PCLK_MSCL_SECURE_SMMU_JPEG This patch fixes the wrong offset of PCLK_MSCL_SECURE_SMMU_JPEG in CMU_MSCL domain. Fixes: b274bbfd8b4a94 (clk: samsung: exynos5433: Add clocks for CMU_MSCL domain) Signed-off-by: Jonghwa Lee Signed-off-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos5433.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c index 387e3e39e635..543f9c7707a7 100644 --- a/drivers/clk/samsung/clk-exynos5433.c +++ b/drivers/clk/samsung/clk-exynos5433.c @@ -3927,7 +3927,7 @@ CLK_OF_DECLARE(exynos5433_cmu_atlas, "samsung,exynos5433-cmu-atlas", #define ENABLE_PCLK_MSCL 0x0900 #define ENABLE_PCLK_MSCL_SECURE_SMMU_M2MSCALER0 0x0904 #define ENABLE_PCLK_MSCL_SECURE_SMMU_M2MSCALER1 0x0908 -#define ENABLE_PCLK_MSCL_SECURE_SMMU_JPEG 0x000c +#define ENABLE_PCLK_MSCL_SECURE_SMMU_JPEG 0x090c #define ENABLE_SCLK_MSCL 0x0a00 #define ENABLE_IP_MSCL0 0x0b00 #define ENABLE_IP_MSCL1 0x0b04 From bdddbf6996c0b9299efc97b8f66e06286f3aa8c9 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 29 Apr 2015 08:42:44 +0800 Subject: [PATCH 005/481] xfrm: fix a race in xfrm_state_lookup_byspi The returned xfrm_state should be held before unlocking xfrm_state_lock, otherwise the returned xfrm_state may be released. Fixes: c454997e6[{pktgen, xfrm} Introduce xfrm_state_lookup_byspi..] Cc: Fan Du Signed-off-by: Li RongQing Acked-by: Fan Du Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f5e39e35d73a..96688cd0f6f1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -927,8 +927,8 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, x->id.spi != spi) continue; - spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_hold(x); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } spin_unlock_bh(&net->xfrm.xfrm_state_lock); From 1a9f6c887db891ed477642c38a1e17ee23c3f02b Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Mon, 27 Apr 2015 20:36:30 +0900 Subject: [PATCH 006/481] clk: exynos5433: Fix CLK_PCLK_MONOTONIC_CNT clk register assignment CLK_PCLK_MONOTONIC_CNT clock had a wrong register assigned to it. The correct register is ENABLE_PCLK_MIF_SECURE_MONOTONIC_CNT.
Signed-off-by: Jonghwa Lee Signed-off-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos5433.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c index 543f9c7707a7..b1a546e402aa 100644 --- a/drivers/clk/samsung/clk-exynos5433.c +++ b/drivers/clk/samsung/clk-exynos5433.c @@ -1490,7 +1490,7 @@ static struct samsung_gate_clock mif_gate_clks[] __initdata = { /* ENABLE_PCLK_MIF_SECURE_MONOTONIC_CNT */ GATE(CLK_PCLK_MONOTONIC_CNT, "pclk_monotonic_cnt", "div_aclk_mif_133", - ENABLE_PCLK_MIF_SECURE_RTC, 0, 0, 0), + ENABLE_PCLK_MIF_SECURE_MONOTONIC_CNT, 0, 0, 0), /* ENABLE_PCLK_MIF_SECURE_RTC */ GATE(CLK_PCLK_RTC, "pclk_rtc", "div_aclk_mif_133", From b57c93be79e4061b8878315df2f6a239ad967d50 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 27 Apr 2015 20:36:31 +0900 Subject: [PATCH 007/481] clk: exynos5433: Fix wrong parent clock of sclk_apollo clock This patch fixes the wrong parent clock of sclk_apollo clock from 'div_apollo_pll' to 'div_apollo2'. Signed-off-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos5433.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c index b1a546e402aa..ec294260ef09 100644 --- a/drivers/clk/samsung/clk-exynos5433.c +++ b/drivers/clk/samsung/clk-exynos5433.c @@ -3665,7 +3665,7 @@ static struct samsung_gate_clock apollo_gate_clks[] __initdata = { ENABLE_SCLK_APOLLO, 3, CLK_IGNORE_UNUSED, 0), GATE(CLK_SCLK_HPM_APOLLO, "sclk_hpm_apollo", "div_sclk_hpm_apollo", ENABLE_SCLK_APOLLO, 1, CLK_IGNORE_UNUSED, 0), - GATE(CLK_SCLK_APOLLO, "sclk_apollo", "div_apollo_pll", + GATE(CLK_SCLK_APOLLO, "sclk_apollo", "div_apollo2", ENABLE_SCLK_APOLLO, 0, CLK_IGNORE_UNUSED, 0), }; From 85943d7ea5832afd454b7219ec1d8c2498c4fbcc Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 27 Apr 2015 20:36:32 +0900 Subject: [PATCH 008/481] clk: exynos5433: Fix wrong PMS value of exynos5433_pll_rates This patch fixes the wrong PMS value of exynos5433_pll_rates table for {ATLAS|APOLLO|MEM0|MEM1|BUS|MFC|MPHY|G3D|DISP|ISP|_PLL. 
- 720 MHz (mdiv=360, pdiv=6, sdiv=1) -> 700 MHz (mdiv=175, pdiv=3, sdiv=1) - 350 MHz (mdiv=360, pdiv=6, sdiv=2) -> (mdiv=350, pdiv=6, sdiv=2) - 133 MHz (mdiv=552, pdiv=6, sdiv=4) -> (mdiv=532, pdiv=6, sdiv=4) Signed-off-by: Chanwoo Choi Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos5433.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c index ec294260ef09..9e04ae2bb4d7 100644 --- a/drivers/clk/samsung/clk-exynos5433.c +++ b/drivers/clk/samsung/clk-exynos5433.c @@ -748,7 +748,7 @@ static struct samsung_pll_rate_table exynos5443_pll_rates[] = { PLL_35XX_RATE(825000000U, 275, 4, 1), PLL_35XX_RATE(800000000U, 400, 6, 1), PLL_35XX_RATE(733000000U, 733, 12, 1), - PLL_35XX_RATE(700000000U, 360, 6, 1), + PLL_35XX_RATE(700000000U, 175, 3, 1), PLL_35XX_RATE(667000000U, 222, 4, 1), PLL_35XX_RATE(633000000U, 211, 4, 1), PLL_35XX_RATE(600000000U, 500, 5, 2), @@ -760,14 +760,14 @@ static struct samsung_pll_rate_table exynos5443_pll_rates[] = { PLL_35XX_RATE(444000000U, 370, 5, 2), PLL_35XX_RATE(420000000U, 350, 5, 2), PLL_35XX_RATE(400000000U, 400, 6, 2), - PLL_35XX_RATE(350000000U, 360, 6, 2), + PLL_35XX_RATE(350000000U, 350, 6, 2), PLL_35XX_RATE(333000000U, 222, 4, 2), PLL_35XX_RATE(300000000U, 500, 5, 3), PLL_35XX_RATE(266000000U, 532, 6, 3), PLL_35XX_RATE(200000000U, 400, 6, 3), PLL_35XX_RATE(166000000U, 332, 6, 3), PLL_35XX_RATE(160000000U, 320, 6, 3), - PLL_35XX_RATE(133000000U, 552, 6, 4), + PLL_35XX_RATE(133000000U, 532, 6, 4), PLL_35XX_RATE(100000000U, 400, 6, 4), { /* sentinel */ } }; From 771aada9ace7e5dd837a69ef0bca08b5455b2d36 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 27 Apr 2015 11:12:25 +0200 Subject: [PATCH 009/481] s390/bpf: Adjust ALU64_DIV/MOD to match interpreter change The s390x ALU64_DIV/MOD has been implemented according to the eBPF interpreter specification that used do_div(). This function does a 64-bit by 32-bit divide. It turned out that this was wrong and now the interpreter uses div64_u64_rem() for full 64-bit division. So fix this and use full 64-bit division in the s390x eBPF backend code. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/net/bpf_jit_comp.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 7690dc8e1ab5..065aca02bc65 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -588,8 +588,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9160000, dst_reg, rc_reg); break; } - case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */ - case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */ + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */ { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? 
REG_W1 : REG_W0; @@ -602,10 +602,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4_IMM(0xa7090000, REG_W0, 0); /* lgr %w1,%dst */ EMIT4(0xb9040000, REG_W1, dst_reg); - /* llgfr %dst,%src (u32 cast) */ - EMIT4(0xb9160000, dst_reg, src_reg); /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, dst_reg); + EMIT4(0xb9870000, REG_W0, src_reg); /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; @@ -632,8 +630,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9160000, dst_reg, rc_reg); break; } - case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */ - case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */ + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */ { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; @@ -649,7 +647,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9040000, REG_W1, dst_reg); /* dlg %w0,(%l) */ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, - EMIT_CONST_U64((u32) imm)); + EMIT_CONST_U64(imm)); /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; From 8cc2af7c530e320db442e6efec8c84c125c07c81 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Tue, 28 Apr 2015 10:31:44 +0200 Subject: [PATCH 010/481] s390/zcrypt: fixed ap poll timer behavior The ap poll timer restart condition was wrong. Hence the poll timer was not restarted reliable when setting a new time interval via the poll_timeout sysfs attribute. Added missing timer locking. Reported-by: Thomas Gleixner Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index f0b9871a4bbd..6e506a88f43e 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1158,11 +1158,12 @@ static ssize_t poll_timeout_store(struct bus_type *bus, const char *buf, poll_timeout = time; hr_time = ktime_set(0, poll_timeout); - if (!hrtimer_is_queued(&ap_poll_timer) || - !hrtimer_forward(&ap_poll_timer, hrtimer_get_expires(&ap_poll_timer), hr_time)) { - hrtimer_set_expires(&ap_poll_timer, hr_time); - hrtimer_start_expires(&ap_poll_timer, HRTIMER_MODE_ABS); - } + spin_lock_bh(&ap_poll_timer_lock); + hrtimer_cancel(&ap_poll_timer); + hrtimer_set_expires(&ap_poll_timer, hr_time); + hrtimer_start_expires(&ap_poll_timer, HRTIMER_MODE_ABS); + spin_unlock_bh(&ap_poll_timer_lock); + return count; } @@ -1528,14 +1529,11 @@ static inline void __ap_schedule_poll_timer(void) ktime_t hr_time; spin_lock_bh(&ap_poll_timer_lock); - if (hrtimer_is_queued(&ap_poll_timer) || ap_suspend_flag) - goto out; - if (ktime_to_ns(hrtimer_expires_remaining(&ap_poll_timer)) <= 0) { + if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) { hr_time = ktime_set(0, poll_timeout); hrtimer_forward_now(&ap_poll_timer, hr_time); hrtimer_restart(&ap_poll_timer); } -out: spin_unlock_bh(&ap_poll_timer_lock); } From b9b4b1cef156e6b403b26ea4cb6d0caf4850e05c Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 29 Apr 2015 18:45:03 +0200 Subject: [PATCH 011/481] s390/bpf: Fix gcov stack space problem When compiling the kernel for GCOV (CONFIG_GCOV_KERNEL,-fprofile-arcs), gcc allocates a lot of stack space because of the large switch statement in bpf_jit_insn(). 
This leads to the following compile warning: arch/s390/net/bpf_jit_comp.c: In function 'bpf_jit_prog': arch/s390/net/bpf_jit_comp.c:1144:1: warning: frame size of function 'bpf_jit_prog' is 12592 bytes which is more than half the stack size. The dynamic check would not be reliable. No check emitted for this function. arch/s390/net/bpf_jit_comp.c:1144:1: warning: the frame size of 12504 bytes is larger than 1024 bytes [-Wframe-larger-than=] And indead gcc allocates 12592 bytes of stack space: # objdump -d arch/s390/net/bpf_jit_comp.o ... 0000000000000c60 : c60: eb 6f f0 48 00 24 stmg %r6,%r15,72(%r15) c66: b9 04 00 ef lgr %r14,%r15 c6a: e3 f0 fe d0 fc 71 lay %r15,-12592(%r15) As a workaround of that problem we now define bpf_jit_insn() as noinline which then reduces the stack space. # objdump -d arch/s390/net/bpf_jit_comp.o ... 0000000000000070 : 70: eb 6f f0 48 00 24 stmg %r6,%r15,72(%r15) 76: c0 d0 00 00 00 00 larl %r13,76 7c: a7 f1 3f 80 tmll %r15,16256 80: b9 04 00 ef lgr %r14,%r15 84: e3 f0 ff a0 ff 71 lay %r15,-96(%r15) Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/net/bpf_jit_comp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 065aca02bc65..20c146d1251a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -443,8 +443,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) /* * Compile one eBPF instruction into s390x code + * + * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of + * stack space for the large switch statement. */ -static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) +static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) { struct bpf_insn *insn = &fp->insnsi[i]; int jmp_off, last, insn_count = 1; From 27796e755a2fc6707cdf9b484a56395f703e4f3d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 30 Apr 2015 11:57:41 +0300 Subject: [PATCH 012/481] ASoC: davinci-mcasp: Correct pm status check in suspend callback pm_runtime_enabled() will only tell if the pm runtime has been enabled for the device, which is done at probe time but will not tell the actual power state of the device. pm_runtime_active() provides this information. This patch fixes a kernel crash when doing suspend when McASP is not active. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown --- sound/soc/davinci/davinci-mcasp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index bb4b78eada58..23c91fa65ab8 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -1247,7 +1247,7 @@ static int davinci_mcasp_suspend(struct snd_soc_dai *dai) u32 reg; int i; - context->pm_state = pm_runtime_enabled(mcasp->dev); + context->pm_state = pm_runtime_active(mcasp->dev); if (!context->pm_state) pm_runtime_get_sync(mcasp->dev); From 52cdc33c383850433d8c93e4aa469ed9bfcd1c56 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 29 Apr 2015 14:52:46 +0300 Subject: [PATCH 013/481] clk: qcom: Fix MSM8916 venus divider value One of the video codec clock frequencies has incorrect divider value. Fix it. 
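As a worked sanity check (assuming the usual fixed 800 MHz GPLL0 parent on this SoC; the parent rate is an assumption for illustration, not stated in this patch), the corrected divider gives:

  800 MHz / 3.5 = 228.571 MHz  (matches the 228570000 Hz table entry)
  800 MHz / 5   = 160.000 MHz  (the old divider merely duplicated the 160 MHz entry above it)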
Fixes: 3966fab8b6ab "clk: qcom: Add MSM8916 Global Clock Controller support" Signed-off-by: Georgi Djakov Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-msm8916.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/gcc-msm8916.c b/drivers/clk/qcom/gcc-msm8916.c index d3458474eb3a..b15d52b56c6a 100644 --- a/drivers/clk/qcom/gcc-msm8916.c +++ b/drivers/clk/qcom/gcc-msm8916.c @@ -1115,7 +1115,7 @@ static struct clk_rcg2 usb_hs_system_clk_src = { static const struct freq_tbl ftbl_gcc_venus0_vcodec0_clk[] = { F(100000000, P_GPLL0, 8, 0, 0), F(160000000, P_GPLL0, 5, 0, 0), - F(228570000, P_GPLL0, 5, 0, 0), + F(228570000, P_GPLL0, 3.5, 0, 0), { } }; From 5d45ed8f5bfaeaf50576b8d663796ee905ccf2a0 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 29 Apr 2015 14:52:47 +0300 Subject: [PATCH 014/481] clk: qcom: Fix MSM8916 gfx3d_clk_src configuration The gfx3d_clk_src parents configuration is incorrect. Fix it. Fixes: 3966fab8b6ab "clk: qcom: Add MSM8916 Global Clock Controller support" Signed-off-by: Georgi Djakov Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-msm8916.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/gcc-msm8916.c b/drivers/clk/qcom/gcc-msm8916.c index b15d52b56c6a..c66f7bc2ae87 100644 --- a/drivers/clk/qcom/gcc-msm8916.c +++ b/drivers/clk/qcom/gcc-msm8916.c @@ -71,8 +71,8 @@ static const char *gcc_xo_gpll0_bimc[] = { static const struct parent_map gcc_xo_gpll0a_gpll1_gpll2a_map[] = { { P_XO, 0 }, { P_GPLL0_AUX, 3 }, - { P_GPLL2_AUX, 2 }, { P_GPLL1, 1 }, + { P_GPLL2_AUX, 2 }, }; static const char *gcc_xo_gpll0a_gpll1_gpll2a[] = { From 88776f366ede7d9cdce60bd2c9753dd6d6fa8b77 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 1 May 2015 09:20:34 +0200 Subject: [PATCH 015/481] ALSA: hda - Add headphone quirk for Lifebook E752 Fujitsu Lifebook E752 laptop needs a similar quirk done for Lifebook T731. Otherwise the headphone is always muted. Reported-and-tested-by: Christian Weber Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e2afd53cc14c..eb2fc79cbd32 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5119,6 +5119,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), + SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC), From 545774bd6e1427d98dde77244329d2311c5eca6f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 27 Apr 2015 14:51:35 +0800 Subject: [PATCH 016/481] ASoC: mc13783: Fix wrong mask value used in mc13xxx_reg_rmw() calls mc13xxx_reg_rmw() won't change any bit if passing 0 to the mask field. Pass AUDIO_SSI_SEL instead of 0 for the mask field to set AUDIO_SSI_SEL bit. 
Signed-off-by: Axel Lin Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/mc13783.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/mc13783.c b/sound/soc/codecs/mc13783.c index 2ffb9a0570dc..3d44fc50e4d0 100644 --- a/sound/soc/codecs/mc13783.c +++ b/sound/soc/codecs/mc13783.c @@ -623,14 +623,14 @@ static int mc13783_probe(struct snd_soc_codec *codec) AUDIO_SSI_SEL, 0); else mc13xxx_reg_rmw(priv->mc13xxx, MC13783_AUDIO_CODEC, - 0, AUDIO_SSI_SEL); + AUDIO_SSI_SEL, AUDIO_SSI_SEL); if (priv->dac_ssi_port == MC13783_SSI1_PORT) mc13xxx_reg_rmw(priv->mc13xxx, MC13783_AUDIO_DAC, AUDIO_SSI_SEL, 0); else mc13xxx_reg_rmw(priv->mc13xxx, MC13783_AUDIO_DAC, - 0, AUDIO_SSI_SEL); + AUDIO_SSI_SEL, AUDIO_SSI_SEL); return 0; } From a928d28d4487402e6bd18bea1b8cc2b2ec6e6d8f Mon Sep 17 00:00:00 2001 From: Evgenii Lepikhin Date: Tue, 21 Apr 2015 15:49:57 +0300 Subject: [PATCH 017/481] ISCSI: fix minor memory leak This patch adds a missing kfree for sess->sess_ops memory upon transport_init_session() failure. Signed-off-by: Evgenii Lepikhin Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_login.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 8ce94ff744e6..70d799dfab03 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -346,6 +346,7 @@ static int iscsi_login_zero_tsih_s1( if (IS_ERR(sess->se_sess)) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); + kfree(sess->sess_ops); kfree(sess); return -ENOMEM; } From 8ee83a747ac2309934c229281dda8f26648ec462 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Fri, 1 May 2015 10:43:45 -0700 Subject: [PATCH 018/481] target/user: Disallow full passthrough (pass_level=0) TCMU requires more work to correctly handle both user handlers that want all SCSI commands (pass_level=0) for a se_device, and also handlers that just want I/O commands and let the others be emulated by the kernel (pass_level=1). Only support the latter for now. For full passthrough, we will need to support a second se_subsystem_api template, due to configfs attributes being different between the two modes. Thus pass_level is extraneous, and we can remove it. The ABI break for TCMU v2 is already applied for this release, so it's best to do this now to avoid another ABI break in the future. Signed-off-by: Andy Grover Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- Documentation/target/tcmu-design.txt | 18 +++----- drivers/target/target_core_user.c | 68 ++-------------------------- 2 files changed, 10 insertions(+), 76 deletions(-) diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt index 43e94ea6d2ca..c80bf1ed8981 100644 --- a/Documentation/target/tcmu-design.txt +++ b/Documentation/target/tcmu-design.txt @@ -15,7 +15,7 @@ Contents: a) Discovering and configuring TCMU uio devices b) Waiting for events on the device(s) c) Managing the command ring -3) Command filtering and pass_level +3) Command filtering 4) A final note @@ -360,17 +360,13 @@ int handle_device_events(int fd, void *map) } -Command filtering and pass_level --------------------------------- +Command filtering +----------------- -TCMU supports a "pass_level" option with valid values of 0 or 1. When -the value is 0 (the default), nearly all SCSI commands received for -the device are passed through to the handler. 
This allows maximum -flexibility but increases the amount of code required by the handler, -to support all mandatory SCSI commands. If pass_level is set to 1, -then only IO-related commands are presented, and the rest are handled -by LIO's in-kernel command emulation. The commands presented at level -1 include all versions of: +Initial TCMU support is for a filtered commandset. Only IO-related +commands are presented to userspace, and the rest are handled by LIO's +in-kernel command emulation. The commands presented are all versions +of: READ WRITE diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index dbc872a6c981..0e0feeaec39c 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -71,13 +71,6 @@ struct tcmu_hba { u32 host_id; }; -/* User wants all cmds or just some */ -enum passthru_level { - TCMU_PASS_ALL = 0, - TCMU_PASS_IO, - TCMU_PASS_INVALID, -}; - #define TCMU_CONFIG_LEN 256 struct tcmu_dev { @@ -89,7 +82,6 @@ struct tcmu_dev { #define TCMU_DEV_BIT_OPEN 0 #define TCMU_DEV_BIT_BROKEN 1 unsigned long flags; - enum passthru_level pass_level; struct uio_info uio_info; @@ -683,8 +675,6 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) setup_timer(&udev->timeout, tcmu_device_timedout, (unsigned long)udev); - udev->pass_level = TCMU_PASS_ALL; - return &udev->se_dev; } @@ -948,13 +938,12 @@ static void tcmu_free_device(struct se_device *dev) } enum { - Opt_dev_config, Opt_dev_size, Opt_err, Opt_pass_level, + Opt_dev_config, Opt_dev_size, Opt_err, }; static match_table_t tokens = { {Opt_dev_config, "dev_config=%s"}, {Opt_dev_size, "dev_size=%u"}, - {Opt_pass_level, "pass_level=%u"}, {Opt_err, NULL} }; @@ -965,7 +954,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, char *orig, *ptr, *opts, *arg_p; substring_t args[MAX_OPT_ARGS]; int ret = 0, token; - int arg; opts = kstrdup(page, GFP_KERNEL); if (!opts) @@ -998,16 +986,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoul() failed for dev_size=\n"); break; - case Opt_pass_level: - match_int(args, &arg); - if (arg >= TCMU_PASS_INVALID) { - pr_warn("TCMU: Invalid pass_level: %d\n", arg); - break; - } - - pr_debug("TCMU: Setting pass_level to %d\n", arg); - udev->pass_level = arg; - break; default: break; } @@ -1024,8 +1002,7 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) bl = sprintf(b + bl, "Config: %s ", udev->dev_config[0] ? udev->dev_config : "NULL"); - bl += sprintf(b + bl, "Size: %zu PassLevel: %u\n", - udev->dev_size, udev->pass_level); + bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); return bl; } @@ -1074,46 +1051,7 @@ static struct sbc_ops tcmu_sbc_ops = { static sense_reason_t tcmu_parse_cdb(struct se_cmd *cmd) { - unsigned char *cdb = cmd->t_task_cdb; - struct tcmu_dev *udev = TCMU_DEV(cmd->se_dev); - sense_reason_t ret; - - switch (udev->pass_level) { - case TCMU_PASS_ALL: - /* We're just like pscsi, then */ - /* - * For REPORT LUNS we always need to emulate the response, for everything - * else, pass it up. 
- */ - switch (cdb[0]) { - case REPORT_LUNS: - cmd->execute_cmd = spc_emulate_report_luns; - break; - case READ_6: - case READ_10: - case READ_12: - case READ_16: - case WRITE_6: - case WRITE_10: - case WRITE_12: - case WRITE_16: - case WRITE_VERIFY: - cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - /* FALLTHROUGH */ - default: - cmd->execute_cmd = tcmu_pass_op; - } - ret = TCM_NO_SENSE; - break; - case TCMU_PASS_IO: - ret = sbc_parse_cdb(cmd, &tcmu_sbc_ops); - break; - default: - pr_err("Unknown tcm-user pass level %d\n", udev->pass_level); - ret = TCM_CHECK_CONDITION_ABORT_CMD; - } - - return ret; + return sbc_parse_cdb(cmd, &tcmu_sbc_ops); } DEF_TB_DEFAULT_ATTRIBS(tcmu); From ec6bfca835b61df360881c1bf89268f69fed2a61 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 4 May 2015 15:50:47 +0800 Subject: [PATCH 019/481] ALSA: hda/realtek - Fix typo for ALC286/ALC288 Delete more one break for ALC286/ALC288. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index eb2fc79cbd32..516a0da6c080 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3738,7 +3738,6 @@ static void alc_headset_mode_default(struct hda_codec *codec) case 0x10ec0288: alc_process_coef_fw(codec, coef0288); break; - break; case 0x10ec0292: alc_process_coef_fw(codec, coef0292); break; From 810e4425c224af6be67dff68c8832af1b5a11f89 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Sun, 3 May 2015 21:11:47 +0300 Subject: [PATCH 020/481] ASoC: uda1380: Avoid accessing i2c bus when codec is disabled set_dai_fmt_both() callback is called from snd_soc_runtime_set_dai_fmt() which is called from snd_soc_register_card(), but at this time codec is not powered on yet. Replace direct i2c write with regcache write. Fixes: 5f0acedddf533c (ASoC: rx1950_uda1380: Use static DAI format setup) Fixes: 5cc10b9b77c234 (ASoC: h1940_uda1380: Use static DAI format setup) Signed-off-by: Vasily Khoruzhick Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/uda1380.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c index dc7778b6dd7f..c3c33bd0df1c 100644 --- a/sound/soc/codecs/uda1380.c +++ b/sound/soc/codecs/uda1380.c @@ -437,7 +437,7 @@ static int uda1380_set_dai_fmt_both(struct snd_soc_dai *codec_dai, if ((fmt & SND_SOC_DAIFMT_MASTER_MASK) != SND_SOC_DAIFMT_CBS_CFS) return -EINVAL; - uda1380_write(codec, UDA1380_IFACE, iface); + uda1380_write_reg_cache(codec, UDA1380_IFACE, iface); return 0; } From 97372e5a99449b4fffa824a382ad6358066a9918 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 8 Apr 2015 07:34:12 +0200 Subject: [PATCH 021/481] clk: exynos5420: Restore GATE_BUS_TOP on suspend Commit ae43b3289186 ("ARM: 8202/1: dmaengine: pl330: Add runtime Power Management support v12") added pm support for the pl330 dma driver but it makes the clock for the Exynos5420 MDMA0 DMA controller to be gated during suspend and this in turn makes its parent clock aclk266_g2d to be gated. But the clock needs to be ungated prior suspend to allow the system to be suspend and resumed correctly. Add GATE_BUS_TOP register to the list of registers to be restored when the system enters into a suspend state so aclk266_g2d will be ungated. Thanks to Abhilash Kesavan for figuring out that this was the issue. 
Fixes: ae43b32 ("ARM: 8202/1: dmaengine: pl330: Add runtime Power Management support v12") Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Javier Martinez Canillas Tested-by: Kevin Hilman Tested-by: Abhilash Kesavan Acked-by: Tomasz Figa Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos5420.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 07d666cc6a29..bea4a173eef5 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -271,6 +271,7 @@ static const struct samsung_clk_reg_dump exynos5420_set_clksrc[] = { { .offset = SRC_MASK_PERIC0, .value = 0x11111110, }, { .offset = SRC_MASK_PERIC1, .value = 0x11111100, }, { .offset = SRC_MASK_ISP, .value = 0x11111000, }, + { .offset = GATE_BUS_TOP, .value = 0xffffffff, }, { .offset = GATE_BUS_DISP1, .value = 0xffffffff, }, { .offset = GATE_IP_PERIC, .value = 0xffffffff, }, }; From 2c674fac5b1603c6a2aacc88116e3fbc75ebd799 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 5 May 2015 15:02:42 +0800 Subject: [PATCH 022/481] ALSA: hda/realtek - Add ALC298 alias name for Dell Add ALC3266 for Dell platform. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 516a0da6c080..4a0a238c9864 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -883,6 +883,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = { { 0x10ec0668, 0x1028, 0, "ALC3661" }, { 0x10ec0275, 0x1028, 0, "ALC3260" }, { 0x10ec0899, 0x1028, 0, "ALC3861" }, + { 0x10ec0298, 0x1028, 0, "ALC3266" }, { 0x10ec0670, 0x1025, 0, "ALC669X" }, { 0x10ec0676, 0x1025, 0, "ALC679X" }, { 0x10ec0282, 0x1043, 0, "ALC3229" }, From 1222e564cf4394af0b3c5e8a73330b20862c068b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 6 May 2015 06:23:59 +0200 Subject: [PATCH 023/481] x86/platform/uv: Make SGI UV dependent on CONFIG_PCI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent PCI changes stopped exporting PCI constants if !CONFIG_PCI, which made the UV build fail: arch/x86/kernel/apic/x2apic_uv_x.c:843:16: error: ‘PCI_VGA_STATE_CHANGE_BRIDGE’ undeclared (first use in this function) arch/x86/kernel/apic/x2apic_uv_x.c:1023:2: error: implicit declaration of function ‘pci_register_set_vga_state’ [-Werror=implicit-function-declaration] As it's unlikely that an UV bootup will get far without PCI enumeration, make the platform Kconfig switch (CONFIG_X86_UV) depend on CONFIG_PCI=y. Cc: Robin Holt Cc: Dimitri Sivanich Cc: Russ Anderson Cc: Mike Travis Cc: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 226d5696e1d1..066d9bdc5496 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -441,6 +441,7 @@ config X86_UV depends on X86_EXTENDED_PLATFORM depends on NUMA depends on X86_X2APIC + depends on PCI ---help--- This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. From afdf344e08fbec28ab2204a626fa1f260dcb68be Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 6 May 2015 06:58:53 -0500 Subject: [PATCH 024/481] x86/mce/amd: Factor out logging mechanism Refactor the code here to setup struct mce and call mce_log() to log the error. 
We're going to reuse this in a later patch as part of the deferred error interrupt enablement. No functional change is introduced. Suggested-by: Borislav Petkov Signed-off-by: Aravind Gopalakrishnan Cc: Tony Luck Cc: x86-ml Cc: linux-edac Link: http://lkml.kernel.org/r/1430913538-1415-2-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 33 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 55ad9b37cae8..5f25de20db36 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -264,6 +264,27 @@ init: } } +static void __log_error(unsigned int bank, bool threshold_err, u64 misc) +{ + struct mce m; + u64 status; + + rdmsrl(MSR_IA32_MCx_STATUS(bank), status); + if (!(status & MCI_STATUS_VAL)) + return; + + mce_setup(&m); + + m.status = status; + m.bank = bank; + if (threshold_err) + m.misc = misc; + + mce_log(&m); + + wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); +} + /* * APIC Interrupt Handler */ @@ -273,12 +294,12 @@ init: * the interrupt goes off when error_count reaches threshold_limit. * the handler will simply log mcelog w/ software defined bank number. */ + static void amd_threshold_interrupt(void) { u32 low = 0, high = 0, address = 0; int cpu = smp_processor_id(); unsigned int bank, block; - struct mce m; /* assume first bank caused it */ for (bank = 0; bank < mca_cfg.banks; ++bank) { @@ -321,15 +342,7 @@ static void amd_threshold_interrupt(void) return; log: - mce_setup(&m); - rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); - if (!(m.status & MCI_STATUS_VAL)) - return; - m.misc = ((u64)high << 32) | low; - m.bank = bank; - mce_log(&m); - - wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); + __log_error(bank, true, ((u64)high << 32) | low); } /* From 6e6e746e33e9555a7fce159d25314c9df3bcda93 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 6 May 2015 06:58:54 -0500 Subject: [PATCH 025/481] x86/mce/amd: Collect valid address before logging an error amd_decode_mce() needs value in m->addr so it can report the error address correctly. This should be setup in __log_error() before we call mce_log(). We do this because the error address is an important bit of information which should be conveyed to userspace. The correct output then reports proper address, like this: [Hardware Error]: Corrected error, no action required. 
[Hardware Error]: CPU:0 (15:60:0) MC0_STATUS [-|CE|-|-|AddrV|-|-|CECC]: 0x840041000028017b [Hardware Error]: MC0 Error Address: 0x00001f808f0ff040 Signed-off-by: Aravind Gopalakrishnan Cc: Tony Luck Cc: x86-ml Cc: linux-edac Link: http://lkml.kernel.org/r/1430913538-1415-3-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5f25de20db36..607075726e10 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -277,11 +277,14 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc) m.status = status; m.bank = bank; + if (threshold_err) m.misc = misc; - mce_log(&m); + if (m.status & MCI_STATUS_ADDRV) + rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr); + mce_log(&m); wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); } From 7559e13fb4abe7880dfaf985d6a1630ca90a67ce Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 6 May 2015 06:58:55 -0500 Subject: [PATCH 026/481] x86/mce: Add support for deferred errors on AMD Deferred errors indicate error conditions that were not corrected, but those errors have not been consumed yet. They require no action from S/W (or action is optional). These errors provide info about a latent uncorrectable MCE that can occur when a poisoned data is consumed by the processor. Newer AMD processors can generate deferred errors and can be configured to generate APIC interrupts on such events. SUCCOR stands for S/W UnCorrectable error COntainment and Recovery. It indicates support for data poisoning in HW and deferred error interrupts. Add new bitfield to mce_vendor_flags for this. We use this to verify presence of deferred error interrupts before we enable them in mce_amd.c While at it, clarify comments in mce_vendor_flags to provide an indication of usages of the bitfields. Signed-off-by: Aravind Gopalakrishnan Cc: Tony Luck Cc: x86-ml Cc: linux-edac Link: http://lkml.kernel.org/r/1430913538-1415-4-git-send-email-Aravind.Gopalakrishnan@amd.com [ beef up commit message, do CPUID(8000_0007) only once. ] Signed-off-by: Borislav Petkov --- arch/x86/include/asm/mce.h | 15 +++++++++++++-- arch/x86/kernel/cpu/mcheck/mce.c | 10 ++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 1f5a86d518db..407ced642ac1 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -117,8 +117,19 @@ struct mca_config { }; struct mce_vendor_flags { - __u64 overflow_recov : 1, /* cpuid_ebx(80000007) */ - __reserved_0 : 63; + /* + * overflow recovery cpuid bit indicates that overflow + * conditions are not fatal + */ + __u64 overflow_recov : 1, + + /* + * SUCCOR stands for S/W UnCorrectable error COntainment + * and Recovery. It indicates support for data poisoning + * in HW and deferred error interrupts. 
+ */ + succor : 1, + __reserved_0 : 62; }; extern struct mce_vendor_flags mce_flags; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e535533d5ab8..521e5016aca6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1637,10 +1637,16 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) mce_intel_feature_init(c); mce_adjust_timer = cmci_intel_adjust_timer; break; - case X86_VENDOR_AMD: + + case X86_VENDOR_AMD: { + u32 ebx = cpuid_ebx(0x80000007); + mce_amd_feature_init(c); - mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1; + mce_flags.overflow_recov = !!(ebx & BIT(0)); + mce_flags.succor = !!(ebx & BIT(1)); break; + } + default: break; } From d2a5d46b167a9a8231264daf80165b739aecf1d7 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 15 Apr 2015 22:26:36 +0800 Subject: [PATCH 027/481] clk: add missing lock when call clk_core_enable in clk_set_parent Before commit 035a61c314eb ("clk: Make clk API return per-user struct clk instances") we acquired the enable_lock in __clk_set_parent_{before,after}() by means of calling clk_enable(). After commit 035a61c314eb we use clk_core_enable() in place of the clk_enable(), and clk_core_enable() doesn't acquire the enable_lock. This opens up a race condition between clk_set_parent() and clk_enable(). Fix it. Fixes: 035a61c314eb ("clk: Make clk API return per-user struct clk instances") Cc: Mike Turquette Cc: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 459ce9da13e0..5b0f41868b42 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1475,8 +1475,10 @@ static struct clk_core *__clk_set_parent_before(struct clk_core *clk, */ if (clk->prepare_count) { clk_core_prepare(parent); + flags = clk_enable_lock(); clk_core_enable(parent); clk_core_enable(clk); + clk_enable_unlock(flags); } /* update the clk tree topology */ @@ -1491,13 +1493,17 @@ static void __clk_set_parent_after(struct clk_core *core, struct clk_core *parent, struct clk_core *old_parent) { + unsigned long flags; + /* * Finish the migration of prepare state and undo the changes done * for preventing a race with clk_enable(). */ if (core->prepare_count) { + flags = clk_enable_lock(); clk_core_disable(core); clk_core_disable(old_parent); + clk_enable_unlock(flags); clk_core_unprepare(old_parent); } } @@ -1525,8 +1531,10 @@ static int __clk_set_parent(struct clk_core *clk, struct clk_core *parent, clk_enable_unlock(flags); if (clk->prepare_count) { + flags = clk_enable_lock(); clk_core_disable(clk); clk_core_disable(parent); + clk_enable_unlock(flags); clk_core_unprepare(parent); } return ret; From 24fd78a81f6d3fe7f7a440c8629f9c52cd5f830e Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 6 May 2015 06:58:56 -0500 Subject: [PATCH 028/481] x86/mce/amd: Introduce deferred error interrupt handler Deferred errors indicate error conditions that were not corrected, but require no action from S/W (or action is optional).These errors provide info about a latent UC MCE that can occur when a poisoned data is consumed by the processor. Processors that report these errors can be configured to generate APIC interrupts to notify OS about the error. Provide an interrupt handler in this patch so that OS can catch these errors as and when they happen. Currently, we simply log the errors and exit the handler as S/W action is not mandated. 
Signed-off-by: Aravind Gopalakrishnan Cc: Tony Luck Cc: x86-ml Cc: linux-edac Link: http://lkml.kernel.org/r/1430913538-1415-5-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- arch/x86/include/asm/entry_arch.h | 3 + arch/x86/include/asm/hardirq.h | 3 + arch/x86/include/asm/hw_irq.h | 2 + arch/x86/include/asm/irq_vectors.h | 1 + arch/x86/include/asm/mce.h | 3 + arch/x86/include/asm/trace/irq_vectors.h | 6 ++ arch/x86/include/asm/traps.h | 3 +- arch/x86/kernel/cpu/mcheck/mce_amd.c | 93 ++++++++++++++++++++++++ arch/x86/kernel/entry_64.S | 5 ++ arch/x86/kernel/irq.c | 6 ++ arch/x86/kernel/irqinit.c | 4 + arch/x86/kernel/traps.c | 5 ++ 12 files changed, 133 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index dc5fa661465f..6da46dbaac87 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -50,4 +50,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) #endif +#ifdef CONFIG_X86_MCE_AMD +BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR) +#endif #endif diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f5fb6b6567e..db9f536f482f 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -33,6 +33,9 @@ typedef struct { #ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; #endif +#ifdef CONFIG_X86_MCE_AMD + unsigned int irq_deferred_error_count; +#endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) unsigned int irq_hv_callback_count; #endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e9571ddabc4f..f71e489d7537 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -73,6 +73,7 @@ extern asmlinkage void invalidate_interrupt31(void); extern asmlinkage void irq_move_cleanup_interrupt(void); extern asmlinkage void reboot_interrupt(void); extern asmlinkage void threshold_interrupt(void); +extern asmlinkage void deferred_error_interrupt(void); extern asmlinkage void call_function_interrupt(void); extern asmlinkage void call_function_single_interrupt(void); @@ -87,6 +88,7 @@ extern void trace_spurious_interrupt(void); extern void trace_thermal_interrupt(void); extern void trace_reschedule_interrupt(void); extern void trace_threshold_interrupt(void); +extern void trace_deferred_error_interrupt(void); extern void trace_call_function_interrupt(void); extern void trace_call_function_single_interrupt(void); #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 666c89ec4bd7..026fc1e1599c 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -113,6 +113,7 @@ #define IRQ_WORK_VECTOR 0xf6 #define UV_BAU_MESSAGE 0xf5 +#define DEFERRED_ERROR_VECTOR 0xf4 /* Vector on which hypervisor callbacks will be delivered */ #define HYPERVISOR_CALLBACK_VECTOR 0xf3 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 407ced642ac1..6a3034a0a072 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -234,6 +234,9 @@ void do_machine_check(struct pt_regs *, long); extern void (*mce_threshold_vector)(void); extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); +/* Deferred error interrupt handler */ +extern void (*deferred_error_int_vector)(void); + /* * Thermal handler */ diff 
--git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 4cab890007a7..38a09a13a9bc 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -100,6 +100,12 @@ DEFINE_IRQ_VECTOR_EVENT(call_function_single); */ DEFINE_IRQ_VECTOR_EVENT(threshold_apic); +/* + * deferred_error_apic - called when entering/exiting a deferred apic interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); + /* * thermal_apic - called when entering/exiting a thermal apic interrupt * vector handler diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 4e49d7dff78e..c5380bea2a36 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -108,7 +108,8 @@ extern int panic_on_unrecovered_nmi; void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 asmlinkage void smp_thermal_interrupt(void); -asmlinkage void mce_threshold_interrupt(void); +asmlinkage void smp_threshold_interrupt(void); +asmlinkage void smp_deferred_error_interrupt(void); #endif extern enum ctx_state ist_enter(struct pt_regs *regs); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 607075726e10..2e7ebe7e1e80 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -12,6 +12,8 @@ * - added support for AMD Family 0x10 processors * May 2012 * - major scrubbing + * May 2015 + * - add support for deferred error interrupts (Aravind Gopalakrishnan) * * All MC4_MISCi registers are shared between multi-cores */ @@ -32,6 +34,7 @@ #include #include #include +#include #define NR_BLOCKS 9 #define THRESHOLD_MAX 0xFFF @@ -47,6 +50,13 @@ #define MASK_BLKPTR_LO 0xFF000000 #define MCG_XBLK_ADDR 0xC0000400 +/* Deferred error settings */ +#define MSR_CU_DEF_ERR 0xC0000410 +#define MASK_DEF_LVTOFF 0x000000F0 +#define MASK_DEF_INT_TYPE 0x00000006 +#define DEF_LVT_OFF 0x2 +#define DEF_INT_TYPE_APIC 0x2 + static const char * const th_names[] = { "load_store", "insn_fetch", @@ -60,6 +70,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ static void amd_threshold_interrupt(void); +static void amd_deferred_error_interrupt(void); + +static void default_deferred_error_interrupt(void) +{ + pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR); +} +void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; /* * CPU Initialization @@ -205,6 +222,39 @@ static int setup_APIC_mce(int reserved, int new) return reserved; } +static int setup_APIC_deferred_error(int reserved, int new) +{ + if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR, + APIC_EILVT_MSG_FIX, 0)) + return new; + + return reserved; +} + +static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) +{ + u32 low = 0, high = 0; + int def_offset = -1, def_new; + + if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high)) + return; + + def_new = (low & MASK_DEF_LVTOFF) >> 4; + if (!(low & MASK_DEF_LVTOFF)) { + pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n"); + def_new = DEF_LVT_OFF; + low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4); + } + + def_offset = setup_APIC_deferred_error(def_offset, def_new); + if ((def_offset == def_new) && + (deferred_error_int_vector != amd_deferred_error_interrupt)) + deferred_error_int_vector = amd_deferred_error_interrupt; + + low = (low & ~MASK_DEF_INT_TYPE) 
| DEF_INT_TYPE_APIC; + wrmsr(MSR_CU_DEF_ERR, low, high); +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -262,6 +312,9 @@ init: mce_threshold_block_init(&b, offset); } } + + if (mce_flags.succor) + deferred_error_interrupt_enable(c); } static void __log_error(unsigned int bank, bool threshold_err, u64 misc) @@ -288,6 +341,46 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc) wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); } +static inline void __smp_deferred_error_interrupt(void) +{ + inc_irq_stat(irq_deferred_error_count); + deferred_error_int_vector(); +} + +asmlinkage __visible void smp_deferred_error_interrupt(void) +{ + entering_irq(); + __smp_deferred_error_interrupt(); + exiting_ack_irq(); +} + +asmlinkage __visible void smp_trace_deferred_error_interrupt(void) +{ + entering_irq(); + trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); + __smp_deferred_error_interrupt(); + trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR); + exiting_ack_irq(); +} + +/* APIC interrupt handler for deferred errors */ +static void amd_deferred_error_interrupt(void) +{ + u64 status; + unsigned int bank; + + for (bank = 0; bank < mca_cfg.banks; ++bank) { + rdmsrl(MSR_IA32_MCx_STATUS(bank), status); + + if (!(status & MCI_STATUS_VAL) || + !(status & MCI_STATUS_DEFERRED)) + continue; + + __log_error(bank, false, 0); + break; + } +} + /* * APIC Interrupt Handler */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 02c2eff7478d..12aea85fe738 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -935,6 +935,11 @@ apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt #endif +#ifdef CONFIG_X86_MCE_AMD +apicinterrupt DEFERRED_ERROR_VECTOR \ + deferred_error_interrupt smp_deferred_error_interrupt +#endif + #ifdef CONFIG_X86_THERMAL_VECTOR apicinterrupt THERMAL_APIC_VECTOR \ thermal_interrupt smp_thermal_interrupt diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e5952c225532..590ed6c1bf51 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -116,6 +116,12 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_puts(p, " Threshold APIC interrupts\n"); #endif +#ifdef CONFIG_X86_MCE_AMD + seq_printf(p, "%*s: ", prec, "DFR"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); + seq_puts(p, " Deferred Error APIC interrupts\n"); +#endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); for_each_online_cpu(j) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index cd10a6437264..d7ec6e7b2b5b 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -135,6 +135,10 @@ static void __init apic_intr_init(void) alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #endif +#ifdef CONFIG_X86_MCE_AMD + alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt); +#endif + #ifdef CONFIG_X86_LOCAL_APIC /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..68b1d5979a46 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -827,6 +827,11 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void) { } +asmlinkage __visible void __attribute__((weak)) +smp_deferred_error_interrupt(void) +{ +} + /* * 
'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task From 5c0d728e1a8ccbaf68ec37181e466627ba0a6efc Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 6 May 2015 06:58:57 -0500 Subject: [PATCH 029/481] x86/irq: Cleanup ordering of vector numbers Sort vector number assignments in proper descending order. No functional change. Signed-off-by: Aravind Gopalakrishnan Cc: Tony Luck Cc: x86-ml Link: http://lkml.kernel.org/r/1430913538-1415-6-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- arch/x86/include/asm/irq_vectors.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 026fc1e1599c..8900ba444ad7 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,11 +102,6 @@ */ #define X86_PLATFORM_IPI_VECTOR 0xf7 -/* Vector for KVM to deliver posted interrupt IPI */ -#ifdef CONFIG_HAVE_KVM -#define POSTED_INTR_VECTOR 0xf2 -#endif - /* * IRQ work vector: */ @@ -118,6 +113,11 @@ /* Vector on which hypervisor callbacks will be delivered */ #define HYPERVISOR_CALLBACK_VECTOR 0xf3 +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#endif + /* * Local APIC timer IRQ vector is on a different priority level, * to work around the 'lost local interrupt if more than 2 IRQ From 868c00bb5980653c44d931384baa2c7f1bde81ef Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 6 May 2015 06:58:58 -0500 Subject: [PATCH 030/481] x86/mce/amd: Rename setup_APIC_mce 'setup_APIC_mce' doesn't give us an indication of why we are going to program LVT. Make that explicit by renaming it to setup_APIC_mce_threshold so we know. No functional change is introduced. Signed-off-by: Aravind Gopalakrishnan Cc: Tony Luck Cc: x86-ml Cc: linux-edac Link: http://lkml.kernel.org/r/1430913538-1415-7-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 2e7ebe7e1e80..70e1bf6f784d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -213,7 +213,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset) threshold_restart_bank(&tr); }; -static int setup_APIC_mce(int reserved, int new) +static int setup_APIC_mce_threshold(int reserved, int new) { if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0)) @@ -302,7 +302,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) b.interrupt_enable = 1; new = (high & MASK_LVTOFF_HI) >> 20; - offset = setup_APIC_mce(offset, new); + offset = setup_APIC_mce_threshold(offset, new); if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) From 5006c1052aafa01dab5b0e643b7dac755b41f3bb Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 May 2015 15:01:31 -0400 Subject: [PATCH 031/481] Revert "HID: logitech-hidpp: support combo keyboard touchpad TK820" This reverts commit 3a61e97563d78a2ca10752902449570d8433ce76. The Logitech TK820 seems to be affected by a firmware bug which delays the sending of the keys (pressed, or released, which triggers a key-repeat) while holding fingers on the touch sensor. 
This behavior can be observed while using the mouse emulation mode if the user moves the finger while typing (highly improbable though). Holding the finger still while in the mouse emulation mode does not trigger the key repeat problem. So better keep things in their previous state to not have to explain users that the new key-repeat bug they see is a "feature". Furthermore, I noticed that I disabled the media keys whith this patch. Sorry, my bad. I think it is best to revert the patch, in all the current versions it has been shipped. Cc: # v3.19 and above Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index b3cf6fd4be96..5fd530acf747 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -44,7 +44,6 @@ MODULE_PARM_DESC(disable_raw_mode, /* bits 1..20 are reserved for classes */ #define HIDPP_QUIRK_DELAYED_INIT BIT(21) #define HIDPP_QUIRK_WTP_PHYSICAL_BUTTONS BIT(22) -#define HIDPP_QUIRK_MULTI_INPUT BIT(23) /* * There are two hidpp protocols in use, the first version hidpp10 is known @@ -706,12 +705,6 @@ static int wtp_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { - struct hidpp_device *hidpp = hid_get_drvdata(hdev); - - if ((hidpp->quirks & HIDPP_QUIRK_MULTI_INPUT) && - (field->application == HID_GD_KEYBOARD)) - return 0; - return -1; } @@ -720,10 +713,6 @@ static void wtp_populate_input(struct hidpp_device *hidpp, { struct wtp_data *wd = hidpp->private_data; - if ((hidpp->quirks & HIDPP_QUIRK_MULTI_INPUT) && origin_is_hid_core) - /* this is the generic hid-input call */ - return; - __set_bit(EV_ABS, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); __clear_bit(EV_REL, input_dev->evbit); @@ -1245,10 +1234,6 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) if (hidpp->quirks & HIDPP_QUIRK_DELAYED_INIT) connect_mask &= ~HID_CONNECT_HIDINPUT; - /* Re-enable hidinput for multi-input devices */ - if (hidpp->quirks & HIDPP_QUIRK_MULTI_INPUT) - connect_mask |= HID_CONNECT_HIDINPUT; - ret = hid_hw_start(hdev, connect_mask); if (ret) { hid_err(hdev, "%s:hid_hw_start returned error\n", __func__); @@ -1296,11 +1281,6 @@ static const struct hid_device_id hidpp_devices[] = { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651), .driver_data = HIDPP_QUIRK_CLASS_WTP }, - { /* Keyboard TK820 */ - HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE, - USB_VENDOR_ID_LOGITECH, 0x4102), - .driver_data = HIDPP_QUIRK_DELAYED_INIT | HIDPP_QUIRK_MULTI_INPUT | - HIDPP_QUIRK_CLASS_WTP }, { HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE, USB_VENDOR_ID_LOGITECH, HID_ANY_ID)}, From 956079e081427fe0c929eb284ab7e39f9b8e2023 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 6 May 2015 12:15:54 -0700 Subject: [PATCH 032/481] x86/platform/atom/punit: Add Punit device state debug driver The patch adds a debug driver, which dumps the power states of all the North complex (NC) devices. This debug interface is useful to figure out the devices, which blocks the S0ix transitions on the platform. This is extremely useful during enabling PM on customer platforms and derivatives. This submission is based on the submission from Mahesh Kumar P: https://lkml.org/lkml/2014/11/5/367 Signed-off-by: Srinivas Pandruvada Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Mahesh Kumar P Cc: Thomas Gleixner Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1430939754-6900-2-git-send-email-srinivas.pandruvada@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 11 ++ arch/x86/platform/Makefile | 1 + arch/x86/platform/atom/Makefile | 1 + arch/x86/platform/atom/punit_atom_debug.c | 183 ++++++++++++++++++++++ 4 files changed, 196 insertions(+) create mode 100644 arch/x86/platform/atom/Makefile create mode 100644 arch/x86/platform/atom/punit_atom_debug.c diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 72484a645f05..a5973f851750 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -332,4 +332,15 @@ config X86_DEBUG_STATIC_CPU_HAS If unsure, say N. +config PUNIT_ATOM_DEBUG + tristate "ATOM Punit debug driver" + select DEBUG_FS + select IOSF_MBI + ---help--- + This is a debug driver, which gets the power states + of all Punit North Complex devices. The power states of + each device is exposed as part of the debugfs interface. + The current power state can be read from + /sys/kernel/debug/punit_atom/dev_power_state + endmenu diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index a62e0be3a2f1..f1a6c8e86ddd 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -1,4 +1,5 @@ # Platform specific code goes here +obj-y += atom/ obj-y += ce4100/ obj-y += efi/ obj-y += geode/ diff --git a/arch/x86/platform/atom/Makefile b/arch/x86/platform/atom/Makefile new file mode 100644 index 000000000000..0a3a40cbc794 --- /dev/null +++ b/arch/x86/platform/atom/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c new file mode 100644 index 000000000000..5ca8ead91579 --- /dev/null +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -0,0 +1,183 @@ +/* + * Intel SOC Punit device state debug driver + * Punit controls power management for North Complex devices (Graphics + * blocks, Image Signal Processing, video processing, display, DSP etc.) + * + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Side band Interface port */ +#define PUNIT_PORT 0x04 +/* Power gate status reg */ +#define PWRGT_STATUS 0x61 +/* Subsystem config/status Video processor */ +#define VED_SS_PM0 0x32 +/* Subsystem config/status ISP (Image Signal Processor) */ +#define ISP_SS_PM0 0x39 +/* Subsystem config/status Input/output controller */ +#define MIO_SS_PM 0x3B +/* Shift bits for getting status for video, isp and i/o */ +#define SSS_SHIFT 24 +/* Shift bits for getting status for graphics rendering */ +#define RENDER_POS 0 +/* Shift bits for getting status for media control */ +#define MEDIA_POS 2 +/* Shift bits for getting status for Valley View/Baytrail display */ +#define VLV_DISPLAY_POS 6 +/* Subsystem config/status display for Cherry Trail SOC */ +#define CHT_DSP_SSS 0x36 +/* Shift bits for getting status for display */ +#define CHT_DSP_SSS_POS 16 + +struct punit_device { + char *name; + int reg; + int sss_pos; +}; + +static const struct punit_device punit_device_byt[] = { + { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, + { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, + { "DISPLAY", PWRGT_STATUS, VLV_DISPLAY_POS }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + +static const struct punit_device punit_device_cht[] = { + { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, + { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, + { "DISPLAY", CHT_DSP_SSS, CHT_DSP_SSS_POS }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + +static const char * const dstates[] = {"D0", "D0i1", "D0i2", "D0i3"}; + +static int punit_dev_state_show(struct seq_file *seq_file, void *unused) +{ + u32 punit_pwr_status; + struct punit_device *punit_devp = seq_file->private; + int index; + int status; + + seq_puts(seq_file, "\n\nPUNIT NORTH COMPLEX DEVICES :\n"); + while (punit_devp->name) { + status = iosf_mbi_read(PUNIT_PORT, BT_MBI_PMC_READ, + punit_devp->reg, + &punit_pwr_status); + if (status) { + seq_printf(seq_file, "%9s : Read Failed\n", + punit_devp->name); + } else { + index = (punit_pwr_status >> punit_devp->sss_pos) & 3; + seq_printf(seq_file, "%9s : %s\n", punit_devp->name, + dstates[index]); + } + punit_devp++; + } + + return 0; +} + +static int punit_dev_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, punit_dev_state_show, inode->i_private); +} + +static const struct file_operations punit_dev_state_ops = { + .open = punit_dev_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *punit_dbg_file; + +static int punit_dbgfs_register(struct punit_device *punit_device) +{ + static struct dentry *dev_state; + + punit_dbg_file = debugfs_create_dir("punit_atom", NULL); + if (!punit_dbg_file) + return -ENXIO; + + dev_state = debugfs_create_file("dev_power_state", S_IFREG | S_IRUGO, + punit_dbg_file, punit_device, + &punit_dev_state_ops); + if (!dev_state) { + pr_err("punit_dev_state register failed\n"); + debugfs_remove(punit_dbg_file); + return -ENXIO; + } + + return 0; +} + +static void punit_dbgfs_unregister(void) +{ + debugfs_remove_recursive(punit_dbg_file); +} + +#define ICPU(model, drv_data) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT,\ + (kernel_ulong_t)&drv_data } + +static const struct x86_cpu_id intel_punit_cpu_ids[] = { + ICPU(55, punit_device_byt), /* Valleyview, Bay Trail */ + ICPU(76, 
punit_device_cht), /* Braswell, Cherry Trail */ + {} +}; + +MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids); + +static int __init punit_atom_debug_init(void) +{ + const struct x86_cpu_id *id; + int ret; + + id = x86_match_cpu(intel_punit_cpu_ids); + if (!id) + return -ENODEV; + + ret = punit_dbgfs_register((struct punit_device *)id->driver_data); + if (ret < 0) + return ret; + + return 0; +} + +static void __exit punit_atom_debug_exit(void) +{ + punit_dbgfs_unregister(); +} + +module_init(punit_atom_debug_init); +module_exit(punit_atom_debug_exit); + +MODULE_AUTHOR("Kumar P, Mahesh "); +MODULE_AUTHOR("Srinivas Pandruvada "); +MODULE_DESCRIPTION("Driver for Punit devices states debugging"); +MODULE_LICENSE("GPL v2"); From 8cd161b1f755decd8b7f6c9c7144119281fe11a4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 May 2015 11:38:08 +0200 Subject: [PATCH 033/481] x86/traps: Remove superfluous weak definitions and dead code Those were leftovers of the x86 merge, see 081f75bbdc86 ("traps: x86: make traps_32.c and traps_64.c equal") for example and are not needed now. Signed-off-by: Borislav Petkov --- arch/x86/kernel/traps.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 68b1d5979a46..2768bb663f94 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -813,23 +813,6 @@ dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { conditional_sti(regs); -#if 0 - /* No need to warn about this any longer. */ - pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); -#endif -} - -asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void) -{ -} - -asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void) -{ -} - -asmlinkage __visible void __attribute__((weak)) -smp_deferred_error_interrupt(void) -{ } /* From 3490c0e45f7e5a5b1e5c62e4c60b0e55b2e75e71 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 May 2015 12:06:43 +0200 Subject: [PATCH 034/481] x86/mce/amd: Zap changelog It is useless and git history has it all detailed anyway. Update copyright while at it. Signed-off-by: Borislav Petkov Cc: Aravind Gopalakrishnan --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 70e1bf6f784d..e99b15077e94 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1,21 +1,13 @@ /* - * (c) 2005-2012 Advanced Micro Devices, Inc. + * (c) 2005-2015 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html * * Written by Jacob Shin - AMD, Inc. - * * Maintained by: Borislav Petkov * - * April 2006 - * - added support for AMD Family 0x10 processors - * May 2012 - * - major scrubbing - * May 2015 - * - add support for deferred error interrupts (Aravind Gopalakrishnan) - * - * All MC4_MISCi registers are shared between multi-cores + * All MC4_MISCi registers are shared between cores on a node. 
*/ #include #include From a3bd92519879ddc3052140e6776173f266c934eb Mon Sep 17 00:00:00 2001 From: "Fang, Yang A" Date: Thu, 23 Apr 2015 16:35:17 -0700 Subject: [PATCH 035/481] ASoC: rt5645: Add ACPI match ID This patch adds the ACPI match ID for rt5645/5650 codec Signed-off-by: Fang, Yang A Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 69528ae5410c..61ca49fbf1ec 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -2656,6 +2657,15 @@ static const struct i2c_device_id rt5645_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, rt5645_i2c_id); +#ifdef CONFIG_ACPI +static struct acpi_device_id rt5645_acpi_match[] = { + { "10EC5645", 0 }, + { "10EC5650", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, rt5645_acpi_match); +#endif + static int rt5645_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { @@ -2872,6 +2882,7 @@ static struct i2c_driver rt5645_i2c_driver = { .driver = { .name = "rt5645", .owner = THIS_MODULE, + .acpi_match_table = ACPI_PTR(rt5645_acpi_match), }, .probe = rt5645_i2c_probe, .remove = rt5645_i2c_remove, From 5023281b1870fdc99bddf95a7bc00b801801d13e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 21 Apr 2015 12:19:49 +0800 Subject: [PATCH 036/481] ASoC: rt5645: Fix mask for setting RT5645_DMIC_2_DP_GPIO12 bit Current code uses wrong mask when setting RT5645_DMIC_2_DP_GPIO12 bit, fix it. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 61ca49fbf1ec..be4d741c45ba 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -2780,7 +2780,7 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, case RT5645_DMIC_DATA_GPIO12: regmap_update_bits(rt5645->regmap, RT5645_DMIC_CTRL1, - RT5645_DMIC_1_DP_MASK, RT5645_DMIC_2_DP_GPIO12); + RT5645_DMIC_2_DP_MASK, RT5645_DMIC_2_DP_GPIO12); regmap_update_bits(rt5645->regmap, RT5645_GPIO_CTRL1, RT5645_GP12_PIN_MASK, RT5645_GP12_PIN_DMIC2_SDA); From f30bf2a5cac6c60ab366c4bc6db913597bf4d6ab Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 7 May 2015 15:12:21 +0300 Subject: [PATCH 037/481] ipvs: fix memory leak in ip_vs_ctl.c Fix memory leak introduced in commit a0840e2e165a ("IPVS: netns, ip_vs_ctl local vars moved to ipvs struct."): unreferenced object 0xffff88005785b800 (size 2048): comm "(-localed)", pid 1434, jiffies 4294755650 (age 1421.089s) hex dump (first 32 bytes): bb 89 0b 83 ff ff ff ff b0 78 f0 4e 00 88 ff ff .........x.N.... 04 00 00 00 a4 01 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace: [] kmemleak_alloc+0x4e/0xb0 [] __kmalloc_track_caller+0x244/0x430 [] kmemdup+0x20/0x50 [] ip_vs_control_net_init+0x1f7/0x510 [] __ip_vs_init+0x100/0x250 [] ops_init+0x41/0x190 [] setup_net+0x93/0x150 [] copy_net_ns+0x82/0x140 [] create_new_namespaces+0xfd/0x190 [] unshare_nsproxy_namespaces+0x5a/0xc0 [] SyS_unshare+0x173/0x310 [] system_call_fastpath+0x12/0x6f [] 0xffffffffffffffff Fixes: a0840e2e165a ("IPVS: netns, ip_vs_ctl local vars moved to ipvs struct.") Signed-off-by: Tommi Rantala Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 49532672f66d..285eae3a1454 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3823,6 +3823,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); ip_vs_stop_estimator(net, &ipvs->tot_stats); + + if (!net_eq(net, &init_net)) + kfree(ipvs->sysctl_tbl); } #else From 59aabfc7e959f5f213e4e5cc7567ab4934da2adf Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 30 Apr 2015 17:12:16 -0400 Subject: [PATCH 038/481] locking/rwsem: Reduce spinlock contention in wakeup after up_read()/up_write() In up_write()/up_read(), rwsem_wake() will be called whenever it detects that some writers/readers are waiting. The rwsem_wake() function will take the wait_lock and call __rwsem_do_wake() to do the real wakeup. For a heavily contended rwsem, doing a spin_lock() on wait_lock will cause further contention on the heavily contended rwsem cacheline resulting in delay in the completion of the up_read/up_write operations. This patch makes the wait_lock taking and the call to __rwsem_do_wake() optional if at least one spinning writer is present. The spinning writer will be able to take the rwsem and call rwsem_wake() later when it calls up_write(). With the presence of a spinning writer, rwsem_wake() will now try to acquire the lock using trylock. If that fails, it will just quit. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Davidlohr Bueso Acked-by: Jason Low Cc: Andrew Morton Cc: Borislav Petkov Cc: Douglas Hatch Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1430428337-16802-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/linux/osq_lock.h | 5 +++++ kernel/locking/rwsem-xadd.c | 44 +++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index 3a6490e81b28..703ea5c30a33 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock) extern bool osq_lock(struct optimistic_spin_queue *lock); extern void osq_unlock(struct optimistic_spin_queue *lock); +static inline bool osq_is_locked(struct optimistic_spin_queue *lock) +{ + return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL; +} + #endif diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 3417d0172a5d..0f189714e457 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -409,11 +409,24 @@ done: return taken; } +/* + * Return true if the rwsem has active spinner + */ +static inline bool rwsem_has_spinner(struct rw_semaphore *sem) +{ + return osq_is_locked(&sem->osq); +} + #else static bool rwsem_optimistic_spin(struct rw_semaphore *sem) { return false; } + +static inline bool rwsem_has_spinner(struct rw_semaphore *sem) +{ + return false; +} #endif /* @@ -496,7 +509,38 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; + /* + * If a spinner is present, it is not necessary to do the wakeup. + * Try to do wakeup only if the trylock succeeds to minimize + * spinlock contention which may introduce too much delay in the + * unlock operation. + * + * spinning writer up_write/up_read caller + * --------------- ----------------------- + * [S] osq_unlock() [L] osq + * MB RMB + * [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock) + * + * Here, it is important to make sure that there won't be a missed + * wakeup while the rwsem is free and the only spinning writer goes + * to sleep without taking the rwsem. Even when the spinning writer + * is just going to break out of the waiting loop, it will still do + * a trylock in rwsem_down_write_failed() before sleeping. IOW, if + * rwsem_has_spinner() is true, it will guarantee at least one + * trylock attempt on the rwsem later on. + */ + if (rwsem_has_spinner(sem)) { + /* + * The smp_rmb() here is to make sure that the spinner + * state is consulted before reading the wait_lock. + */ + smp_rmb(); + if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags)) + return sem; + goto locked; + } raw_spin_lock_irqsave(&sem->wait_lock, flags); +locked: /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) From 663fdcbee0a656cdaef934e7f50e6c2670373bc9 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Thu, 30 Apr 2015 17:27:21 +0530 Subject: [PATCH 039/481] kernel: Replace reference to ASSIGN_ONCE() with WRITE_ONCE() in comment Looks like commit : 43239cbe79fc ("kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)") left behind a reference to ASSIGN_ONCE(). Update this to WRITE_ONCE(). Signed-off-by: Preeti U Murthy Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Thomas Gleixner Cc: borntraeger@de.ibm.com Cc: dave@stgolabs.net Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20150430115721.22278.94082.stgit@preeti.in.ibm.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2..a7c0941d10da 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -450,7 +450,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * with an explicit memory barrier or atomic instruction that provides the * required ordering. * - * If possible use READ_ONCE/ASSIGN_ONCE instead. + * If possible use READ_ONCE()/WRITE_ONCE() instead. */ #define __ACCESS_ONCE(x) ({ \ __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \ From a33fda35e3a7655fb7df756ed67822afb5ed5e8d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 24 Apr 2015 14:56:30 -0400 Subject: [PATCH 040/481] locking/qspinlock: Introduce a simple generic 4-byte queued spinlock This patch introduces a new generic queued spinlock implementation that can serve as an alternative to the default ticket spinlock. Compared with the ticket spinlock, this queued spinlock should be almost as fair as the ticket spinlock. It has about the same speed in single-thread and it can be much faster in high contention situations especially when the spinlock is embedded within the data structure to be protected. Only in light to moderate contention where the average queue depth is around 1-3 will this queued spinlock be potentially a bit slower due to the higher slowpath overhead. This queued spinlock is especially suit to NUMA machines with a large number of cores as the chance of spinlock contention is much higher in those machines. The cost of contention is also higher because of slower inter-node memory traffic. Due to the fact that spinlocks are acquired with preemption disabled, the process will not be migrated to another CPU while it is trying to get a spinlock. Ignoring interrupt handling, a CPU can only be contending in one spinlock at any one time. Counting soft IRQ, hard IRQ and NMI, a CPU can only have a maximum of 4 concurrent lock waiting activities. By allocating a set of per-cpu queue nodes and used them to form a waiting queue, we can encode the queue node address into a much smaller 24-bit size (including CPU number and queue node index) leaving one byte for the lock. Please note that the queue node is only needed when waiting for the lock. Once the lock is acquired, the queue node can be released to be used later. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock.h | 132 ++++++++++++++++ include/asm-generic/qspinlock_types.h | 58 +++++++ kernel/Kconfig.locks | 7 + kernel/locking/Makefile | 1 + kernel/locking/mcs_spinlock.h | 1 + kernel/locking/qspinlock.c | 209 ++++++++++++++++++++++++++ 6 files changed, 408 insertions(+) create mode 100644 include/asm-generic/qspinlock.h create mode 100644 include/asm-generic/qspinlock_types.h create mode 100644 kernel/locking/qspinlock.c diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h new file mode 100644 index 000000000000..569abcd47a9a --- /dev/null +++ b/include/asm-generic/qspinlock.h @@ -0,0 +1,132 @@ +/* + * Queued spinlock + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. + * + * Authors: Waiman Long + */ +#ifndef __ASM_GENERIC_QSPINLOCK_H +#define __ASM_GENERIC_QSPINLOCK_H + +#include + +/** + * queued_spin_is_locked - is the spinlock locked? + * @lock: Pointer to queued spinlock structure + * Return: 1 if it is locked, 0 otherwise + */ +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) +{ + return atomic_read(&lock->val); +} + +/** + * queued_spin_value_unlocked - is the spinlock structure unlocked? + * @lock: queued spinlock structure + * Return: 1 if it is unlocked, 0 otherwise + * + * N.B. Whenever there are tasks waiting for the lock, it is considered + * locked wrt the lockref code to avoid lock stealing by the lockref + * code and change things underneath the lock. This also allows some + * optimizations to be applied without conflict with lockref. 
+ */ +static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) +{ + return !atomic_read(&lock.val); +} + +/** + * queued_spin_is_contended - check if the lock is contended + * @lock : Pointer to queued spinlock structure + * Return: 1 if lock contended, 0 otherwise + */ +static __always_inline int queued_spin_is_contended(struct qspinlock *lock) +{ + return atomic_read(&lock->val) & ~_Q_LOCKED_MASK; +} +/** + * queued_spin_trylock - try to acquire the queued spinlock + * @lock : Pointer to queued spinlock structure + * Return: 1 if lock acquired, 0 if failed + */ +static __always_inline int queued_spin_trylock(struct qspinlock *lock) +{ + if (!atomic_read(&lock->val) && + (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0)) + return 1; + return 0; +} + +extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); + +/** + * queued_spin_lock - acquire a queued spinlock + * @lock: Pointer to queued spinlock structure + */ +static __always_inline void queued_spin_lock(struct qspinlock *lock) +{ + u32 val; + + val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL); + if (likely(val == 0)) + return; + queued_spin_lock_slowpath(lock, val); +} + +#ifndef queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + */ +static __always_inline void queued_spin_unlock(struct qspinlock *lock) +{ + /* + * smp_mb__before_atomic() in order to guarantee release semantics + */ + smp_mb__before_atomic_dec(); + atomic_sub(_Q_LOCKED_VAL, &lock->val); +} +#endif + +/** + * queued_spin_unlock_wait - wait until current lock holder releases the lock + * @lock : Pointer to queued spinlock structure + * + * There is a very slight possibility of live-lock if the lockers keep coming + * and the waiter is just unfortunate enough to not see any unlock state. + */ +static inline void queued_spin_unlock_wait(struct qspinlock *lock) +{ + while (atomic_read(&lock->val) & _Q_LOCKED_MASK) + cpu_relax(); +} + +/* + * Initializier + */ +#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) } + +/* + * Remapping spinlock architecture specific functions to the corresponding + * queued spinlock functions. + */ +#define arch_spin_is_locked(l) queued_spin_is_locked(l) +#define arch_spin_is_contended(l) queued_spin_is_contended(l) +#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l) +#define arch_spin_lock(l) queued_spin_lock(l) +#define arch_spin_trylock(l) queued_spin_trylock(l) +#define arch_spin_unlock(l) queued_spin_unlock(l) +#define arch_spin_lock_flags(l, f) queued_spin_lock(l) +#define arch_spin_unlock_wait(l) queued_spin_unlock_wait(l) + +#endif /* __ASM_GENERIC_QSPINLOCK_H */ diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h new file mode 100644 index 000000000000..aec05c7ad2f6 --- /dev/null +++ b/include/asm-generic/qspinlock_types.h @@ -0,0 +1,58 @@ +/* + * Queued spinlock + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. 
+ * + * Authors: Waiman Long + */ +#ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H +#define __ASM_GENERIC_QSPINLOCK_TYPES_H + +/* + * Including atomic.h with PARAVIRT on will cause compilation errors because + * of recursive header file incluson via paravirt_types.h. So don't include + * it if PARAVIRT is on. + */ +#ifndef CONFIG_PARAVIRT +#include +#include +#endif + +typedef struct qspinlock { + atomic_t val; +} arch_spinlock_t; + +/* + * Bitfields in the atomic value: + * + * 0- 7: locked byte + * 8- 9: tail index + * 10-31: tail cpu (+1) + */ +#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ + << _Q_ ## type ## _OFFSET) +#define _Q_LOCKED_OFFSET 0 +#define _Q_LOCKED_BITS 8 +#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) + +#define _Q_TAIL_IDX_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_TAIL_IDX_BITS 2 +#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) + +#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) +#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) +#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) + +#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) + +#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */ diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 08561f1acd13..95fdad866a98 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -235,6 +235,13 @@ config LOCK_SPIN_ON_OWNER def_bool y depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER +config ARCH_USE_QUEUED_SPINLOCK + bool + +config QUEUED_SPINLOCK + def_bool y if ARCH_USE_QUEUED_SPINLOCK + depends on SMP && !PARAVIRT_SPINLOCKS + config ARCH_USE_QUEUE_RWLOCK bool diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index de7a416cca2a..abfcef3c1ef9 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_SMP) += spinlock.o obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o obj-$(CONFIG_SMP) += lglock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o +obj-$(CONFIG_QUEUED_SPINLOCK) += qspinlock.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index 75e114bdf3f2..fd91aaa4554c 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -17,6 +17,7 @@ struct mcs_spinlock { struct mcs_spinlock *next; int locked; /* 1 if lock acquired */ + int count; /* nesting count, see qspinlock.c */ }; #ifndef arch_mcs_spin_lock_contended diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c new file mode 100644 index 000000000000..029b51ce10ea --- /dev/null +++ b/kernel/locking/qspinlock.c @@ -0,0 +1,209 @@ +/* + * Queued spinlock + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. + * (C) Copyright 2013-2014 Red Hat, Inc. + * (C) Copyright 2015 Intel Corp. 
+ * + * Authors: Waiman Long + * Peter Zijlstra + */ +#include +#include +#include +#include +#include +#include +#include + +/* + * The basic principle of a queue-based spinlock can best be understood + * by studying a classic queue-based spinlock implementation called the + * MCS lock. The paper below provides a good description for this kind + * of lock. + * + * http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf + * + * This queued spinlock implementation is based on the MCS lock, however to make + * it fit the 4 bytes we assume spinlock_t to be, and preserve its existing + * API, we must modify it somehow. + * + * In particular; where the traditional MCS lock consists of a tail pointer + * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to + * unlock the next pending (next->locked), we compress both these: {tail, + * next->locked} into a single u32 value. + * + * Since a spinlock disables recursion of its own context and there is a limit + * to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there + * are at most 4 nesting levels, it can be encoded by a 2-bit number. Now + * we can encode the tail by combining the 2-bit nesting level with the cpu + * number. With one byte for the lock value and 3 bytes for the tail, only a + * 32-bit word is now needed. Even though we only need 1 bit for the lock, + * we extend it to a full byte to achieve better performance for architectures + * that support atomic byte write. + * + * We also change the first spinner to spin on the lock bit instead of its + * node; whereby avoiding the need to carry a node from lock to unlock, and + * preserving existing lock API. This also makes the unlock code simpler and + * faster. + */ + +#include "mcs_spinlock.h" + +/* + * Per-CPU queue node structures; we can never have more than 4 nested + * contexts: task, softirq, hardirq, nmi. + * + * Exactly fits one 64-byte cacheline on a 64-bit architecture. + */ +static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]); + +/* + * We must be able to distinguish between no-tail and the tail at 0:0, + * therefore increment the cpu number by one. + */ + +static inline u32 encode_tail(int cpu, int idx) +{ + u32 tail; + +#ifdef CONFIG_DEBUG_SPINLOCK + BUG_ON(idx > 3); +#endif + tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET; + tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ + + return tail; +} + +static inline struct mcs_spinlock *decode_tail(u32 tail) +{ + int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; + int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; + + return per_cpu_ptr(&mcs_nodes[idx], cpu); +} + +/** + * queued_spin_lock_slowpath - acquire the queued spinlock + * @lock: Pointer to queued spinlock structure + * @val: Current value of the queued spinlock 32-bit word + * + * (queue tail, lock value) + * + * fast : slow : unlock + * : : + * uncontended (0,0) --:--> (0,1) --------------------------------:--> (*,0) + * : | ^--------. 
/ : + * : v \ | : + * uncontended : (n,x) --+--> (n,0) | : + * queue : | ^--' | : + * : v | : + * contended : (*,x) --+--> (*,0) -----> (*,1) ---' : + * queue : ^--' : + * + */ +void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + struct mcs_spinlock *prev, *next, *node; + u32 new, old, tail; + int idx; + + BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + + node = this_cpu_ptr(&mcs_nodes[0]); + idx = node->count++; + tail = encode_tail(smp_processor_id(), idx); + + node += idx; + node->locked = 0; + node->next = NULL; + + /* + * trylock || xchg(lock, node) + * + * 0,0 -> 0,1 ; no tail, not locked -> no tail, locked. + * p,x -> n,x ; tail was p -> tail is n; preserving locked. + */ + for (;;) { + new = _Q_LOCKED_VAL; + if (val) + new = tail | (val & _Q_LOCKED_MASK); + + old = atomic_cmpxchg(&lock->val, val, new); + if (old == val) + break; + + val = old; + } + + /* + * we won the trylock; forget about queueing. + */ + if (new == _Q_LOCKED_VAL) + goto release; + + /* + * if there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & ~_Q_LOCKED_MASK) { + prev = decode_tail(old); + WRITE_ONCE(prev->next, node); + + arch_mcs_spin_lock_contended(&node->locked); + } + + /* + * we're at the head of the waitqueue, wait for the owner to go away. + * + * *,x -> *,0 + */ + while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK) + cpu_relax(); + + /* + * claim the lock: + * + * n,0 -> 0,1 : lock, uncontended + * *,0 -> *,1 : lock, contended + */ + for (;;) { + new = _Q_LOCKED_VAL; + if (val != tail) + new |= val; + + old = atomic_cmpxchg(&lock->val, val, new); + if (old == val) + break; + + val = old; + } + + /* + * contended path; wait for next, release. + */ + if (new != _Q_LOCKED_VAL) { + while (!(next = READ_ONCE(node->next))) + cpu_relax(); + + arch_mcs_spin_unlock_contended(&next->locked); + } + +release: + /* + * release the node + */ + this_cpu_dec(mcs_nodes[0].count); +} +EXPORT_SYMBOL(queued_spin_lock_slowpath); From d73a33973f16ab6703e75ea00edee857afa3406e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 24 Apr 2015 14:56:31 -0400 Subject: [PATCH 041/481] locking/qspinlock, x86: Enable x86-64 to use queued spinlocks This patch makes the necessary changes at the x86 architecture specific layer to enable the use of queued spinlocks for x86-64. As x86-32 machines are typically not multi-socket. The benefit of queue spinlock may not be apparent. So queued spinlocks are not enabled. Currently, there is some incompatibilities between the para-virtualized spinlock code (which hard-codes the use of ticket spinlock) and the queued spinlocks. Therefore, the use of queued spinlocks is disabled when the para-virtualized spinlock is enabled. The arch/x86/include/asm/qspinlock.h header file includes some x86 specific optimization which will make the queueds spinlock code perform better than the generic implementation. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-3-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/include/asm/qspinlock.h | 20 ++++++++++++++++++++ arch/x86/include/asm/spinlock.h | 5 +++++ arch/x86/include/asm/spinlock_types.h | 4 ++++ 4 files changed, 30 insertions(+) create mode 100644 arch/x86/include/asm/qspinlock.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 226d5696e1d1..90b1b54f4f38 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -127,6 +127,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_QUEUED_SPINLOCK select ARCH_USE_QUEUE_RWLOCK select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGACTION if X86_32 diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h new file mode 100644 index 000000000000..e2aee8273664 --- /dev/null +++ b/arch/x86/include/asm/qspinlock.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_QSPINLOCK_H +#define _ASM_X86_QSPINLOCK_H + +#include + +#define queued_spin_unlock queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + * + * A smp_store_release() on the least-significant byte. + */ +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + smp_store_release((u8 *)lock, 0); +} + +#include + +#endif /* _ASM_X86_QSPINLOCK_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 64b611782ef0..4ec5413156ca 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -42,6 +42,10 @@ extern struct static_key paravirt_ticketlocks_enabled; static __always_inline bool static_key_false(struct static_key *key); +#ifdef CONFIG_QUEUED_SPINLOCK +#include +#else + #ifdef CONFIG_PARAVIRT_SPINLOCKS static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) @@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) cpu_relax(); } } +#endif /* CONFIG_QUEUED_SPINLOCK */ /* * Read-write spinlocks, allowing multiple readers diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 5f9d7572d82b..5df1f1b9a4b0 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -23,6 +23,9 @@ typedef u32 __ticketpair_t; #define TICKET_SHIFT (sizeof(__ticket_t) * 8) +#ifdef CONFIG_QUEUED_SPINLOCK +#include +#else typedef struct arch_spinlock { union { __ticketpair_t head_tail; @@ -33,6 +36,7 @@ typedef struct arch_spinlock { } arch_spinlock_t; #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } +#endif /* CONFIG_QUEUED_SPINLOCK */ #include From c1fb159db9f2e50e0f4025bed92a67a6a7bfa7b7 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Fri, 24 Apr 2015 14:56:32 -0400 Subject: [PATCH 042/481] locking/qspinlock: Add pending bit Because the qspinlock needs to touch a second cacheline (the per-cpu mcs_nodes[]); add a pending bit and allow a single in-word spinner before we punt to the second cacheline. It is possible so observe the pending bit without the locked bit when the last owner has just released but the pending owner has not yet taken ownership. In this case we would normally queue -- because the pending bit is already taken. 
However, in this case the pending bit is guaranteed to be released 'soon', therefore wait for it and avoid queueing. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-4-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock_types.h | 12 ++- kernel/locking/qspinlock.c | 119 +++++++++++++++++++++----- 2 files changed, 107 insertions(+), 24 deletions(-) diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index aec05c7ad2f6..7ee6632cb818 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -36,8 +36,9 @@ typedef struct qspinlock { * Bitfields in the atomic value: * * 0- 7: locked byte - * 8- 9: tail index - * 10-31: tail cpu (+1) + * 8: pending + * 9-10: tail index + * 11-31: tail cpu (+1) */ #define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ << _Q_ ## type ## _OFFSET) @@ -45,7 +46,11 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 8 #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) -#define _Q_TAIL_IDX_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_BITS 1 +#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) #define _Q_TAIL_IDX_BITS 2 #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) @@ -54,5 +59,6 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) #endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */ diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 029b51ce10ea..af9c2ef6e930 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -94,24 +94,28 @@ static inline struct mcs_spinlock *decode_tail(u32 tail) return per_cpu_ptr(&mcs_nodes[idx], cpu); } +#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) + /** * queued_spin_lock_slowpath - acquire the queued spinlock * @lock: Pointer to queued spinlock structure * @val: Current value of the queued spinlock 32-bit word * - * (queue tail, lock value) - * - * fast : slow : unlock - * : : - * uncontended (0,0) --:--> (0,1) --------------------------------:--> (*,0) - * : | ^--------. / : - * : v \ | : - * uncontended : (n,x) --+--> (n,0) | : - * queue : | ^--' | : - * : v | : - * contended : (*,x) --+--> (*,0) -----> (*,1) ---' : - * queue : ^--' : + * (queue tail, pending bit, lock value) * + * fast : slow : unlock + * : : + * uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0) + * : | ^--------.------. 
/ : + * : v \ \ | : + * pending : (0,1,1) +--> (0,1,0) \ | : + * : | ^--' | | : + * : v | | : + * uncontended : (n,x,y) +--> (n,0,0) --' | : + * queue : | ^--' | : + * : v | : + * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' : + * queue : ^--' : */ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { @@ -121,6 +125,75 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + /* + * wait for in-progress pending->locked hand-overs + * + * 0,1,0 -> 0,0,1 + */ + if (val == _Q_PENDING_VAL) { + while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL) + cpu_relax(); + } + + /* + * trylock || pending + * + * 0,0,0 -> 0,0,1 ; trylock + * 0,0,1 -> 0,1,1 ; pending + */ + for (;;) { + /* + * If we observe any contention; queue. + */ + if (val & ~_Q_LOCKED_MASK) + goto queue; + + new = _Q_LOCKED_VAL; + if (val == new) + new |= _Q_PENDING_VAL; + + old = atomic_cmpxchg(&lock->val, val, new); + if (old == val) + break; + + val = old; + } + + /* + * we won the trylock + */ + if (new == _Q_LOCKED_VAL) + return; + + /* + * we're pending, wait for the owner to go away. + * + * *,1,1 -> *,1,0 + */ + while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK) + cpu_relax(); + + /* + * take ownership and clear the pending bit. + * + * *,1,0 -> *,0,1 + */ + for (;;) { + new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; + + old = atomic_cmpxchg(&lock->val, val, new); + if (old == val) + break; + + val = old; + } + return; + + /* + * End of pending bit optimistic spinning and beginning of MCS + * queuing. + */ +queue: node = this_cpu_ptr(&mcs_nodes[0]); idx = node->count++; tail = encode_tail(smp_processor_id(), idx); @@ -130,15 +203,18 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) node->next = NULL; /* + * We have already touched the queueing cacheline; don't bother with + * pending stuff. + * * trylock || xchg(lock, node) * - * 0,0 -> 0,1 ; no tail, not locked -> no tail, locked. - * p,x -> n,x ; tail was p -> tail is n; preserving locked. + * 0,0,0 -> 0,0,1 ; no tail, not locked -> no tail, locked. + * p,y,x -> n,y,x ; tail was p -> tail is n; preserving locked. */ for (;;) { new = _Q_LOCKED_VAL; if (val) - new = tail | (val & _Q_LOCKED_MASK); + new = tail | (val & _Q_LOCKED_PENDING_MASK); old = atomic_cmpxchg(&lock->val, val, new); if (old == val) @@ -157,7 +233,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * if there was a previous node; link it and wait until reaching the * head of the waitqueue. */ - if (old & ~_Q_LOCKED_MASK) { + if (old & ~_Q_LOCKED_PENDING_MASK) { prev = decode_tail(old); WRITE_ONCE(prev->next, node); @@ -165,18 +241,19 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) } /* - * we're at the head of the waitqueue, wait for the owner to go away. + * we're at the head of the waitqueue, wait for the owner & pending to + * go away. 
* - * *,x -> *,0 + * *,x,y -> *,0,0 */ - while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK) + while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK) cpu_relax(); /* * claim the lock: * - * n,0 -> 0,1 : lock, uncontended - * *,0 -> *,1 : lock, contended + * n,0,0 -> 0,0,1 : lock, uncontended + * *,0,0 -> *,0,1 : lock, contended */ for (;;) { new = _Q_LOCKED_VAL; From 6403bd7d0ea1878a487296114eccf78658d7dd7a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 24 Apr 2015 14:56:33 -0400 Subject: [PATCH 043/481] locking/qspinlock: Extract out code snippets for the next patch This is a preparatory patch that extracts out the following 2 code snippets to prepare for the next performance optimization patch. 1) the logic for the exchange of new and previous tail code words into a new xchg_tail() function. 2) the logic for clearing the pending bit and setting the locked bit into a new clear_pending_set_locked() function. This patch also simplifies the trylock operation before queuing by calling queued_spin_trylock() directly. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-5-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock_types.h | 2 + kernel/locking/qspinlock.c | 79 ++++++++++++++++----------- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 7ee6632cb818..3a7f67173bd0 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -58,6 +58,8 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) +#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK) + #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) #define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index af9c2ef6e930..82bb4a9e9009 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -96,6 +96,42 @@ static inline struct mcs_spinlock *decode_tail(u32 tail) #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) +/** + * clear_pending_set_locked - take ownership and clear the pending bit. 
+ * @lock: Pointer to queued spinlock structure + * + * *,1,0 -> *,0,1 + */ +static __always_inline void clear_pending_set_locked(struct qspinlock *lock) +{ + atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val); +} + +/** + * xchg_tail - Put in the new queue tail code word & retrieve previous one + * @lock : Pointer to queued spinlock structure + * @tail : The new queue tail code word + * Return: The previous queue tail code word + * + * xchg(lock, tail) + * + * p,*,* -> n,*,* ; prev = xchg(lock, node) + */ +static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) +{ + u32 old, new, val = atomic_read(&lock->val); + + for (;;) { + new = (val & _Q_LOCKED_PENDING_MASK) | tail; + old = atomic_cmpxchg(&lock->val, val, new); + if (old == val) + break; + + val = old; + } + return old; +} + /** * queued_spin_lock_slowpath - acquire the queued spinlock * @lock: Pointer to queued spinlock structure @@ -178,15 +214,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * * *,1,0 -> *,0,1 */ - for (;;) { - new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; - - old = atomic_cmpxchg(&lock->val, val, new); - if (old == val) - break; - - val = old; - } + clear_pending_set_locked(lock); return; /* @@ -202,38 +230,27 @@ queue: node->locked = 0; node->next = NULL; + /* + * We touched a (possibly) cold cacheline in the per-cpu queue node; + * attempt the trylock once more in the hope someone let go while we + * weren't watching. + */ + if (queued_spin_trylock(lock)) + goto release; + /* * We have already touched the queueing cacheline; don't bother with * pending stuff. * - * trylock || xchg(lock, node) - * - * 0,0,0 -> 0,0,1 ; no tail, not locked -> no tail, locked. - * p,y,x -> n,y,x ; tail was p -> tail is n; preserving locked. + * p,*,* -> n,*,* */ - for (;;) { - new = _Q_LOCKED_VAL; - if (val) - new = tail | (val & _Q_LOCKED_PENDING_MASK); - - old = atomic_cmpxchg(&lock->val, val, new); - if (old == val) - break; - - val = old; - } - - /* - * we won the trylock; forget about queueing. - */ - if (new == _Q_LOCKED_VAL) - goto release; + old = xchg_tail(lock, tail); /* * if there was a previous node; link it and wait until reaching the * head of the waitqueue. */ - if (old & ~_Q_LOCKED_PENDING_MASK) { + if (old & _Q_TAIL_MASK) { prev = decode_tail(old); WRITE_ONCE(prev->next, node); From 69f9cae90907e09af95fb991ed384670cef8dd32 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Fri, 24 Apr 2015 14:56:34 -0400 Subject: [PATCH 044/481] locking/qspinlock: Optimize for smaller NR_CPUS When we allow for a max NR_CPUS < 2^14 we can optimize the pending wait-acquire and the xchg_tail() operations. By growing the pending bit to a byte, we reduce the tail to 16bit. This means we can use xchg16 for the tail part and do away with all the repeated compxchg() operations. This in turn allows us to unconditionally acquire; the locked state as observed by the wait loops cannot change. And because both locked and pending are now a full byte we can use simple stores for the state transition, obviating one atomic operation entirely. This optimization is needed to make the qspinlock achieve performance parity with ticket spinlock at light load. All this is horribly broken on Alpha pre EV56 (and any other arch that cannot do single-copy atomic byte stores). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. 
Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-6-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock_types.h | 13 +++++ kernel/locking/qspinlock.c | 69 ++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 3a7f67173bd0..85f888e86761 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -35,6 +35,14 @@ typedef struct qspinlock { /* * Bitfields in the atomic value: * + * When NR_CPUS < 16K + * 0- 7: locked byte + * 8: pending + * 9-15: not used + * 16-17: tail index + * 18-31: tail cpu (+1) + * + * When NR_CPUS >= 16K * 0- 7: locked byte * 8: pending * 9-10: tail index @@ -47,7 +55,11 @@ typedef struct qspinlock { #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) #define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#if CONFIG_NR_CPUS < (1U << 14) +#define _Q_PENDING_BITS 8 +#else #define _Q_PENDING_BITS 1 +#endif #define _Q_PENDING_MASK _Q_SET_MASK(PENDING) #define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) @@ -58,6 +70,7 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) +#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET #define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK) #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 82bb4a9e9009..e17efe7b8d4d 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -24,6 +24,7 @@ #include #include #include +#include #include /* @@ -56,6 +57,10 @@ * node; whereby avoiding the need to carry a node from lock to unlock, and * preserving existing lock API. This also makes the unlock code simpler and * faster. + * + * N.B. The current implementation only supports architectures that allow + * atomic operations on smaller 8-bit and 16-bit data types. + * */ #include "mcs_spinlock.h" @@ -96,6 +101,62 @@ static inline struct mcs_spinlock *decode_tail(u32 tail) #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) +/* + * By using the whole 2nd least significant byte for the pending bit, we + * can allow better optimization of the lock acquisition for the pending + * bit holder. + */ +#if _Q_PENDING_BITS == 8 + +struct __qspinlock { + union { + atomic_t val; + struct { +#ifdef __LITTLE_ENDIAN + u16 locked_pending; + u16 tail; +#else + u16 tail; + u16 locked_pending; +#endif + }; + }; +}; + +/** + * clear_pending_set_locked - take ownership and clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,0 -> *,0,1 + * + * Lock stealing is not allowed if this function is used. 
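+ *
+ * Because pending and locked now share one aligned 16-bit half-word
+ * (locked_pending), this transition can be done with a single plain store
+ * of _Q_LOCKED_VAL instead of an atomic read-modify-write, as the
+ * implementation below shows.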
+ */ +static __always_inline void clear_pending_set_locked(struct qspinlock *lock) +{ + struct __qspinlock *l = (void *)lock; + + WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL); +} + +/* + * xchg_tail - Put in the new queue tail code word & retrieve previous one + * @lock : Pointer to queued spinlock structure + * @tail : The new queue tail code word + * Return: The previous queue tail code word + * + * xchg(lock, tail) + * + * p,*,* -> n,*,* ; prev = xchg(lock, node) + */ +static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) +{ + struct __qspinlock *l = (void *)lock; + + return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; +} + +#else /* _Q_PENDING_BITS == 8 */ + /** * clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure @@ -131,6 +192,7 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) } return old; } +#endif /* _Q_PENDING_BITS == 8 */ /** * queued_spin_lock_slowpath - acquire the queued spinlock @@ -205,8 +267,13 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * we're pending, wait for the owner to go away. * * *,1,1 -> *,1,0 + * + * this wait loop must be a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because not all clear_pending_set_locked() + * implementations imply full barriers. */ - while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK) + while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK) cpu_relax(); /* From 2c83e8e9492dc823be1d96d4c5ef75d16d3866a0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 24 Apr 2015 14:56:35 -0400 Subject: [PATCH 045/481] locking/qspinlock: Use a simple write to grab the lock Currently, atomic_cmpxchg() is used to get the lock. However, this is not really necessary if there is more than one task in the queue and the queue head don't need to reset the tail code. For that case, a simple write to set the lock bit is enough as the queue head will be the only one eligible to get the lock as long as it checks that both the lock and pending bits are not set. The current pending bit waiting code will ensure that the bit will not be set as soon as the tail code in the lock is set. With that change, the are some slight improvement in the performance of the queued spinlock in the 5M loop micro-benchmark run on a 4-socket Westere-EX machine as shown in the tables below. [Standalone/Embedded - same node] # of tasks Before patch After patch %Change ---------- ----------- ---------- ------- 3 2324/2321 2248/2265 -3%/-2% 4 2890/2896 2819/2831 -2%/-2% 5 3611/3595 3522/3512 -2%/-2% 6 4281/4276 4173/4160 -3%/-3% 7 5018/5001 4875/4861 -3%/-3% 8 5759/5750 5563/5568 -3%/-3% [Standalone/Embedded - different nodes] # of tasks Before patch After patch %Change ---------- ----------- ---------- ------- 3 12242/12237 12087/12093 -1%/-1% 4 10688/10696 10507/10521 -2%/-2% It was also found that this change produced a much bigger performance improvement in the newer IvyBridge-EX chip and was essentially to close the performance gap between the ticket spinlock and queued spinlock. The disk workload of the AIM7 benchmark was run on a 4-socket Westmere-EX machine with both ext4 and xfs RAM disks at 3000 users on a 3.14 based kernel. 
The results of the test runs were: AIM7 XFS Disk Test kernel JPM Real Time Sys Time Usr Time ----- --- --------- -------- -------- ticketlock 5678233 3.17 96.61 5.81 qspinlock 5750799 3.13 94.83 5.97 AIM7 EXT4 Disk Test kernel JPM Real Time Sys Time Usr Time ----- --- --------- -------- -------- ticketlock 1114551 16.15 509.72 7.11 qspinlock 2184466 8.24 232.99 6.01 The ext4 filesystem run had a much higher spinlock contention than the xfs filesystem run. The "ebizzy -m" test was also run with the following results: kernel records/s Real Time Sys Time Usr Time ----- --------- --------- -------- -------- ticketlock 2075 10.00 216.35 3.49 qspinlock 3023 10.00 198.20 4.80 Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-7-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock.c | 76 +++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index e17efe7b8d4d..033872113ebb 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -105,24 +105,37 @@ static inline struct mcs_spinlock *decode_tail(u32 tail) * By using the whole 2nd least significant byte for the pending bit, we * can allow better optimization of the lock acquisition for the pending * bit holder. + * + * This internal structure is also used by the set_locked function which + * is not restricted to _Q_PENDING_BITS == 8. */ -#if _Q_PENDING_BITS == 8 - struct __qspinlock { union { atomic_t val; - struct { #ifdef __LITTLE_ENDIAN - u16 locked_pending; - u16 tail; -#else - u16 tail; - u16 locked_pending; -#endif + struct { + u8 locked; + u8 pending; }; + struct { + u16 locked_pending; + u16 tail; + }; +#else + struct { + u16 tail; + u16 locked_pending; + }; + struct { + u8 reserved[2]; + u8 pending; + u8 locked; + }; +#endif }; }; +#if _Q_PENDING_BITS == 8 /** * clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure @@ -194,6 +207,19 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) } #endif /* _Q_PENDING_BITS == 8 */ +/** + * set_locked - Set the lock bit and own the lock + * @lock: Pointer to queued spinlock structure + * + * *,*,0 -> *,0,1 + */ +static __always_inline void set_locked(struct qspinlock *lock) +{ + struct __qspinlock *l = (void *)lock; + + WRITE_ONCE(l->locked, _Q_LOCKED_VAL); +} + /** * queued_spin_lock_slowpath - acquire the queued spinlock * @lock: Pointer to queued spinlock structure @@ -329,8 +355,14 @@ queue: * go away. * * *,x,y -> *,0,0 + * + * this wait loop must use a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because the set_locked() function below + * does not imply a full barrier. 
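+ * (The matching store-release is the one that clears the locked byte in
+ * queued_spin_unlock(), e.g. smp_store_release((u8 *)lock, 0) on x86.)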
+ * */ - while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK) + while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK) cpu_relax(); /* @@ -338,15 +370,19 @@ queue: * * n,0,0 -> 0,0,1 : lock, uncontended * *,0,0 -> *,0,1 : lock, contended + * + * If the queue head is the only one in the queue (lock value == tail), + * clear the tail code and grab the lock. Otherwise, we only need + * to grab the lock. */ for (;;) { - new = _Q_LOCKED_VAL; - if (val != tail) - new |= val; - - old = atomic_cmpxchg(&lock->val, val, new); - if (old == val) + if (val != tail) { + set_locked(lock); break; + } + old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL); + if (old == val) + goto release; /* No contention */ val = old; } @@ -354,12 +390,10 @@ queue: /* * contended path; wait for next, release. */ - if (new != _Q_LOCKED_VAL) { - while (!(next = READ_ONCE(node->next))) - cpu_relax(); + while (!(next = READ_ONCE(node->next))) + cpu_relax(); - arch_mcs_spin_unlock_contended(&next->locked); - } + arch_mcs_spin_unlock_contended(&next->locked); release: /* From 2aa79af64263190eec610422b07f60e99a7d230a Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Fri, 24 Apr 2015 14:56:36 -0400 Subject: [PATCH 046/481] locking/qspinlock: Revert to test-and-set on hypervisors When we detect a hypervisor (!paravirt, see qspinlock paravirt support patches), revert to a simple test-and-set lock to avoid the horrors of queue preemption. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-8-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/qspinlock.h | 14 ++++++++++++++ include/asm-generic/qspinlock.h | 7 +++++++ kernel/locking/qspinlock.c | 3 +++ 3 files changed, 24 insertions(+) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index e2aee8273664..f079b7020e3f 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_QSPINLOCK_H #define _ASM_X86_QSPINLOCK_H +#include #include #define queued_spin_unlock queued_spin_unlock @@ -15,6 +16,19 @@ static inline void queued_spin_unlock(struct qspinlock *lock) smp_store_release((u8 *)lock, 0); } +#define virt_queued_spin_lock virt_queued_spin_lock + +static inline bool virt_queued_spin_lock(struct qspinlock *lock) +{ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + return false; + + while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0) + cpu_relax(); + + return true; +} + #include #endif /* _ASM_X86_QSPINLOCK_H */ diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 569abcd47a9a..83bfb87f5bf1 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -111,6 +111,13 @@ static inline void queued_spin_unlock_wait(struct qspinlock *lock) cpu_relax(); } +#ifndef virt_queued_spin_lock +static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock) +{ + return false; +} +#endif + /* * Initializier */ diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 033872113ebb..fd31a474145d 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -249,6 +249,9 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + if (virt_queued_spin_lock(lock)) + return; + /* * wait for in-progress pending->locked hand-overs * From a23db284fe0d1879ca2002bf31077b5efa2fe2ca Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 24 Apr 2015 14:56:37 -0400 Subject: [PATCH 047/481] locking/pvqspinlock: Implement simple paravirt support for the qspinlock Provide a separate (second) version of the spin_lock_slowpath for paravirt along with a special unlock path. The second slowpath is generated by adding a few pv hooks to the normal slowpath, but where those will compile away for the native case, they expand into special wait/wake code for the pv version. The actual MCS queue can use extra storage in the mcs_nodes[] array to keep track of state and therefore uses directed wakeups. The head contender has no such storage directly visible to the unlocker. So the unlocker searches a hash table with open addressing using a simple binary Galois linear feedback shift register. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429901803-29771-9-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock.c | 68 +++++- kernel/locking/qspinlock_paravirt.h | 325 ++++++++++++++++++++++++++++ 2 files changed, 392 insertions(+), 1 deletion(-) create mode 100644 kernel/locking/qspinlock_paravirt.h diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index fd31a474145d..38c49202d532 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -18,6 +18,9 @@ * Authors: Waiman Long * Peter Zijlstra */ + +#ifndef _GEN_PV_LOCK_SLOWPATH + #include #include #include @@ -65,13 +68,21 @@ #include "mcs_spinlock.h" +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define MAX_NODES 8 +#else +#define MAX_NODES 4 +#endif + /* * Per-CPU queue node structures; we can never have more than 4 nested * contexts: task, softirq, hardirq, nmi. * * Exactly fits one 64-byte cacheline on a 64-bit architecture. + * + * PV doubles the storage and uses the second cacheline for PV state. */ -static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]); +static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]); /* * We must be able to distinguish between no-tail and the tail at 0:0, @@ -220,6 +231,32 @@ static __always_inline void set_locked(struct qspinlock *lock) WRITE_ONCE(l->locked, _Q_LOCKED_VAL); } + +/* + * Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for + * all the PV callbacks. + */ + +static __always_inline void __pv_init_node(struct mcs_spinlock *node) { } +static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { } +static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { } + +static __always_inline void __pv_wait_head(struct qspinlock *lock, + struct mcs_spinlock *node) { } + +#define pv_enabled() false + +#define pv_init_node __pv_init_node +#define pv_wait_node __pv_wait_node +#define pv_kick_node __pv_kick_node +#define pv_wait_head __pv_wait_head + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath +#endif + +#endif /* _GEN_PV_LOCK_SLOWPATH */ + /** * queued_spin_lock_slowpath - acquire the queued spinlock * @lock: Pointer to queued spinlock structure @@ -249,6 +286,9 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + if (pv_enabled()) + goto queue; + if (virt_queued_spin_lock(lock)) return; @@ -325,6 +365,7 @@ queue: node += idx; node->locked = 0; node->next = NULL; + pv_init_node(node); /* * We touched a (possibly) cold cacheline in the per-cpu queue node; @@ -350,6 +391,7 @@ queue: prev = decode_tail(old); WRITE_ONCE(prev->next, node); + pv_wait_node(node); arch_mcs_spin_lock_contended(&node->locked); } @@ -365,6 +407,7 @@ queue: * does not imply a full barrier. * */ + pv_wait_head(lock, node); while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK) cpu_relax(); @@ -397,6 +440,7 @@ queue: cpu_relax(); arch_mcs_spin_unlock_contended(&next->locked); + pv_kick_node(next); release: /* @@ -405,3 +449,25 @@ release: this_cpu_dec(mcs_nodes[0].count); } EXPORT_SYMBOL(queued_spin_lock_slowpath); + +/* + * Generate the paravirt code for queued_spin_unlock_slowpath(). 
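+ *
+ * The block below makes this file include itself a second time with
+ * _GEN_PV_LOCK_SLOWPATH defined: the pv_*() stub macros are #undef'ed so
+ * the calls bind to the real helpers from qspinlock_paravirt.h, and the
+ * same slowpath body is emitted again as __pv_queued_spin_lock_slowpath().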
+ */ +#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS) +#define _GEN_PV_LOCK_SLOWPATH + +#undef pv_enabled +#define pv_enabled() true + +#undef pv_init_node +#undef pv_wait_node +#undef pv_kick_node +#undef pv_wait_head + +#undef queued_spin_lock_slowpath +#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath + +#include "qspinlock_paravirt.h" +#include "qspinlock.c" + +#endif diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h new file mode 100644 index 000000000000..b5758a95a8d3 --- /dev/null +++ b/kernel/locking/qspinlock_paravirt.h @@ -0,0 +1,325 @@ +#ifndef _GEN_PV_LOCK_SLOWPATH +#error "do not include this file" +#endif + +#include +#include + +/* + * Implement paravirt qspinlocks; the general idea is to halt the vcpus instead + * of spinning them. + * + * This relies on the architecture to provide two paravirt hypercalls: + * + * pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr == val + * pv_kick(cpu) -- wakes a suspended vcpu + * + * Using these we implement __pv_queued_spin_lock_slowpath() and + * __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath() and + * native_queued_spin_unlock(). + */ + +#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET) + +enum vcpu_state { + vcpu_running = 0, + vcpu_halted, +}; + +struct pv_node { + struct mcs_spinlock mcs; + struct mcs_spinlock __res[3]; + + int cpu; + u8 state; +}; + +/* + * Lock and MCS node addresses hash table for fast lookup + * + * Hashing is done on a per-cacheline basis to minimize the need to access + * more than one cacheline. + * + * Dynamically allocate a hash table big enough to hold at least 4X the + * number of possible cpus in the system. Allocation is done on page + * granularity. So the minimum number of hash buckets should be at least + * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page. + * + * Since we should not be holding locks from NMI context (very rare indeed) the + * max load factor is 0.75, which is around the point where open addressing + * breaks down. + * + */ +struct pv_hash_entry { + struct qspinlock *lock; + struct pv_node *node; +}; + +#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry)) +#define PV_HE_MIN (PAGE_SIZE / sizeof(struct pv_hash_entry)) + +static struct pv_hash_entry *pv_lock_hash; +static unsigned int pv_lock_hash_bits __read_mostly; + +/* + * Allocate memory for the PV qspinlock hash buckets + * + * This function should be called from the paravirt spinlock initialization + * routine. + */ +void __init __pv_init_lock_hash(void) +{ + int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE); + + if (pv_hash_size < PV_HE_MIN) + pv_hash_size = PV_HE_MIN; + + /* + * Allocate space from bootmem which should be page-size aligned + * and hence cacheline aligned. 
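+ *
+ * The size works out to 4 entries per possible CPU (one per nesting
+ * level), rounded up to a whole cacheline of entries (PV_HE_PER_LINE)
+ * and to at least one page's worth (PV_HE_MIN).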
+ */ + pv_lock_hash = alloc_large_system_hash("PV qspinlock", + sizeof(struct pv_hash_entry), + pv_hash_size, 0, HASH_EARLY, + &pv_lock_hash_bits, NULL, + pv_hash_size, pv_hash_size); +} + +#define for_each_hash_entry(he, offset, hash) \ + for (hash &= ~(PV_HE_PER_LINE - 1), he = &pv_lock_hash[hash], offset = 0; \ + offset < (1 << pv_lock_hash_bits); \ + offset++, he = &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash_bits) - 1)]) + +static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node) +{ + unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits); + struct pv_hash_entry *he; + + for_each_hash_entry(he, offset, hash) { + if (!cmpxchg(&he->lock, NULL, lock)) { + WRITE_ONCE(he->node, node); + return &he->lock; + } + } + /* + * Hard assume there is a free entry for us. + * + * This is guaranteed by ensuring every blocked lock only ever consumes + * a single entry, and since we only have 4 nesting levels per CPU + * and allocated 4*nr_possible_cpus(), this must be so. + * + * The single entry is guaranteed by having the lock owner unhash + * before it releases. + */ + BUG(); +} + +static struct pv_node *pv_unhash(struct qspinlock *lock) +{ + unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits); + struct pv_hash_entry *he; + struct pv_node *node; + + for_each_hash_entry(he, offset, hash) { + if (READ_ONCE(he->lock) == lock) { + node = READ_ONCE(he->node); + WRITE_ONCE(he->lock, NULL); + return node; + } + } + /* + * Hard assume we'll find an entry. + * + * This guarantees a limited lookup time and is itself guaranteed by + * having the lock owner do the unhash -- IFF the unlock sees the + * SLOW flag, there MUST be a hash entry. + */ + BUG(); +} + +/* + * Initialize the PV part of the mcs_spinlock node. + */ +static void pv_init_node(struct mcs_spinlock *node) +{ + struct pv_node *pn = (struct pv_node *)node; + + BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock)); + + pn->cpu = smp_processor_id(); + pn->state = vcpu_running; +} + +/* + * Wait for node->locked to become true, halt the vcpu after a short spin. + * pv_kick_node() is used to wake the vcpu again. + */ +static void pv_wait_node(struct mcs_spinlock *node) +{ + struct pv_node *pn = (struct pv_node *)node; + int loop; + + for (;;) { + for (loop = SPIN_THRESHOLD; loop; loop--) { + if (READ_ONCE(node->locked)) + return; + cpu_relax(); + } + + /* + * Order pn->state vs pn->locked thusly: + * + * [S] pn->state = vcpu_halted [S] next->locked = 1 + * MB MB + * [L] pn->locked [RmW] pn->state = vcpu_running + * + * Matches the xchg() from pv_kick_node(). + */ + (void)xchg(&pn->state, vcpu_halted); + + if (!READ_ONCE(node->locked)) + pv_wait(&pn->state, vcpu_halted); + + /* + * Reset the vCPU state to avoid unncessary CPU kicking + */ + WRITE_ONCE(pn->state, vcpu_running); + + /* + * If the locked flag is still not set after wakeup, it is a + * spurious wakeup and the vCPU should wait again. However, + * there is a pretty high overhead for CPU halting and kicking. + * So it is better to spin for a while in the hope that the + * MCS lock will be released soon. + */ + } + /* + * By now our node->locked should be 1 and our caller will not actually + * spin-wait for it. We do however rely on our caller to do a + * load-acquire for us. + */ +} + +/* + * Called after setting next->locked = 1, used to wake those stuck in + * pv_wait_node(). 
+ */ +static void pv_kick_node(struct mcs_spinlock *node) +{ + struct pv_node *pn = (struct pv_node *)node; + + /* + * Note that because node->locked is already set, this actual + * mcs_spinlock entry could be re-used already. + * + * This should be fine however, kicking people for no reason is + * harmless. + * + * See the comment in pv_wait_node(). + */ + if (xchg(&pn->state, vcpu_running) == vcpu_halted) + pv_kick(pn->cpu); +} + +/* + * Wait for l->locked to become clear; halt the vcpu after a short spin. + * __pv_queued_spin_unlock() will wake us. + */ +static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) +{ + struct pv_node *pn = (struct pv_node *)node; + struct __qspinlock *l = (void *)lock; + struct qspinlock **lp = NULL; + int loop; + + for (;;) { + for (loop = SPIN_THRESHOLD; loop; loop--) { + if (!READ_ONCE(l->locked)) + return; + cpu_relax(); + } + + WRITE_ONCE(pn->state, vcpu_halted); + if (!lp) { /* ONCE */ + lp = pv_hash(lock, pn); + /* + * lp must be set before setting _Q_SLOW_VAL + * + * [S] lp = lock [RmW] l = l->locked = 0 + * MB MB + * [S] l->locked = _Q_SLOW_VAL [L] lp + * + * Matches the cmpxchg() in __pv_queued_spin_unlock(). + */ + if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) { + /* + * The lock is free and _Q_SLOW_VAL has never + * been set. Therefore we need to unhash before + * getting the lock. + */ + WRITE_ONCE(*lp, NULL); + return; + } + } + pv_wait(&l->locked, _Q_SLOW_VAL); + + /* + * The unlocker should have freed the lock before kicking the + * CPU. So if the lock is still not free, it is a spurious + * wakeup and so the vCPU should wait again after spinning for + * a while. + */ + } + + /* + * Lock is unlocked now; the caller will acquire it without waiting. + * As with pv_wait_node() we rely on the caller to do a load-acquire + * for us. + */ +} + +/* + * PV version of the unlock function to be used in stead of + * queued_spin_unlock(). + */ +__visible void __pv_queued_spin_unlock(struct qspinlock *lock) +{ + struct __qspinlock *l = (void *)lock; + struct pv_node *node; + + /* + * We must not unlock if SLOW, because in that case we must first + * unhash. Otherwise it would be possible to have multiple @lock + * entries, which would be BAD. + */ + if (likely(cmpxchg(&l->locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL)) + return; + + /* + * Since the above failed to release, this must be the SLOW path. + * Therefore start by looking up the blocked node and unhashing it. + */ + node = pv_unhash(lock); + + /* + * Now that we have a reference to the (likely) blocked pv_node, + * release the lock. + */ + smp_store_release(&l->locked, 0); + + /* + * At this point the memory pointed at by lock can be freed/reused, + * however we can still use the pv_node to kick the CPU. + */ + if (READ_ONCE(node->state) == vcpu_halted) + pv_kick(node->cpu); +} +/* + * Include the architecture specific callee-save thunk of the + * __pv_queued_spin_unlock(). This thunk is put together with + * __pv_queued_spin_unlock() near the top of the file to make sure + * that the callee-save thunk and the real unlock function are close + * to each other sharing consecutive instruction cachelines. 
+ */ +#include + From f233f7f1581e78fd9b4023f2e7d8c1ed89020cc9 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Fri, 24 Apr 2015 14:56:38 -0400 Subject: [PATCH 048/481] locking/pvqspinlock, x86: Implement the paravirt qspinlock call patching We use the regular paravirt call patching to switch between: native_queued_spin_lock_slowpath() __pv_queued_spin_lock_slowpath() native_queued_spin_unlock() __pv_queued_spin_unlock() We use a callee saved call for the unlock function which reduces the i-cache footprint and allows 'inlining' of SPIN_UNLOCK functions again. We further optimize the unlock path by patching the direct call with a "movb $0,%arg1" if we are indeed using the native unlock code. This makes the unlock code almost as fast as the !PARAVIRT case. This significantly lowers the overhead of having CONFIG_PARAVIRT_SPINLOCKS enabled, even for native code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-10-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/paravirt.h | 29 ++++++++++++++++++++++- arch/x86/include/asm/paravirt_types.h | 10 ++++++++ arch/x86/include/asm/qspinlock.h | 25 ++++++++++++++++++- arch/x86/include/asm/qspinlock_paravirt.h | 6 +++++ arch/x86/kernel/paravirt-spinlocks.c | 24 ++++++++++++++++++- arch/x86/kernel/paravirt_patch_32.c | 22 +++++++++++++---- arch/x86/kernel/paravirt_patch_64.c | 22 +++++++++++++---- 8 files changed, 128 insertions(+), 12 deletions(-) create mode 100644 arch/x86/include/asm/qspinlock_paravirt.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 90b1b54f4f38..50ec043a920d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -667,7 +667,7 @@ config PARAVIRT_DEBUG config PARAVIRT_SPINLOCKS bool "Paravirtualization layer for spinlocks" depends on PARAVIRT && SMP - select UNINLINE_SPIN_UNLOCK + select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCK ---help--- Paravirtualized spinlocks allow a pvops backend to replace the spinlock implementation with something virtualization-friendly diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 8957810ad7d1..266c35381b62 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) +#ifdef CONFIG_QUEUED_SPINLOCK + +static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, + u32 val) +{ + PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val); +} + +static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock); +} + +static __always_inline void pv_wait(u8 *ptr, u8 val) +{ + PVOP_VCALL2(pv_lock_ops.wait, ptr, val); +} + +static __always_inline void pv_kick(int cpu) +{ + PVOP_VCALL1(pv_lock_ops.kick, cpu); +} + +#else /* !CONFIG_QUEUED_SPINLOCK */ + static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, 
__ticket_t ticket) { @@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); } -#endif +#endif /* CONFIG_QUEUED_SPINLOCK */ + +#endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index f7b0b5c112f2..76cd68426af8 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -333,9 +333,19 @@ struct arch_spinlock; typedef u16 __ticket_t; #endif +struct qspinlock; + struct pv_lock_ops { +#ifdef CONFIG_QUEUED_SPINLOCK + void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); + struct paravirt_callee_save queued_spin_unlock; + + void (*wait)(u8 *ptr, u8 val); + void (*kick)(int cpu); +#else /* !CONFIG_QUEUED_SPINLOCK */ struct paravirt_callee_save lock_spinning; void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); +#endif /* !CONFIG_QUEUED_SPINLOCK */ }; /* This contains all the paravirt structures: we get a convenient diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index f079b7020e3f..9d51fae1cba3 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -3,6 +3,7 @@ #include #include +#include #define queued_spin_unlock queued_spin_unlock /** @@ -11,11 +12,33 @@ * * A smp_store_release() on the least-significant byte. */ -static inline void queued_spin_unlock(struct qspinlock *lock) +static inline void native_queued_spin_unlock(struct qspinlock *lock) { smp_store_release((u8 *)lock, 0); } +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock_slowpath(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + pv_queued_spin_unlock(lock); +} +#else +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + native_queued_spin_unlock(lock); +} +#endif + #define virt_queued_spin_lock virt_queued_spin_lock static inline bool virt_queued_spin_lock(struct qspinlock *lock) diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h new file mode 100644 index 000000000000..b002e711ba88 --- /dev/null +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -0,0 +1,6 @@ +#ifndef __ASM_QSPINLOCK_PARAVIRT_H +#define __ASM_QSPINLOCK_PARAVIRT_H + +PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); + +#endif diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index bbb6c7316341..a33f1eb15003 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -8,11 +8,33 @@ #include +#ifdef CONFIG_QUEUED_SPINLOCK +__visible void __native_queued_spin_unlock(struct qspinlock *lock) +{ + native_queued_spin_unlock(lock); +} + +PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock); + +bool pv_is_native_spin_unlock(void) +{ + return pv_lock_ops.queued_spin_unlock.func == + __raw_callee_save___native_queued_spin_unlock; +} +#endif + struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP +#ifdef CONFIG_QUEUED_SPINLOCK + .queued_spin_lock_slowpath = 
native_queued_spin_lock_slowpath, + .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), + .wait = paravirt_nop, + .kick = paravirt_nop, +#else /* !CONFIG_QUEUED_SPINLOCK */ .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), .unlock_kick = paravirt_nop, -#endif +#endif /* !CONFIG_QUEUED_SPINLOCK */ +#endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index d9f32e6d6ab6..e1b013696dde 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) +DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); +#endif + unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) { /* arg in %eax, return in %eax */ @@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) return 0; } +extern bool pv_is_native_spin_unlock(void); + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { @@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_cpu_ops, clts); PATCH_SITE(pv_cpu_ops, read_tsc); - - patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); - break; +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) + case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): + if (pv_is_native_spin_unlock()) { + start = start_pv_lock_ops_queued_spin_unlock; + end = end_pv_lock_ops_queued_spin_unlock; + goto patch_site; + } +#endif default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; + +patch_site: + ret = paravirt_patch_insns(ibuf, len, start, end); + break; } #undef PATCH_SITE return ret; diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index a1da6737ba5b..e0fb41c8255b 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov64, "mov %rdi, %rax"); +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCK) +DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); +#endif + unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) { return paravirt_patch_insns(insnbuf, len, @@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) start__mov64, end__mov64); } +extern bool pv_is_native_spin_unlock(void); + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { @@ -59,14 +65,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_cpu_ops, clts); PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_cpu_ops, wbinvd); - - patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); - break; +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCK) + case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): + if (pv_is_native_spin_unlock()) { + start = start_pv_lock_ops_queued_spin_unlock; + end = end_pv_lock_ops_queued_spin_unlock; + goto patch_site; + } +#endif default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; + +patch_site: + ret = paravirt_patch_insns(ibuf, len, start, end); + break; } #undef PATCH_SITE return ret; From 
bf0c7c34adc286bec3a5a38c00c773ba1b2d0396 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 24 Apr 2015 14:56:39 -0400 Subject: [PATCH 049/481] locking/pvqspinlock, x86: Enable PV qspinlock for KVM This patch adds the necessary KVM specific code to allow KVM to support the CPU halting and kicking operations needed by the queue spinlock PV code. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: David Vrabel Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-11-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvm.c | 43 +++++++++++++++++++++++++++++++++++++++++++ kernel/Kconfig.locks | 2 +- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9435620062df..6c21d931bd24 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu) kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); } + +#ifdef CONFIG_QUEUED_SPINLOCK + +#include + +static void kvm_wait(u8 *ptr, u8 val) +{ + unsigned long flags; + + if (in_nmi()) + return; + + local_irq_save(flags); + + if (READ_ONCE(*ptr) != val) + goto out; + + /* + * halt until it's our turn and kicked. Note that we do safe halt + * for irq enabled case to avoid hang when lock info is overwritten + * in irq spinlock slowpath and no spurious interrupt occur to save us. + */ + if (arch_irqs_disabled_flags(flags)) + halt(); + else + safe_halt(); + +out: + local_irq_restore(flags); +} + +#else /* !CONFIG_QUEUED_SPINLOCK */ + enum kvm_contention_stat { TAKEN_SLOW, TAKEN_SLOW_PICKUP, @@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) } } +#endif /* !CONFIG_QUEUED_SPINLOCK */ + /* * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. */ @@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; +#ifdef CONFIG_QUEUED_SPINLOCK + __pv_init_lock_hash(); + pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_lock_ops.wait = kvm_wait; + pv_lock_ops.kick = kvm_kick_cpu; +#else /* !CONFIG_QUEUED_SPINLOCK */ pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); pv_lock_ops.unlock_kick = kvm_unlock_kick; +#endif } static __init int kvm_spinlock_init_jump(void) diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 95fdad866a98..4379eef9334d 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -240,7 +240,7 @@ config ARCH_USE_QUEUED_SPINLOCK config QUEUED_SPINLOCK def_bool y if ARCH_USE_QUEUED_SPINLOCK - depends on SMP && !PARAVIRT_SPINLOCKS + depends on SMP && (!PARAVIRT_SPINLOCKS || !XEN) config ARCH_USE_QUEUE_RWLOCK bool From e95e6f176c61dd0e7bd9fdfb4956df1f9bfe99d4 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 24 Apr 2015 14:56:40 -0400 Subject: [PATCH 050/481] locking/pvqspinlock, x86: Enable PV qspinlock for Xen This patch adds the necessary Xen specific code to allow Xen to support the CPU halting and kicking operations needed by the queue spinlock PV code. 
Signed-off-by: David Vrabel Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-12-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- arch/x86/xen/spinlock.c | 64 ++++++++++++++++++++++++++++++++++++++--- kernel/Kconfig.locks | 2 +- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 956374c1edbc..af907a90fb19 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -17,6 +17,56 @@ #include "xen-ops.h" #include "debugfs.h" +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; +static DEFINE_PER_CPU(char *, irq_name); +static bool xen_pvspin = true; + +#ifdef CONFIG_QUEUED_SPINLOCK + +#include + +static void xen_qlock_kick(int cpu) +{ + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); +} + +/* + * Halt the current CPU & release it back to the host + */ +static void xen_qlock_wait(u8 *byte, u8 val) +{ + int irq = __this_cpu_read(lock_kicker_irq); + + /* If kicker interrupts not initialized yet, just spin */ + if (irq == -1) + return; + + /* clear pending */ + xen_clear_irq_pending(irq); + barrier(); + + /* + * We check the byte value after clearing pending IRQ to make sure + * that we won't miss a wakeup event because of the clearing. + * + * The sync_clear_bit() call in xen_clear_irq_pending() is atomic. + * So it is effectively a memory barrier for x86. + */ + if (READ_ONCE(*byte) != val) + return; + + /* + * If an interrupt happens here, it will leave the wakeup irq + * pending, which will cause xen_poll_irq() to return + * immediately. 
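+ * That is the safe direction: at worst pv_wait() returns spuriously and
+ * the callers (pv_wait_node()/pv_wait_head()) spin and re-check before
+ * waiting again, so a wakeup cannot be lost.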
+ */ + + /* Block until irq becomes pending (or perhaps a spurious wakeup) */ + xen_poll_irq(irq); +} + +#else /* CONFIG_QUEUED_SPINLOCK */ + enum xen_contention_stat { TAKEN_SLOW, TAKEN_SLOW_PICKUP, @@ -100,12 +150,9 @@ struct xen_lock_waiting { __ticket_t want; }; -static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; -static DEFINE_PER_CPU(char *, irq_name); static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); static cpumask_t waiting_cpus; -static bool xen_pvspin = true; __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) { int irq = __this_cpu_read(lock_kicker_irq); @@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) } } } +#endif /* CONFIG_QUEUED_SPINLOCK */ static irqreturn_t dummy_handler(int irq, void *dev_id) { @@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void) return; } printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); +#ifdef CONFIG_QUEUED_SPINLOCK + __pv_init_lock_hash(); + pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_lock_ops.wait = xen_qlock_wait; + pv_lock_ops.kick = xen_qlock_kick; +#else pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; +#endif } /* @@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg) } early_param("xen_nopvspin", xen_parse_nopvspin); -#ifdef CONFIG_XEN_DEBUG_FS +#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCK) static struct dentry *d_spin_debug; diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 4379eef9334d..95dd7587ec34 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -240,7 +240,7 @@ config ARCH_USE_QUEUED_SPINLOCK config QUEUED_SPINLOCK def_bool y if ARCH_USE_QUEUED_SPINLOCK - depends on SMP && (!PARAVIRT_SPINLOCKS || !XEN) + depends on SMP config ARCH_USE_QUEUE_RWLOCK bool From c5c19941ad1bb18f010ae47f1db333c00b276d55 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 8 May 2015 13:25:45 +0300 Subject: [PATCH 051/481] x86/kconfig: Bump default NR_CPUS from 8 to 64 for 64-bit configuration Default NR_CPUS==8 is not enough to cover high-end desktop configuration: Haswell-E has upto 16 threads. Let's increase default NR_CPUS to 64 on 64-bit configuration. With this value CPU bitmask will still fit into one unsigned long. Default for 32-bit configuration is still 8: it's unlikely anybody will run 32-bit kernels on modern hardware. As an alternative we could bump NR_CPUS to 128 to cover all dual-processor servers with some margin. For reference: Debian and Suse build their kernels with NR_CPUS==512, Fedora -- 1024. Signed-off-by: Kirill A. Shutemov Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431080745-19792-1-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 226d5696e1d1..83cd1c7aa409 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -851,7 +851,8 @@ config NR_CPUS default "1" if !SMP default "8192" if MAXSMP default "32" if SMP && X86_BIGSMP - default "8" if SMP + default "8" if SMP && X86_32 + default "64" if SMP ---help--- This allows you to specify the maximum number of CPUs which this kernel will support. 
If CPUMASK_OFFSTACK is enabled, the maximum From cad14bb9f8ef8bed42c3118adc0d9756e2aeeaa1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 8 May 2015 13:25:26 +0300 Subject: [PATCH 052/481] x86/kconfig: Fix the CONFIG_NR_CPUS description Since: b53b5eda8194 ("x86/cpu: Increase max CPU count to 8192") ... the maximum supported NR_CPUS for CPUMASK_OFFSTACK case is 8192. Let's adjust the description to reflect the change. Signed-off-by: Kirill A. Shutemov Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431080726-2490-1-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 83cd1c7aa409..c3333e5be5d7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -856,7 +856,7 @@ config NR_CPUS ---help--- This allows you to specify the maximum number of CPUs which this kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum - supported value is 4096, otherwise the maximum value is 512. The + supported value is 8192, otherwise the maximum value is 512. The minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds From 37815bf866ab6722a47550f8d25ad3f1a16a680c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 9 May 2015 03:06:23 +0930 Subject: [PATCH 053/481] module: Call module notifier on failure after complete_formation() The module notifier call chain for MODULE_STATE_COMING was moved up before the parsing of args, into the complete_formation() call. But if the module failed to load after that, the notifier call chain for MODULE_STATE_GOING was never called and that prevented the users of those call chains from cleaning up anything that was allocated. Link: http://lkml.kernel.org/r/554C52B9.9060700@gmail.com Reported-by: Pontus Fuchs Fixes: 4982223e51e8 "module: set nx before marking module MODULE_STATE_COMING" Cc: stable@vger.kernel.org # 3.16+ Signed-off-by: Steven Rostedt Signed-off-by: Rusty Russell --- kernel/module.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index 42a1d2afb217..cfc9e843a924 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3370,6 +3370,9 @@ static int load_module(struct load_info *info, const char __user *uargs, module_bug_cleanup(mod); mutex_unlock(&module_mutex); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); + /* we can't deallocate the module until we clear memory protection */ unset_module_init_ro_nx(mod); unset_module_core_ro_nx(mod); From f94029d8801d7c27a94a3ea6c4967aa33c49c34b Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Mon, 4 May 2015 23:04:14 +0200 Subject: [PATCH 054/481] clk: si5351: Mention clock-names in the binding documentation Since the introduction of clk-si5351 the way we should deal with DT provided clocks has changed from indexed to named clock phandles. Amend the binding documentation to reflect named clock phandles by clock-names property. 
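For example, a consumer fragment under the amended binding pairs each parent
phandle with its name (abridged from the binding's own example in the diff
below); an si5351c using both inputs would list "xtal", "clkin":

	clocks = <&ref25>;
	clock-names = "xtal";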
Signed-off-by: Sebastian Hesselbarth Signed-off-by: Michael Turquette --- Documentation/devicetree/bindings/clock/silabs,si5351.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/clock/silabs,si5351.txt b/Documentation/devicetree/bindings/clock/silabs,si5351.txt index c40711e8e8f7..28b28309f535 100644 --- a/Documentation/devicetree/bindings/clock/silabs,si5351.txt +++ b/Documentation/devicetree/bindings/clock/silabs,si5351.txt @@ -17,7 +17,8 @@ Required properties: - #clock-cells: from common clock binding; shall be set to 1. - clocks: from common clock binding; list of parent clock handles, shall be xtal reference clock or xtal and clkin for - si5351c only. + si5351c only. Corresponding clock input names are "xtal" and + "clkin" respectively. - #address-cells: shall be set to 1. - #size-cells: shall be set to 0. @@ -71,6 +72,7 @@ i2c-master-node { /* connect xtal input to 25MHz reference */ clocks = <&ref25>; + clock-names = "xtal"; /* connect xtal input as source of pll0 and pll1 */ silabs,pll-source = <0 0>, <1 0>; From 0cd3be6e9a46f84ef7a42e1a5645d32ad547b12e Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Mon, 4 May 2015 23:04:16 +0200 Subject: [PATCH 055/481] clk: si5351: Do not pass struct clk in platform_data When registering clk-si5351 by platform_data, we should not pass struct clk for the reference clocks. Drop struct clk from platform_data and rework the driver to use devm_clk_get of named clock references. While at it, check for at least one valid input clock and properly prepare/ enable valid reference clocks. Signed-off-by: Sebastian Hesselbarth Reported-by: Michael Welling Reported-by: Jean-Francois Moine Reported-by: Russell King Tested-by: Michael Welling Tested-by: Jean-Francois Moine Signed-off-by: Michael Turquette --- drivers/clk/clk-si5351.c | 63 ++++++++++++++++++++-------- include/linux/platform_data/si5351.h | 4 -- 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c index 44ea107cfc67..30335d3b99af 100644 --- a/drivers/clk/clk-si5351.c +++ b/drivers/clk/clk-si5351.c @@ -1128,13 +1128,6 @@ static int si5351_dt_parse(struct i2c_client *client, if (!pdata) return -ENOMEM; - pdata->clk_xtal = of_clk_get(np, 0); - if (!IS_ERR(pdata->clk_xtal)) - clk_put(pdata->clk_xtal); - pdata->clk_clkin = of_clk_get(np, 1); - if (!IS_ERR(pdata->clk_clkin)) - clk_put(pdata->clk_clkin); - /* * property silabs,pll-source : , [<..>] * allow to selectively set pll source @@ -1328,8 +1321,22 @@ static int si5351_i2c_probe(struct i2c_client *client, i2c_set_clientdata(client, drvdata); drvdata->client = client; drvdata->variant = variant; - drvdata->pxtal = pdata->clk_xtal; - drvdata->pclkin = pdata->clk_clkin; + drvdata->pxtal = devm_clk_get(&client->dev, "xtal"); + drvdata->pclkin = devm_clk_get(&client->dev, "clkin"); + + if (PTR_ERR(drvdata->pxtal) == -EPROBE_DEFER || + PTR_ERR(drvdata->pclkin) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + /* + * Check for valid parent clock: VARIANT_A and VARIANT_B need XTAL, + * VARIANT_C can have CLKIN instead. 
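+ * (A deferred probe was already propagated above, so an error pointer at
+ * this point means the named input is genuinely unavailable, not merely
+ * not probed yet.)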
+ */ + if (IS_ERR(drvdata->pxtal) && + (drvdata->variant != SI5351_VARIANT_C || IS_ERR(drvdata->pclkin))) { + dev_err(&client->dev, "missing parent clock\n"); + return -EINVAL; + } drvdata->regmap = devm_regmap_init_i2c(client, &si5351_regmap_config); if (IS_ERR(drvdata->regmap)) { @@ -1393,6 +1400,11 @@ static int si5351_i2c_probe(struct i2c_client *client, } } + if (!IS_ERR(drvdata->pxtal)) + clk_prepare_enable(drvdata->pxtal); + if (!IS_ERR(drvdata->pclkin)) + clk_prepare_enable(drvdata->pclkin); + /* register xtal input clock gate */ memset(&init, 0, sizeof(init)); init.name = si5351_input_names[0]; @@ -1407,7 +1419,8 @@ static int si5351_i2c_probe(struct i2c_client *client, clk = devm_clk_register(&client->dev, &drvdata->xtal); if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return PTR_ERR(clk); + ret = PTR_ERR(clk); + goto err_clk; } /* register clkin input clock gate */ @@ -1425,7 +1438,8 @@ static int si5351_i2c_probe(struct i2c_client *client, if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return PTR_ERR(clk); + ret = PTR_ERR(clk); + goto err_clk; } } @@ -1447,7 +1461,8 @@ static int si5351_i2c_probe(struct i2c_client *client, clk = devm_clk_register(&client->dev, &drvdata->pll[0].hw); if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return -EINVAL; + ret = PTR_ERR(clk); + goto err_clk; } /* register PLLB or VXCO (Si5351B) */ @@ -1471,7 +1486,8 @@ static int si5351_i2c_probe(struct i2c_client *client, clk = devm_clk_register(&client->dev, &drvdata->pll[1].hw); if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return -EINVAL; + ret = PTR_ERR(clk); + goto err_clk; } /* register clk multisync and clk out divider */ @@ -1492,8 +1508,10 @@ static int si5351_i2c_probe(struct i2c_client *client, num_clocks * sizeof(*drvdata->onecell.clks), GFP_KERNEL); if (WARN_ON(!drvdata->msynth || !drvdata->clkout || - !drvdata->onecell.clks)) - return -ENOMEM; + !drvdata->onecell.clks)) { + ret = -ENOMEM; + goto err_clk; + } for (n = 0; n < num_clocks; n++) { drvdata->msynth[n].num = n; @@ -1511,7 +1529,8 @@ static int si5351_i2c_probe(struct i2c_client *client, if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return -EINVAL; + ret = PTR_ERR(clk); + goto err_clk; } } @@ -1538,7 +1557,8 @@ static int si5351_i2c_probe(struct i2c_client *client, if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return -EINVAL; + ret = PTR_ERR(clk); + goto err_clk; } drvdata->onecell.clks[n] = clk; @@ -1557,10 +1577,17 @@ static int si5351_i2c_probe(struct i2c_client *client, &drvdata->onecell); if (ret) { dev_err(&client->dev, "unable to add clk provider\n"); - return ret; + goto err_clk; } return 0; + +err_clk: + if (!IS_ERR(drvdata->pxtal)) + clk_disable_unprepare(drvdata->pxtal); + if (!IS_ERR(drvdata->pclkin)) + clk_disable_unprepare(drvdata->pclkin); + return ret; } static const struct i2c_device_id si5351_i2c_ids[] = { diff --git a/include/linux/platform_data/si5351.h b/include/linux/platform_data/si5351.h index a947ab8b441a..533d9807e543 100644 --- a/include/linux/platform_data/si5351.h +++ b/include/linux/platform_data/si5351.h @@ -5,8 +5,6 @@ #ifndef __LINUX_PLATFORM_DATA_SI5351_H__ #define __LINUX_PLATFORM_DATA_SI5351_H__ -struct clk; - /** * enum si5351_pll_src - Si5351 pll clock source * @SI5351_PLL_SRC_DEFAULT: default, do not change eeprom config @@ -107,8 +105,6 @@ struct si5351_clkout_config { * @clkout: 
array of clkout configuration */ struct si5351_platform_data { - struct clk *clk_xtal; - struct clk *clk_clkin; enum si5351_pll_src pll_src[2]; struct si5351_clkout_config clkout[8]; }; From 79010636174c78209e20c4f44370b2b13312e08c Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 22 Apr 2015 18:21:41 +0530 Subject: [PATCH 056/481] thermal: ti-soc-thermal: dra7: Implement Workaround for Errata i814 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bandgap Temperature read Dtemp can be corrupted DESCRIPTION Read accesses to registers listed below can be corrupted due to incorrect resynchronization between clock domains. Read access to registers below can be corrupted : • CTRL_CORE_DTEMP_MPU/GPU/CORE/DSPEVE/IVA_n (n = 0 to 4) • CTRL_CORE_TEMP_SENSOR_MPU/GPU/CORE/DSPEVE/IVA_n WORKAROUND Multiple reads to CTRL_CORE_TEMP_SENSOR_MPU/GPU/CORE/DSPEVE/IVA[9:0]: BGAP_DTEMPMPU/GPU/CORE/DSPEVE/IVA is needed to discard false value and read right value: 1. Perform two successive reads to BGAP_DTEMP bit field. (a) If read1 returns Val1 and read2 returns Val1, then right value is Val1. (b) If read1 returns Val1, read 2 returns Val2, a third read is needed. 2. Perform third read (a) If read3 returns Val2 then right value is Val2. (b) If read3 returns Val3, then right value is Val3. The above in gist means if val1 and val2 are the same then we can go ahead with that value else we need a third read which will be right since synchronization will be complete by then. Signed-off-by: Keerthy Signed-off-by: Eduardo Valentin --- .../ti-soc-thermal/dra752-thermal-data.c | 3 +- drivers/thermal/ti-soc-thermal/ti-bandgap.c | 37 ++++++++++++++++++- drivers/thermal/ti-soc-thermal/ti-bandgap.h | 4 ++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c index a4929272074f..58b5c6694cd4 100644 --- a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c +++ b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c @@ -420,7 +420,8 @@ const struct ti_bandgap_data dra752_data = { TI_BANDGAP_FEATURE_FREEZE_BIT | TI_BANDGAP_FEATURE_TALERT | TI_BANDGAP_FEATURE_COUNTER_DELAY | - TI_BANDGAP_FEATURE_HISTORY_BUFFER, + TI_BANDGAP_FEATURE_HISTORY_BUFFER | + TI_BANDGAP_FEATURE_ERRATA_814, .fclock_name = "l3instr_ts_gclk_div", .div_ck_name = "l3instr_ts_gclk_div", .conv_table = dra752_adc_to_temp, diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index 62a5d449c388..9747523858a1 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -118,6 +118,37 @@ exit: return ret; } +/** + * ti_errata814_bandgap_read_temp() - helper function to read dra7 sensor temperature + * @bgp: pointer to ti_bandgap structure + * @reg: desired register (offset) to be read + * + * Function to read dra7 bandgap sensor temperature. This is done separately + * so as to workaround the errata "Bandgap Temperature read Dtemp can be + * corrupted" - Errata ID: i814". + * Read accesses to registers listed below can be corrupted due to incorrect + * resynchronization between clock domains. + * Read access to registers below can be corrupted : + * CTRL_CORE_DTEMP_MPU/GPU/CORE/DSPEVE/IVA_n (n = 0 to 4) + * CTRL_CORE_TEMP_SENSOR_MPU/GPU/CORE/DSPEVE/IVA_n + * + * Return: the register value. 
+ */ +static u32 ti_errata814_bandgap_read_temp(struct ti_bandgap *bgp, u32 reg) +{ + u32 val1, val2; + + val1 = ti_bandgap_readl(bgp, reg); + val2 = ti_bandgap_readl(bgp, reg); + + /* If both times we read the same value then that is right */ + if (val1 == val2) + return val1; + + /* if val1 and val2 are different read it third time */ + return ti_bandgap_readl(bgp, reg); +} + /** * ti_bandgap_read_temp() - helper function to read sensor temperature * @bgp: pointer to ti_bandgap structure @@ -148,7 +179,11 @@ static u32 ti_bandgap_read_temp(struct ti_bandgap *bgp, int id) } /* read temperature */ - temp = ti_bandgap_readl(bgp, reg); + if (TI_BANDGAP_HAS(bgp, ERRATA_814)) + temp = ti_errata814_bandgap_read_temp(bgp, reg); + else + temp = ti_bandgap_readl(bgp, reg); + temp &= tsr->bgap_dtemp_mask; if (TI_BANDGAP_HAS(bgp, FREEZE_BIT)) diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.h b/drivers/thermal/ti-soc-thermal/ti-bandgap.h index b3adf72f252d..b2da3fc42b51 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.h +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.h @@ -318,6 +318,9 @@ struct ti_temp_sensor { * TI_BANDGAP_FEATURE_HISTORY_BUFFER - used when the bandgap device features * a history buffer of temperatures. * + * TI_BANDGAP_FEATURE_ERRATA_814 - used to workaorund when the bandgap device + * has Errata 814 + * * TI_BANDGAP_HAS(b, f) - macro to check if a bandgap device is capable of a * specific feature (above) or not. Return non-zero, if yes. */ @@ -331,6 +334,7 @@ struct ti_temp_sensor { #define TI_BANDGAP_FEATURE_FREEZE_BIT BIT(7) #define TI_BANDGAP_FEATURE_COUNTER_DELAY BIT(8) #define TI_BANDGAP_FEATURE_HISTORY_BUFFER BIT(9) +#define TI_BANDGAP_FEATURE_ERRATA_814 BIT(10) #define TI_BANDGAP_HAS(b, f) \ ((b)->conf->features & TI_BANDGAP_FEATURE_ ## f) From e9a90d046b9c9fe8f7c8efefe7b0e64c3c0daa8f Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 22 Apr 2015 18:21:42 +0530 Subject: [PATCH 057/481] thermal: ti-soc-thermal: OMAP5: Implement Workaround for Errata i813 DESCRIPTION Spurious Thermal Alert: Talert can happen randomly while the device remains under the temperature limit defined for this event to trig. This spurious event is caused by a incorrect re-synchronization between clock domains. The comparison between configured threshold and current temperature value can happen while the value is transitioning (metastable), thus causing inappropriate event generation. No spurious event occurs as long as the threshold value stays unchanged. Spurious event can be generated while a thermal alert threshold is modified in CONTROL_BANDGAP_THRESHOLD_MPU/GPU/CORE/DSPEVE/IVA_n. WORKAROUND Spurious event generation can be avoided by performing following sequence when the threshold is modified: 1. Mask the hot/cold events at the thermal IP level. 2. Modify Threshold. 3. Unmask the hot/cold events at the thermal IP level. 
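For clarity, the three steps translate into the simplified sketch below; it reuses the driver's existing register accessors and field names, and the real change (including the re-read of the control register and the error path) is in the diff that follows:

    /* Simplified sketch of the i813 masked threshold update. */
    static void i813_threshold_update_sketch(struct ti_bandgap *bgp,
                                             struct temp_sensor_registers *tsr,
                                             u32 new_thresh, bool hot)
    {
            u32 mask = hot ? tsr->mask_hot_mask : tsr->mask_cold_mask;
            u32 ctrl = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl);

            ti_bandgap_writel(bgp, ctrl & ~mask, tsr->bgap_mask_ctrl); /* 1. mask the event   */
            ti_bandgap_writel(bgp, new_thresh, tsr->bgap_threshold);   /* 2. modify threshold */
            ti_bandgap_writel(bgp, ctrl | mask, tsr->bgap_mask_ctrl);  /* 3. unmask the event */
    }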
Signed-off-by: Keerthy Signed-off-by: Eduardo Valentin --- .../ti-soc-thermal/omap5-thermal-data.c | 3 +- drivers/thermal/ti-soc-thermal/ti-bandgap.c | 41 ++++++++++++++++++- drivers/thermal/ti-soc-thermal/ti-bandgap.h | 4 +- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c index eff0c80fd4af..79ff70c446ba 100644 --- a/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c +++ b/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c @@ -319,7 +319,8 @@ const struct ti_bandgap_data omap5430_data = { TI_BANDGAP_FEATURE_FREEZE_BIT | TI_BANDGAP_FEATURE_TALERT | TI_BANDGAP_FEATURE_COUNTER_DELAY | - TI_BANDGAP_FEATURE_HISTORY_BUFFER, + TI_BANDGAP_FEATURE_HISTORY_BUFFER | + TI_BANDGAP_FEATURE_ERRATA_813, .fclock_name = "l3instr_ts_gclk_div", .div_ck_name = "l3instr_ts_gclk_div", .conv_table = omap5430_adc_to_temp, diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index 9747523858a1..bc14dc874594 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -445,7 +445,7 @@ static int ti_bandgap_update_alert_threshold(struct ti_bandgap *bgp, int id, { struct temp_sensor_data *ts_data = bgp->conf->sensors[id].ts_data; struct temp_sensor_registers *tsr; - u32 thresh_val, reg_val, t_hot, t_cold; + u32 thresh_val, reg_val, t_hot, t_cold, ctrl; int err = 0; tsr = bgp->conf->sensors[id].registers; @@ -477,8 +477,47 @@ static int ti_bandgap_update_alert_threshold(struct ti_bandgap *bgp, int id, ~(tsr->threshold_thot_mask | tsr->threshold_tcold_mask); reg_val |= (t_hot << __ffs(tsr->threshold_thot_mask)) | (t_cold << __ffs(tsr->threshold_tcold_mask)); + + /** + * Errata i813: + * Spurious Thermal Alert: Talert can happen randomly while the device + * remains under the temperature limit defined for this event to trig. + * This spurious event is caused by a incorrect re-synchronization + * between clock domains. The comparison between configured threshold + * and current temperature value can happen while the value is + * transitioning (metastable), thus causing inappropriate event + * generation. No spurious event occurs as long as the threshold value + * stays unchanged. Spurious event can be generated while a thermal + * alert threshold is modified in + * CONTROL_BANDGAP_THRESHOLD_MPU/GPU/CORE/DSPEVE/IVA_n. 
+ */ + + if (TI_BANDGAP_HAS(bgp, ERRATA_813)) { + /* Mask t_hot and t_cold events at the IP Level */ + ctrl = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl); + + if (hot) + ctrl &= ~tsr->mask_hot_mask; + else + ctrl &= ~tsr->mask_cold_mask; + + ti_bandgap_writel(bgp, ctrl, tsr->bgap_mask_ctrl); + } + + /* Write the threshold value */ ti_bandgap_writel(bgp, reg_val, tsr->bgap_threshold); + if (TI_BANDGAP_HAS(bgp, ERRATA_813)) { + /* Unmask t_hot and t_cold events at the IP Level */ + ctrl = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl); + if (hot) + ctrl |= tsr->mask_hot_mask; + else + ctrl |= tsr->mask_cold_mask; + + ti_bandgap_writel(bgp, ctrl, tsr->bgap_mask_ctrl); + } + if (err) { dev_err(bgp->dev, "failed to reprogram thot threshold\n"); err = -EIO; diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.h b/drivers/thermal/ti-soc-thermal/ti-bandgap.h index b2da3fc42b51..0c52f7afba00 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.h +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.h @@ -320,7 +320,8 @@ struct ti_temp_sensor { * * TI_BANDGAP_FEATURE_ERRATA_814 - used to workaorund when the bandgap device * has Errata 814 - * + * TI_BANDGAP_FEATURE_ERRATA_813 - used to workaorund when the bandgap device + * has Errata 813 * TI_BANDGAP_HAS(b, f) - macro to check if a bandgap device is capable of a * specific feature (above) or not. Return non-zero, if yes. */ @@ -335,6 +336,7 @@ struct ti_temp_sensor { #define TI_BANDGAP_FEATURE_COUNTER_DELAY BIT(8) #define TI_BANDGAP_FEATURE_HISTORY_BUFFER BIT(9) #define TI_BANDGAP_FEATURE_ERRATA_814 BIT(10) +#define TI_BANDGAP_FEATURE_ERRATA_813 BIT(11) #define TI_BANDGAP_HAS(b, f) \ ((b)->conf->features & TI_BANDGAP_FEATURE_ ## f) From efa86858e1d8970411a140fa1e0c4dd18a8f2a89 Mon Sep 17 00:00:00 2001 From: Nadav Haklai Date: Wed, 15 Apr 2015 19:08:08 +0200 Subject: [PATCH 058/481] thermal: armada: Update Armada 380 thermal sensor coefficients Improve the Armada 380 thermal sensor accuracy by using updated formula. The updated formula is: Temperature[C degrees] = 0.4761 * tsen_vsen_out - 279.1 Signed-off-by: Nadav Haklai Signed-off-by: Gregory CLEMENT Cc: #v3.16 Signed-off-by: Eduardo Valentin --- drivers/thermal/armada_thermal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index c2556cf5186b..01255fd65135 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -224,9 +224,9 @@ static const struct armada_thermal_data armada380_data = { .is_valid_shift = 10, .temp_shift = 0, .temp_mask = 0x3ff, - .coef_b = 1169498786UL, - .coef_m = 2000000UL, - .coef_div = 4289, + .coef_b = 2931108200UL, + .coef_m = 5000000UL, + .coef_div = 10502, .inverted = true, }; From 5f0d98f278f943cb6115b6fe4441f11a7015bb50 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 30 Oct 2014 10:19:38 +0200 Subject: [PATCH 059/481] iwlwifi: mvm: forbid MIMO on devices that don't support it There are devices that forbid MIMO by the mean of the NVM. Detect thoses devices and forbid MIMO otherwise the firmware would crash. STBC is still allowed on these devices. 
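Condensed, the core check added by this patch looks like the sketch below; it assumes the mvm driver's internal headers and elides the other conditions of rs_mimo_allow():

    /* Sketch: rate scaling must never pick a MIMO rate on such devices. */
    static bool rs_mimo_allowed_sketch(struct iwl_mvm *mvm)
    {
            /*
             * sku_cap_mimo_disabled is parsed from the NVM SKU word as
             * (sku & NVM_SKU_CAP_MIMO_DISABLE), see iwl-nvm-parse.c below.
             */
            if (mvm->nvm_data->sku_cap_mimo_disabled)
                    return false;   /* the firmware would crash on MIMO rates */

            return true;            /* low-latency/P2P checks elided */
    }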
Signed-off-by: Emmanuel Grumbach --- .../net/wireless/iwlwifi/iwl-eeprom-parse.c | 5 +++++ .../net/wireless/iwlwifi/iwl-eeprom-parse.h | 3 +++ drivers/net/wireless/iwlwifi/iwl-nvm-parse.c | 19 +++++++++++++------ drivers/net/wireless/iwlwifi/mvm/rs.c | 3 +++ 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c b/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c index 41ff85de7334..21302b6f2bfd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c @@ -6,6 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,6 +32,7 @@ * BSD LICENSE * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Mobile Communications GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -748,6 +750,9 @@ void iwl_init_ht_hw_capab(const struct iwl_cfg *cfg, return; } + if (data->sku_cap_mimo_disabled) + rx_chains = 1; + ht_info->ht_supported = true; ht_info->cap = IEEE80211_HT_CAP_DSSSCCK40; diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h b/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h index 5234a0bf11e4..750c8c9ee70d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h @@ -6,6 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,6 +32,7 @@ * BSD LICENSE * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Mobile Communications GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -84,6 +86,7 @@ struct iwl_nvm_data { bool sku_cap_11ac_enable; bool sku_cap_amt_enable; bool sku_cap_ipan_enable; + bool sku_cap_mimo_disabled; u16 radio_cfg_type; u8 radio_cfg_step; diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 83903a5025c2..cf86f3cdbb8e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -32,7 +32,7 @@ * BSD LICENSE * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -116,10 +116,11 @@ enum family_8000_nvm_offsets { /* SKU Capabilities (actual values from NVM definition) */ enum nvm_sku_bits { - NVM_SKU_CAP_BAND_24GHZ = BIT(0), - NVM_SKU_CAP_BAND_52GHZ = BIT(1), - NVM_SKU_CAP_11N_ENABLE = BIT(2), - NVM_SKU_CAP_11AC_ENABLE = BIT(3), + NVM_SKU_CAP_BAND_24GHZ = BIT(0), + NVM_SKU_CAP_BAND_52GHZ = BIT(1), + NVM_SKU_CAP_11N_ENABLE = BIT(2), + NVM_SKU_CAP_11AC_ENABLE = BIT(3), + NVM_SKU_CAP_MIMO_DISABLE = BIT(5), }; /* @@ -368,6 +369,11 @@ static void iwl_init_vht_hw_capab(const struct iwl_cfg *cfg, if (cfg->ht_params->ldpc) vht_cap->cap |= IEEE80211_VHT_CAP_RXLDPC; + if (data->sku_cap_mimo_disabled) { + num_rx_ants = 1; + num_tx_ants = 1; + } + if (num_tx_ants > 1) vht_cap->cap |= IEEE80211_VHT_CAP_TXSTBC; else @@ -610,6 +616,7 @@ iwl_parse_nvm_data(struct device *dev, const struct iwl_cfg *cfg, data->sku_cap_11n_enable = false; data->sku_cap_11ac_enable = data->sku_cap_11n_enable && (sku & NVM_SKU_CAP_11AC_ENABLE); + data->sku_cap_mimo_disabled = sku & NVM_SKU_CAP_MIMO_DISABLE; data->n_hw_addrs = iwl_get_n_hw_addrs(cfg, nvm_sw); diff --git a/drivers/net/wireless/iwlwifi/mvm/rs.c b/drivers/net/wireless/iwlwifi/mvm/rs.c index f9928f2c125f..33cd68ae7bf9 100644 --- a/drivers/net/wireless/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/iwlwifi/mvm/rs.c @@ -180,6 +180,9 @@ static bool rs_mimo_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta, if (iwl_mvm_vif_low_latency(mvmvif) && mvmsta->vif->p2p) return false; + if (mvm->nvm_data->sku_cap_mimo_disabled) + return false; + return true; } From 17d489019c5249d059768184c80e9b2b0269b81e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Apr 2015 14:49:07 +1000 Subject: [PATCH 060/481] KVM: PPC: Book3S HV: Fix list traversal in error case This fixes a regression introduced in commit 25fedfca94cf, "KVM: PPC: Book3S HV: Move vcore preemption point up into kvmppc_run_vcpu", which leads to a user-triggerable oops. In the case where we try to run a vcore on a physical core that is not in single-threaded mode, or the vcore has too many threads for the physical core, we iterate the list of runnable vcpus to make each one return an EBUSY error to userspace. Since this involves taking each vcpu off the runnable_threads list for the vcore, we need to use list_for_each_entry_safe rather than list_for_each_entry to traverse the list. Otherwise the kernel will crash with an oops message like this: Unable to handle kernel paging request for data at address 0x000fff88 Faulting instruction address: 0xd00000001e635dc8 Oops: Kernel access of bad area, sig: 11 [#2] SMP NR_CPUS=1024 NUMA PowerNV ... 
CPU: 48 PID: 91256 Comm: qemu-system-ppc Tainted: G D 3.18.0 #1 task: c00000274e507500 ti: c0000027d1924000 task.ti: c0000027d1924000 NIP: d00000001e635dc8 LR: d00000001e635df8 CTR: c00000000011ba50 REGS: c0000027d19275b0 TRAP: 0300 Tainted: G D (3.18.0) MSR: 9000000000009033 CR: 22002824 XER: 00000000 CFAR: c000000000008468 DAR: 00000000000fff88 DSISR: 40000000 SOFTE: 1 GPR00: d00000001e635df8 c0000027d1927830 d00000001e64c850 0000000000000001 GPR04: 0000000000000001 0000000000000001 0000000000000000 0000000000000000 GPR08: 0000000000200200 0000000000000000 0000000000000000 d00000001e63e588 GPR12: 0000000000002200 c000000007dbc800 c000000fc7800000 000000000000000a GPR16: fffffffffffffffc c000000fd5439690 c000000fc7801c98 0000000000000001 GPR20: 0000000000000003 c0000027d1927aa8 c000000fd543b348 c000000fd543b350 GPR24: 0000000000000000 c000000fa57f0000 0000000000000030 0000000000000000 GPR28: fffffffffffffff0 c000000fd543b328 00000000000fe468 c000000fd543b300 NIP [d00000001e635dc8] kvmppc_run_core+0x198/0x17c0 [kvm_hv] LR [d00000001e635df8] kvmppc_run_core+0x1c8/0x17c0 [kvm_hv] Call Trace: [c0000027d1927830] [d00000001e635df8] kvmppc_run_core+0x1c8/0x17c0 [kvm_hv] (unreliable) [c0000027d1927a30] [d00000001e638350] kvmppc_vcpu_run_hv+0x5b0/0xdd0 [kvm_hv] [c0000027d1927b70] [d00000001e510504] kvmppc_vcpu_run+0x44/0x60 [kvm] [c0000027d1927ba0] [d00000001e50d4a4] kvm_arch_vcpu_ioctl_run+0x64/0x170 [kvm] [c0000027d1927be0] [d00000001e504be8] kvm_vcpu_ioctl+0x5e8/0x7a0 [kvm] [c0000027d1927d40] [c0000000002d6720] do_vfs_ioctl+0x490/0x780 [c0000027d1927de0] [c0000000002d6ae4] SyS_ioctl+0xd4/0xf0 [c0000027d1927e30] [c000000000009358] syscall_exit+0x0/0x98 Instruction dump: 60000000 60420000 387e1b30 38800003 38a00001 38c00000 480087d9 e8410018 ebde1c98 7fbdf040 3bdee368 419e0048 <813e1b20> 939e1b18 2f890001 409effcc ---[ end trace 8cdf50251cca6680 ]--- Fixes: 25fedfca94cfbf2461314c6c34ef58e74a31b025 Signed-off-by: Paul Mackerras Reviewed-by: Alexander Graf Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 48d3c5d2ecc9..df81caab7383 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc) */ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vnext; int i; int srcu_idx; @@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if ((threads_per_core > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, + arch.run_list) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); From f5d0684e848f01347ba510545822c205889f8acc Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Thu, 2 Apr 2015 14:51:35 +0100 Subject: [PATCH 061/481] cifs: Don't replace dentries for dfs mounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing a readdir on a dfs root can result in the dentries for directories with a dfs share mounted being replaced by new dentries for objects returned by the readdir call. These new dentries on shares mounted with unix extenstions show up as symlinks pointing to the dfs share. 
# mount -t cifs -o sec=none //vm140-31/dfsroot cifs # stat cifs/testlink/testfile; ls -l cifs File: ‘cifs/testlink/testfile’ Size: 0 Blocks: 0 IO Block: 16384 regular empty file Device: 27h/39d Inode: 130120 Links: 1 Access: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/ root) Access: 2015-03-31 13:55:50.106018200 +0100 Modify: 2015-03-31 13:55:50.106018200 +0100 Change: 2015-03-31 13:55:50.106018200 +0100 Birth: - total 0 drwxr-xr-x 2 root root 0 Mar 31 13:54 testdir lrwxrwxrwx 1 root root 19 Mar 24 14:25 testlink -> \vm140-31\test In the example above, the stat command mounts the dfs share at cifs/testlink. The subsequent ls on the dfsroot directory replaces the dentry for testlink with a symlink. In the earlier code, the d_invalidate command returned an -EBUSY error when attempting to invalidate directories. This stopped the code from replacing the directories with symlinks returned by the readdir call. Changes were recently made to the d_invalidate() command so that it no longer returns an error code. This results in the directory with the mounted dfs share being replaced by a symlink which denotes a dfs share. Signed-off-by: Sachin Prabhu Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/readdir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b4a47237486b..b1eede3678a9 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -90,6 +90,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, if (dentry) { inode = d_inode(dentry); if (inode) { + if (d_mountpoint(dentry)) + goto out; /* * If we're generating inode numbers, then we don't * want to clobber the existing one with the one that From bc8ebdc4f54cc944b0ecc0fb0d18b0ffbaab0468 Mon Sep 17 00:00:00 2001 From: Nakajima Akira Date: Fri, 13 Feb 2015 15:35:58 +0900 Subject: [PATCH 062/481] Fix that several functions handle incorrect value of mapchars Cifs client has problem with reserved chars filename. [BUG1] : several functions handle incorrect value of mapchars - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); [BUG2] : forget to convert reserved chars when creating SymbolicLink. - CIFSUnixCreateSymLink() calls cifs_strtoUTF16 + CIFSUnixCreateSymLink() calls cifsConvertToUTF16() with remap [BUG3] : forget to convert reserved chars when getting SymbolicLink. 
- CIFSSMBUnixQuerySymLink() calls cifs_strtoUTF16 + CIFSSMBUnixQuerySymLink() calls cifsConvertToUTF16() with remap [BUG4] : /proc/mounts don't show "mapposix" when using mapposix mount option + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) + seq_puts(s, ",mapposix"); Reported-by: t.wede@kw-reneg.de Reported-by: Nakajima Akira Signed-off-by: Nakajima Akira Signed-off-by: Carl Schaefer Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 3 ++- fs/cifs/cifsfs.c | 2 ++ fs/cifs/cifsproto.h | 4 ++-- fs/cifs/cifssmb.c | 21 +++++++++++---------- fs/cifs/dir.c | 3 +-- fs/cifs/file.c | 3 +-- fs/cifs/inode.c | 6 ++---- fs/cifs/link.c | 3 ++- fs/cifs/smb1ops.c | 3 ++- 9 files changed, 25 insertions(+), 23 deletions(-) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 430e0348c99e..7dc886c9a78f 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -24,6 +24,7 @@ #include "cifsfs.h" #include "dns_resolve.h" #include "cifs_debug.h" +#include "cifs_unicode.h" static LIST_HEAD(cifs_dfs_automount_list); @@ -312,7 +313,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) xid = get_xid(); rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, &num_referrals, &referrals, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); free_xid(xid); cifs_put_tlink(tlink); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f5089bde3635..0a9fb6b53126 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",nouser_xattr"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) seq_puts(s, ",mapchars"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) + seq_puts(s, ",mapposix"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) seq_puts(s, ",sfu"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index c31ce98c1704..c63fd1dde25b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -361,11 +361,11 @@ extern int CIFSUnixCreateHardLink(const unsigned int xid, extern int CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, - const struct nls_table *nls_codepage); + const struct nls_table *nls_codepage, int remap); extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **syminfo, - const struct nls_table *nls_codepage); + const struct nls_table *nls_codepage, int remap); extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, char **symlinkinfo, const struct nls_table *nls_codepage); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 84650a51c7c4..1091affba425 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2784,7 +2784,7 @@ copyRetry: int CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, - const struct nls_table *nls_codepage) + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -2804,9 +2804,9 @@ createSymLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifs_strtoUTF16((__le16 *) pSMB->FileName, fromName, - /* find define for this maxpathcomponent */ - PATH_MAX, nls_codepage); + cifsConvertToUTF16((__le16 *) pSMB->FileName, fromName, + /* find define for this maxpathcomponent */ + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ 
name_len *= 2; @@ -2828,9 +2828,9 @@ createSymLinkRetry: data_offset = (char *) (&pSMB->hdr.Protocol) + offset; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len_target = - cifs_strtoUTF16((__le16 *) data_offset, toName, PATH_MAX - /* find define for this maxpathcomponent */ - , nls_codepage); + cifsConvertToUTF16((__le16 *) data_offset, toName, + /* find define for this maxpathcomponent */ + PATH_MAX, nls_codepage, remap); name_len_target++; /* trailing null */ name_len_target *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -3034,7 +3034,7 @@ winCreateHardLinkRetry: int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **symlinkinfo, - const struct nls_table *nls_codepage) + const struct nls_table *nls_codepage, int remap) { /* SMB_QUERY_FILE_UNIX_LINK */ TRANSACTION2_QPI_REQ *pSMB = NULL; @@ -3055,8 +3055,9 @@ querySymLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifs_strtoUTF16((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage); + cifsConvertToUTF16((__le16 *) pSMB->FileName, + searchName, PATH_MAX, nls_codepage, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 338d56936f6a..c3eb998a99bd 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -620,8 +620,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, } rc = CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (rc) goto mknod_out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cafbf10521d5..5cfa7129d876 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -140,8 +140,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, posix_flags = cifs_posix_convert_flags(f_flags); rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, poplock, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_put_tlink(tlink); if (rc) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 55b58112d122..ef71aa1515f6 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -373,8 +373,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* could have done a find first instead but this returns more info */ rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); cifs_put_tlink(tlink); if (!rc) { @@ -2215,8 +2214,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) pTcon = tlink_tcon(tlink); rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_put_tlink(tlink); } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 252e672d5604..e6c707cc62b3 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -717,7 +717,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname); else if (pTcon->unix_ext) rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname, - cifs_sb->local_nls); + cifs_sb->local_nls, + cifs_remap(cifs_sb)); /* else rc = CIFSCreateReparseSymLink(xid, pTcon, fromName, toName, cifs_sb_target->local_nls); */ diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 
7bfdd6066276..fc537c29044e 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -960,7 +960,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, /* Check for unix extensions */ if (cap_unix(tcon->ses)) { rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path, - cifs_sb->local_nls); + cifs_sb->local_nls, + cifs_remap(cifs_sb)); if (rc == -EREMOTE) rc = cifs_unix_dfs_readlink(xid, tcon, full_path, target_path, From 52c9d2badd1ae4d11c29de57d4e964e48afd3cb4 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 10 May 2015 21:17:10 -0400 Subject: [PATCH 063/481] locking/pvqspinlock: Replace xchg() by the more descriptive set_mb() The xchg() function was used in pv_wait_node() to set a certain value and provide a memory barrier which is what the set_mb() function is for. This patch replaces the xchg() call by set_mb(). Suggested-by: Linus Torvalds Signed-off-by: Waiman Long Cc: Douglas Hatch Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock_paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index b5758a95a8d3..27ab96dca68c 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -175,7 +175,7 @@ static void pv_wait_node(struct mcs_spinlock *node) * * Matches the xchg() from pv_kick_node(). */ - (void)xchg(&pn->state, vcpu_halted); + set_mb(pn->state, vcpu_halted); if (!READ_ONCE(node->locked)) pv_wait(&pn->state, vcpu_halted); From 62c7a1e9ae54ef66658df9614bdbc09cbbdaa6f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 11 May 2015 09:47:23 +0200 Subject: [PATCH 064/481] locking/pvqspinlock: Rename QUEUED_SPINLOCK to QUEUED_SPINLOCKS Valentin Rothberg reported that we use CONFIG_QUEUED_SPINLOCKS in arch/x86/kernel/paravirt_patch_32.c, while the symbol is called CONFIG_QUEUED_SPINLOCK. (Note the extra 'S') But the typo was natural: the proper English term for such a generic object would be 'queued spinlocks' - so rename this and related symbols accordingly to the plural form. 
Reported-by: Valentin Rothberg Cc: Douglas Hatch Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Cc: Waiman Long Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++-- arch/x86/include/asm/paravirt.h | 6 +++--- arch/x86/include/asm/paravirt_types.h | 6 +++--- arch/x86/include/asm/spinlock.h | 4 ++-- arch/x86/include/asm/spinlock_types.h | 4 ++-- arch/x86/kernel/kvm.c | 10 +++++----- arch/x86/kernel/paravirt-spinlocks.c | 8 ++++---- arch/x86/kernel/paravirt_patch_64.c | 4 ++-- arch/x86/xen/spinlock.c | 10 +++++----- kernel/Kconfig.locks | 6 +++--- kernel/locking/Makefile | 2 +- 11 files changed, 32 insertions(+), 32 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 50ec043a920d..f8dc6abbe6ae 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -127,7 +127,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_QUEUED_SPINLOCK + select ARCH_USE_QUEUED_SPINLOCKS select ARCH_USE_QUEUE_RWLOCK select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGACTION if X86_32 @@ -667,7 +667,7 @@ config PARAVIRT_DEBUG config PARAVIRT_SPINLOCKS bool "Paravirtualization layer for spinlocks" depends on PARAVIRT && SMP - select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCK + select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS ---help--- Paravirtualized spinlocks allow a pvops backend to replace the spinlock implementation with something virtualization-friendly diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 266c35381b62..d143bfad45d7 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -712,7 +712,7 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) @@ -735,7 +735,7 @@ static __always_inline void pv_kick(int cpu) PVOP_VCALL1(pv_lock_ops.kick, cpu); } -#else /* !CONFIG_QUEUED_SPINLOCK */ +#else /* !CONFIG_QUEUED_SPINLOCKS */ static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, __ticket_t ticket) @@ -749,7 +749,7 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); } -#endif /* CONFIG_QUEUED_SPINLOCK */ +#endif /* CONFIG_QUEUED_SPINLOCKS */ #endif /* SMP && PARAVIRT_SPINLOCKS */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 76cd68426af8..8766c7c395c2 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -336,16 +336,16 @@ typedef u16 __ticket_t; struct qspinlock; struct pv_lock_ops { -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); struct paravirt_callee_save queued_spin_unlock; void (*wait)(u8 *ptr, u8 val); void (*kick)(int cpu); -#else /* !CONFIG_QUEUED_SPINLOCK */ +#else /* !CONFIG_QUEUED_SPINLOCKS */ struct paravirt_callee_save lock_spinning; void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); -#endif /* !CONFIG_QUEUED_SPINLOCK */ +#endif /* !CONFIG_QUEUED_SPINLOCKS */ }; /* This contains all the paravirt structures: we get a convenient diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 4ec5413156ca..be0a05913b91 
100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -42,7 +42,7 @@ extern struct static_key paravirt_ticketlocks_enabled; static __always_inline bool static_key_false(struct static_key *key); -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS #include #else @@ -200,7 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) cpu_relax(); } } -#endif /* CONFIG_QUEUED_SPINLOCK */ +#endif /* CONFIG_QUEUED_SPINLOCKS */ /* * Read-write spinlocks, allowing multiple readers diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 5df1f1b9a4b0..65c3e37f879a 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -23,7 +23,7 @@ typedef u32 __ticketpair_t; #define TICKET_SHIFT (sizeof(__ticket_t) * 8) -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS #include #else typedef struct arch_spinlock { @@ -36,7 +36,7 @@ typedef struct arch_spinlock { } arch_spinlock_t; #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } -#endif /* CONFIG_QUEUED_SPINLOCK */ +#endif /* CONFIG_QUEUED_SPINLOCKS */ #include diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6c21d931bd24..1681504e44a4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -585,7 +585,7 @@ static void kvm_kick_cpu(int cpu) } -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS #include @@ -615,7 +615,7 @@ out: local_irq_restore(flags); } -#else /* !CONFIG_QUEUED_SPINLOCK */ +#else /* !CONFIG_QUEUED_SPINLOCKS */ enum kvm_contention_stat { TAKEN_SLOW, @@ -850,7 +850,7 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) } } -#endif /* !CONFIG_QUEUED_SPINLOCK */ +#endif /* !CONFIG_QUEUED_SPINLOCKS */ /* * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. 
@@ -863,13 +863,13 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS __pv_init_lock_hash(); pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_lock_ops.wait = kvm_wait; pv_lock_ops.kick = kvm_kick_cpu; -#else /* !CONFIG_QUEUED_SPINLOCK */ +#else /* !CONFIG_QUEUED_SPINLOCKS */ pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); pv_lock_ops.unlock_kick = kvm_unlock_kick; #endif diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index a33f1eb15003..33ee3e0efd65 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -8,7 +8,7 @@ #include -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS __visible void __native_queued_spin_unlock(struct qspinlock *lock) { native_queued_spin_unlock(lock); @@ -25,15 +25,15 @@ bool pv_is_native_spin_unlock(void) struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), .wait = paravirt_nop, .kick = paravirt_nop, -#else /* !CONFIG_QUEUED_SPINLOCK */ +#else /* !CONFIG_QUEUED_SPINLOCKS */ .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), .unlock_kick = paravirt_nop, -#endif /* !CONFIG_QUEUED_SPINLOCK */ +#endif /* !CONFIG_QUEUED_SPINLOCKS */ #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index e0fb41c8255b..a1fa86782186 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -21,7 +21,7 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov64, "mov %rdi, %rax"); -#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCK) +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); #endif @@ -65,7 +65,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_cpu_ops, clts); PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_cpu_ops, wbinvd); -#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCK) +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): if (pv_is_native_spin_unlock()) { start = start_pv_lock_ops_queued_spin_unlock; diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index af907a90fb19..9e2ba5c6e1dd 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -21,7 +21,7 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); static bool xen_pvspin = true; -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS #include @@ -65,7 +65,7 @@ static void xen_qlock_wait(u8 *byte, u8 val) xen_poll_irq(irq); } -#else /* CONFIG_QUEUED_SPINLOCK */ +#else /* CONFIG_QUEUED_SPINLOCKS */ enum xen_contention_stat { TAKEN_SLOW, @@ -264,7 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) } } } -#endif /* CONFIG_QUEUED_SPINLOCK */ +#endif /* CONFIG_QUEUED_SPINLOCKS */ static irqreturn_t dummy_handler(int irq, void *dev_id) { @@ -328,7 +328,7 @@ void __init xen_init_spinlocks(void) return; } printk(KERN_DEBUG 
"xen: PV spinlocks enabled\n"); -#ifdef CONFIG_QUEUED_SPINLOCK +#ifdef CONFIG_QUEUED_SPINLOCKS __pv_init_lock_hash(); pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); @@ -366,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg) } early_param("xen_nopvspin", xen_parse_nopvspin); -#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCK) +#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS) static struct dentry *d_spin_debug; diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 95dd7587ec34..65d755b6a663 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -235,11 +235,11 @@ config LOCK_SPIN_ON_OWNER def_bool y depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER -config ARCH_USE_QUEUED_SPINLOCK +config ARCH_USE_QUEUED_SPINLOCKS bool -config QUEUED_SPINLOCK - def_bool y if ARCH_USE_QUEUED_SPINLOCK +config QUEUED_SPINLOCKS + def_bool y if ARCH_USE_QUEUED_SPINLOCKS depends on SMP config ARCH_USE_QUEUE_RWLOCK diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index abfcef3c1ef9..132aff9d3fbe 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_SMP) += spinlock.o obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o obj-$(CONFIG_SMP) += lglock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o -obj-$(CONFIG_QUEUED_SPINLOCK) += qspinlock.o +obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o From 6c434d6176c0cb42847c33245189667d645db7bf Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 11 May 2015 10:15:49 +0200 Subject: [PATCH 065/481] x86/mm: Do not flush last cacheline twice in clflush_cache_range() The current algorithm used in clflush_cache_range() can cause the last cache line of the buffer to be flushed twice. Fix that algorithm so that each cache line will only be flushed once. Reported-by: H. Peter Anvin Signed-off-by: Ross Zwisler Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/1430259192-18802-1-git-send-email-ross.zwisler@linux.intel.com Link: http://lkml.kernel.org/r/1431332153-18566-5-git-send-email-bp@alien8.de [ Changed it to 'void *' to simplify the type conversions. 
] Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 89af288ec674..5ddd9005f6c3 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -129,16 +129,15 @@ within(unsigned long addr, unsigned long start, unsigned long end) */ void clflush_cache_range(void *vaddr, unsigned int size) { - void *vend = vaddr + size - 1; + unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; + void *vend = vaddr + size; + void *p; mb(); - for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) - clflushopt(vaddr); - /* - * Flush any possible final partial cacheline: - */ - clflushopt(vend); + for (p = (void *)((unsigned long)vaddr & ~clflush_mask); + p < vend; p += boot_cpu_data.x86_clflush_size) + clflushopt(p); mb(); } From ca7d9b795e6bc78c80a1771ada867994fabcfc01 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 11 May 2015 10:15:51 +0200 Subject: [PATCH 066/481] x86/mm: Add kerneldoc comments for pcommit_sfence() Add kerneldoc comments for pcommit_sfence() describing the purpose of the PCOMMIT instruction and demonstrating its usage with an example. Signed-off-by: Ross Zwisler Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H Peter Anvin Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/1430261196-2401-1-git-send-email-ross.zwisler@linux.intel.com Link: http://lkml.kernel.org/r/1431332153-18566-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/special_insns.h | 38 ++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index aeb4666e0c0a..2270e41b32fd 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -215,6 +215,44 @@ static inline void clwb(volatile void *__p) : [pax] "a" (p)); } +/** + * pcommit_sfence() - persistent commit and fence + * + * The PCOMMIT instruction ensures that data that has been flushed from the + * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to + * memory and is durable on the DIMM. The primary use case for this is + * persistent memory. + * + * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT + * with appropriate fencing. + * + * Example: + * void flush_and_commit_buffer(void *vaddr, unsigned int size) + * { + * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; + * void *vend = vaddr + size; + * void *p; + * + * for (p = (void *)((unsigned long)vaddr & ~clflush_mask); + * p < vend; p += boot_cpu_data.x86_clflush_size) + * clwb(p); + * + * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes + * // MFENCE via mb() also works + * wmb(); + * + * // PCOMMIT and the required SFENCE for ordering + * pcommit_sfence(); + * } + * + * After this function completes the data pointed to by 'vaddr' has been + * accepted to memory and will be durable if the 'vaddr' points to persistent + * memory. + * + * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify + * things we include both the PCOMMIT and the required SFENCE in the + * alternatives generated by pcommit_sfence(). 
+ */ static inline void pcommit_sfence(void) { alternative(ASM_NOP7, From cd2f6a5a4704a359635eb34919317052e6a96ba7 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Mon, 11 May 2015 10:15:52 +0200 Subject: [PATCH 067/481] x86/mm/mtrr: Remove incorrect address check in __mtrr_type_lookup() __mtrr_type_lookup() checks MTRR fixed ranges when mtrr_state.have_fixed is set and start is less than 0x100000. However, the 'else if (start < 0x1000000)' in the code checks with an incorrect address as it has an extra-zero in the address. The code still runs correctly as this check is meaningless, though. This patch replaces the incorrect address check with 'else' with no condition. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1427234921-19737-4-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1431332153-18566-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 7d74f7b3c6ba..5b239679cfc9 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -137,7 +137,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) idx = 1 * 8; idx += ((start - 0x80000) >> 14); return mtrr_state.fixed_ranges[idx]; - } else if (start < 0x1000000) { + } else { idx = 3 * 8; idx += ((start - 0xC0000) >> 12); return mtrr_state.fixed_ranges[idx]; From e4b6be33c28923d8cde53023e0888b1c5d1a9027 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 11 May 2015 10:15:53 +0200 Subject: [PATCH 068/481] x86/mm: Add ioremap_uc() helper to map memory uncacheable (not UC-) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ioremap_nocache() currently uses UC- by default. Our goal is to eventually make UC the default. Linux maps UC- to PCD=1, PWT=0 page attributes on non-PAT systems. Linux maps UC to PCD=1, PWT=1 page attributes on non-PAT systems. On non-PAT and PAT systems a WC MTRR has different effects on pages with either of these attributes. In order to help with a smooth transition its best to enable use of UC (PCD,1, PWT=1) on a region as that ensures a WC MTRR will have no effect on a region, this however requires us to have an way to declare a region as UC and we currently do not have a way to do this. WC MTRR on non-PAT system with PCD=1, PWT=0 (UC-) yields WC. WC MTRR on non-PAT system with PCD=1, PWT=1 (UC) yields UC. WC MTRR on PAT system with PCD=1, PWT=0 (UC-) yields WC. WC MTRR on PAT system with PCD=1, PWT=1 (UC) yields UC. A flip of the default ioremap_nocache() behaviour from UC- to UC can therefore regress a memory region from effective memory type WC to UC if MTRRs are used. Use of MTRRs should be phased out and in the best case only arch_phys_wc_add() use will remain, even if this happens arch_phys_wc_add() will have an effect on non-PAT systems and changes to default ioremap_nocache() behaviour could regress drivers. Now, ideally we'd use ioremap_nocache() on the regions in which we'd need uncachable memory types and avoid any MTRRs on those regions. 
There are however some restrictions on MTRRs use, such as the requirement of having the base and size of variable sized MTRRs to be powers of two, which could mean having to use a WC MTRR over a large area which includes a region in which write-combining effects are undesirable. Add ioremap_uc() to help with the both phasing out of MTRR use and also provide a way to blacklist small WC undesirable regions in devices with mixed regions which are size-implicated to use large WC MTRRs. Use of ioremap_uc() helps phase out MTRR use by avoiding regressions with an eventual flip of default behaviour or ioremap_nocache() from UC- to UC. Drivers working with WC MTRRs can use the below table to review and consider the use of ioremap*() and similar helpers to ensure appropriate behaviour long term even if default ioremap_nocache() behaviour changes from UC- to UC. Although ioremap_uc() is being added we leave set_memory_uc() to use UC- as only initial memory type setup is required to be able to accommodate existing device drivers and phase out MTRR use. It should also be clarified that set_memory_uc() cannot be used with IO memory, even though its use will not return any errors, it really has no effect. ---------------------------------------------------------------------- MTRR Non-PAT PAT Linux ioremap value Effective memory type ---------------------------------------------------------------------- Non-PAT | PAT PAT |PCD ||PWT ||| WC 000 WB _PAGE_CACHE_MODE_WB WC | WC WC 001 WC _PAGE_CACHE_MODE_WC WC* | WC WC 010 UC- _PAGE_CACHE_MODE_UC_MINUS WC* | WC WC 011 UC _PAGE_CACHE_MODE_UC UC | UC ---------------------------------------------------------------------- Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Acked-by: H. Peter Anvin Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Vetter Cc: Dave Airlie Cc: Davidlohr Bueso Cc: Denys Vlasenko Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Travis Cc: Peter Zijlstra Cc: Suresh Siddha Cc: Thierry Reding Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Toshi Kani Cc: Ville Syrjälä Cc: Vlastimil Babka Cc: Will Deacon Cc: linux-fbdev@vger.kernel.org Link: http://lkml.kernel.org/r/1430343851-967-2-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1431332153-18566-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 1 + arch/x86/mm/ioremap.c | 36 +++++++++++++++++++++++++++++++++++- arch/x86/mm/pageattr.c | 3 +++ include/asm-generic/io.h | 8 ++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 34a5b93704d3..4afc05ffa566 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -177,6 +177,7 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) * look at pci_iomap(). */ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 70e7444c6835..a493bb83aa89 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -237,7 +237,8 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) * pat_enabled ? 
_PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use - * UC MINUS. + * UC MINUS. Drivers that are certain they need or can already + * be converted over to strong UC can use ioremap_uc(). */ enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS; @@ -246,6 +247,39 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) } EXPORT_SYMBOL(ioremap_nocache); +/** + * ioremap_uc - map bus memory into CPU space as strongly uncachable + * @phys_addr: bus address of the memory + * @size: size of the resource to map + * + * ioremap_uc performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked with a strong + * preference as completely uncachable on the CPU when possible. For non-PAT + * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT + * systems this will set the PAT entry for the pages as strong UC. This call + * will honor existing caching rules from things like the PCI bus. Note that + * there are other caches and buffers on many busses. In particular driver + * authors should read up on PCI writes. + * + * It's useful if some control registers are in such an area and + * write combining or read caching is not desirable: + * + * Must be freed with iounmap. + */ +void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size) +{ + enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC; + + return __ioremap_caller(phys_addr, size, pcm, + __builtin_return_address(0)); +} +EXPORT_SYMBOL_GPL(ioremap_uc); + /** * ioremap_wc - map memory into CPU space write combined * @phys_addr: bus address of the memory diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 5ddd9005f6c3..c77abd7f92a2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1467,6 +1467,9 @@ int _set_memory_uc(unsigned long addr, int numpages) { /* * for now UC MINUS. see comments in ioremap_nocache() + * If you really need strong UC use ioremap_uc(), but note + * that you cannot override IO areas with set_memory_*() as + * these helpers cannot work with IO memory. */ return change_page_attr_set(&addr, numpages, cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 9db042304df3..90ccba7f9f9a 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -769,6 +769,14 @@ static inline void __iomem *ioremap_nocache(phys_addr_t offset, size_t size) } #endif +#ifndef ioremap_uc +#define ioremap_uc ioremap_uc +static inline void __iomem *ioremap_uc(phys_addr_t offset, size_t size) +{ + return ioremap_nocache(offset, size); +} +#endif + #ifndef ioremap_wc #define ioremap_wc ioremap_wc static inline void __iomem *ioremap_wc(phys_addr_t offset, size_t size) From 1fcb61c52bbdbbc46d132acf7dab9ad0eca433fe Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 23 Apr 2015 01:07:08 -0700 Subject: [PATCH 069/481] x86/mm/pageattr: Remove an unused variable in slow_virt_to_phys() The patch doesn't change any logic. Signed-off-by: Dexuan Cui Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429776428-4475-1-git-send-email-decui@microsoft.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index c77abd7f92a2..397838eb292b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -417,13 +417,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) phys_addr_t phys_addr; unsigned long offset; enum pg_level level; - unsigned long psize; unsigned long pmask; pte_t *pte; pte = lookup_address(virt_addr, &level); BUG_ON(!pte); - psize = page_level_size(level); pmask = page_level_mask(level); offset = virt_addr & ~pmask; phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; From 9b5a4e395c2f39fae89f75e4a749be5dba342d22 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Mon, 11 May 2015 14:04:14 +0200 Subject: [PATCH 070/481] ALSA: hda - Add headset mic quirk for Dell Inspiron 5548 This enables the headset microphone on Dell Inspiron 5548, or at least some variants of it. Cc: stable@vger.kernel.org BugLink: https://bugs.launchpad.net/bugs/1452175 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4a0a238c9864..37c2c9f91df0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5346,6 +5346,13 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x17, 0x40000000}, {0x1d, 0x40700001}, {0x21, 0x02211050}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell Inspiron 5548", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC255_STANDARD_PINS, + {0x12, 0x90a60180}, + {0x14, 0x90170130}, + {0x17, 0x40000000}, + {0x1d, 0x40700001}, + {0x21, 0x02211040}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS, {0x13, 0x40000000}), From 47b4e1fc4972cc43a19121bc2608a60aef3bf216 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Mon, 11 May 2015 11:31:15 +0200 Subject: [PATCH 071/481] mac80211: move WEP tailroom size check Remove checking tailroom when adding IV as it uses only headroom, and move the check to the ICV generation that actually needs the tailroom. In other case I hit such warning and datapath don't work, when testing: - IBSS + WEP - ath9k with hw crypt enabled - IPv6 data (ping6) WARNING: CPU: 3 PID: 13301 at net/mac80211/wep.c:102 ieee80211_wep_add_iv+0x129/0x190 [mac80211]() [...] Call Trace: [] dump_stack+0x45/0x57 [] warn_slowpath_common+0x8a/0xc0 [] warn_slowpath_null+0x1a/0x20 [] ieee80211_wep_add_iv+0x129/0x190 [mac80211] [] ieee80211_crypto_wep_encrypt+0x6b/0xd0 [mac80211] [] invoke_tx_handlers+0xc51/0xf30 [mac80211] [...] 
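A minimal user-space model of the headroom/tailroom split this fix relies on (struct fake_skb and its helpers are illustrative stand-ins for struct sk_buff, not mac80211 code; only the 4-byte IV and ICV lengths match WEP): prepending the IV consumes headroom only, appending the ICV consumes tailroom only, so each check belongs next to the operation that needs the space.

#include <stdio.h>
#include <string.h>

#define WEP_IV_LEN  4
#define WEP_ICV_LEN 4

struct fake_skb {
	unsigned char buf[256];
	size_t head;	/* offset of first used byte */
	size_t tail;	/* offset one past the last used byte */
};

static size_t headroom(const struct fake_skb *skb) { return skb->head; }
static size_t tailroom(const struct fake_skb *skb) { return sizeof(skb->buf) - skb->tail; }

/* Prepending the IV only needs headroom, so only headroom is checked here. */
static int add_iv(struct fake_skb *skb)
{
	if (headroom(skb) < WEP_IV_LEN)
		return -1;
	skb->head -= WEP_IV_LEN;
	memset(&skb->buf[skb->head], 0xAA, WEP_IV_LEN);
	return 0;
}

/* Appending the ICV needs tailroom, so the tailroom check lives here,
 * not in add_iv(); hardware crypto may never call this at all. */
static int add_icv(struct fake_skb *skb)
{
	if (tailroom(skb) < WEP_ICV_LEN)
		return -1;
	memset(&skb->buf[skb->tail], 0xBB, WEP_ICV_LEN);
	skb->tail += WEP_ICV_LEN;
	return 0;
}

int main(void)
{
	struct fake_skb skb = { .head = 16, .tail = 200 };

	printf("add_iv:  %d\n", add_iv(&skb));
	printf("add_icv: %d\n", add_icv(&skb));
	return 0;
}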
Cc: stable@vger.kernel.org Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg --- net/mac80211/wep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index a4220e92f0cc..efa3f48f1ec5 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -98,8 +98,7 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN || - skb_headroom(skb) < IEEE80211_WEP_IV_LEN)) + if (WARN_ON(skb_headroom(skb) < IEEE80211_WEP_IV_LEN)) return NULL; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -167,6 +166,9 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, size_t len; u8 rc4key[3 + WLAN_KEY_LEN_WEP104]; + if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN)) + return -1; + iv = ieee80211_wep_add_iv(local, skb, keylen, keyidx); if (!iv) return -1; From 7cbeed9bce7580479bb97457dad220cb3594b875 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 7 May 2015 16:20:15 +0800 Subject: [PATCH 072/481] KVM: MMU: fix smap permission check Current permission check assumes that RSVD bit in PFEC is always zero, however, it is not true since MMIO #PF will use it to quickly identify MMIO access Fix it by clearing the bit if walking guest page table is needed Signed-off-by: Xiao Guangrong Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.h | 2 ++ arch/x86/kvm/paging_tmpl.h | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index c7d65637c851..06eb2fc1bab8 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -166,6 +166,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index = (pfec >> 1) + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); + WARN_ON(pfec & PFERR_RSVD_MASK); + return (mmu->permissions[index] >> pte_access) & 1; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index fd49c867b25a..6e6d115fe9b5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, mmu_is_nested(vcpu)); if (likely(r != RET_MMIO_PF_INVALID)) return r; + + /* + * page fault with PFEC.RSVD = 1 is caused by shadow + * page fault, should not be used to walk guest page + * table. + */ + error_code &= ~PFERR_RSVD_MASK; }; r = mmu_topup_memory_caches(vcpu); From 898761158be7682082955e3efa4ad24725305fc7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 2 Apr 2015 11:04:05 +0200 Subject: [PATCH 073/481] KVM: MMU: fix CR4.SMEP=1, CR0.WP=0 with shadow pages smep_andnot_wp is initialized in kvm_init_shadow_mmu and shadow pages should not be reused for different values of it. Thus, it has to be added to the mask in kvm_mmu_pte_write. 
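A simplified sketch of why the bit has to be part of the comparison mask (the bitfield layout below is illustrative only, not the real kvm_mmu_page_role): two shadow pages may only be treated as interchangeable if they agree on every bit selected by the mask, and a bit left out of the mask is silently ignored.

#include <stdio.h>

union page_role {
	unsigned int word;
	struct {
		unsigned cr0_wp:1;
		unsigned cr4_pae:1;
		unsigned nxe:1;
		unsigned smep_andnot_wp:1;
	};
};

/* A role bit missing from the mask never influences the match. */
static int roles_match(union page_role a, union page_role b, union page_role mask)
{
	return (a.word & mask.word) == (b.word & mask.word);
}

int main(void)
{
	union page_role a = { .word = 0 }, b = { .word = 0 }, mask = { .word = 0 };

	a.smep_andnot_wp = 1;				/* pages differ only in this bit */
	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;	/* old mask: bit not included */
	printf("old mask: match=%d (wrongly reusable)\n", roles_match(a, b, mask));

	mask.smep_andnot_wp = 1;			/* fixed mask */
	printf("new mask: match=%d\n", roles_match(a, b, mask));
	return 0;
}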
Reviewed-by: Xiao Guangrong Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d43867c33bc4..209fe1477465 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4238,7 +4238,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); - mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; + mask.cr0_wp = mask.cr4_pae = mask.nxe = mask.smep_andnot_wp = 1; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { From 0be0226f07d14b153a5eedf2bb86e1eb7dcefab5 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 11 May 2015 22:55:21 +0800 Subject: [PATCH 074/481] KVM: MMU: fix SMAP virtualization KVM may turn a user page to a kernel page when kernel writes a readonly user page if CR0.WP = 1. This shadow page entry will be reused after SMAP is enabled so that kernel is allowed to access this user page Fix it by setting SMAP && !CR0.WP into shadow page's role and reset mmu once CR4.SMAP is updated Signed-off-by: Xiao Guangrong Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/mmu.txt | 18 ++++++++++++++---- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 16 ++++++++++++---- arch/x86/kvm/mmu.h | 2 -- arch/x86/kvm/x86.c | 8 +++----- 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 53838d9c6295..c59bd9bc41ef 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -169,6 +169,10 @@ Shadow pages contain the following information: Contains the value of cr4.smep && !cr0.wp for which the page is valid (pages for which this is true are different from other pages; see the treatment of cr0.wp=0 below). + role.smap_andnot_wp: + Contains the value of cr4.smap && !cr0.wp for which the page is valid + (pages for which this is true are different from other pages; see the + treatment of cr0.wp=0 below). gfn: Either the guest page table containing the translations shadowed by this page, or the base page frame for linear translations. See role.direct. @@ -344,10 +348,16 @@ on fault type: (user write faults generate a #PF) -In the first case there is an additional complication if CR4.SMEP is -enabled: since we've turned the page into a kernel page, the kernel may now -execute it. We handle this by also setting spte.nx. If we get a user -fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back. +In the first case there are two additional complications: +- if CR4.SMEP is enabled: since we've turned the page into a kernel page, + the kernel may now execute it. We handle this by also setting spte.nx. + If we get a user fetch or read fault, we'll change spte.u=1 and + spte.nx=gpte.nx back. +- if CR4.SMAP is disabled: since the page has been changed to a kernel + page, it can not be reused when CR4.SMAP is enabled. We set + CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note, + here we do not care the case that CR4.SMAP is enabled since KVM will + directly inject #PF to guest due to failed permission check. 
To prevent an spte that was converted into a kernel page with cr0.wp=0 from being written by the kernel after cr0.wp has changed to 1, we make diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dea2e7e962e3..e61c3a4ee131 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -207,6 +207,7 @@ union kvm_mmu_page_role { unsigned nxe:1; unsigned cr0_wp:1; unsigned smep_andnot_wp:1; + unsigned smap_andnot_wp:1; }; }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 209fe1477465..44a7d2515497 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, } } -void update_permission_bitmask(struct kvm_vcpu *vcpu, - struct kvm_mmu *mmu, bool ept) +static void update_permission_bitmask(struct kvm_vcpu *vcpu, + struct kvm_mmu *mmu, bool ept) { unsigned bit, byte, pfec; u8 map; @@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) { bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); struct kvm_mmu *context = &vcpu->arch.mmu; MMU_WARN_ON(VALID_PAGE(context->root_hpa)); @@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) context->base_role.cr0_wp = is_write_protection(vcpu); context->base_role.smep_andnot_wp = smep && !is_write_protection(vcpu); + context->base_role.smap_andnot_wp + = smap && !is_write_protection(vcpu); } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); @@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; - union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; bool remote_flush, local_flush, zap_page; + union kvm_mmu_page_role mask = (union kvm_mmu_page_role) { + .cr0_wp = 1, + .cr4_pae = 1, + .nxe = 1, + .smep_andnot_wp = 1, + .smap_andnot_wp = 1, + }; /* * If we don't have indirect shadow pages, it means no page is @@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); - mask.cr0_wp = mask.cr4_pae = mask.nxe = mask.smep_andnot_wp = 1; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 06eb2fc1bab8..0ada65ecddcf 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -71,8 +71,6 @@ enum { int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); -void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - bool ept); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c73efcd03e29..986b3f5d0523 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | - X86_CR4_PAE | X86_CR4_SMEP; + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP; + if (cr4 & CR4_RESERVED_BITS) return 1; @@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu 
*vcpu, unsigned long cr4) (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); - if ((cr4 ^ old_cr4) & X86_CR4_SMAP) - update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false); - if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) kvm_update_cpuid(vcpu); From dc45708ca9988656d706940df5fd102672c5de92 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 1 May 2015 11:03:02 -0700 Subject: [PATCH 075/481] storvsc: Set the SRB flags correctly when no data transfer is needed Set the SRB flags correctly when there is no data transfer. Without this change some IHV drivers will fail valid commands such as TEST_UNIT_READY. Cc: Signed-off-by: K. Y. Srinivasan Reviewed-by: Long Li Signed-off-by: James Bottomley --- drivers/scsi/storvsc_drv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index d9dad90344d5..3c6584ff65c1 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1600,8 +1600,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) break; default: vm_srb->data_in = UNKNOWN_TYPE; - vm_srb->win8_extension.srb_flags |= (SRB_FLAGS_DATA_IN | - SRB_FLAGS_DATA_OUT); + vm_srb->win8_extension.srb_flags |= SRB_FLAGS_NO_DATA_TRANSFER; break; } From 8b2564ec7410928639db5c09a34d7d8330f1d759 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 28 Apr 2015 18:26:22 +1000 Subject: [PATCH 076/481] lpfc: Fix breakage on big endian kernels This reverts 4fbdf9cb it breaks LPFC on POWER7 machine, big endian kernel. Without this, the kernel enters an infinite oops loop. This is the hardware used for verification: 0005:01:00.0 Fibre Channel [0c04]: Emulex Corporation Saturn-X: LightPulse Fibre Channel Host Adapter [10df:f100] (rev 03) 0005:01:00.1 Fibre Channel [0c04]: Emulex Corporation Saturn-X: LightPulse Fibre Channel Host Adapter [10df:f100] (rev 03) Signed-off-by: Alexey Kardashevskiy Reviewed-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/lpfc/lpfc_scsi.c | 41 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index cb73cf9e9ba5..c140f99772ca 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1129,25 +1129,6 @@ lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb) phba->lpfc_release_scsi_buf(phba, psb); } -/** - * lpfc_fcpcmd_to_iocb - copy the fcp_cmd data into the IOCB - * @data: A pointer to the immediate command data portion of the IOCB. - * @fcp_cmnd: The FCP Command that is provided by the SCSI layer. - * - * The routine copies the entire FCP command from @fcp_cmnd to @data while - * byte swapping the data to big endian format for transmission on the wire. - **/ -static void -lpfc_fcpcmd_to_iocb(uint8_t *data, struct fcp_cmnd *fcp_cmnd) -{ - int i, j; - - for (i = 0, j = 0; i < sizeof(struct fcp_cmnd); - i += sizeof(uint32_t), j++) { - ((uint32_t *)data)[j] = cpu_to_be32(((uint32_t *)fcp_cmnd)[j]); - } -} - /** * lpfc_scsi_prep_dma_buf_s3 - DMA mapping for scsi buffer to SLI3 IF spec * @phba: The Hba for which this call is being executed. 
@@ -1283,7 +1264,6 @@ lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd) * we need to set word 4 of IOCB here */ iocb_cmd->un.fcpi.fcpi_parm = scsi_bufflen(scsi_cmnd); - lpfc_fcpcmd_to_iocb(iocb_cmd->unsli3.fcp_ext.icd, fcp_cmnd); return 0; } @@ -4146,6 +4126,24 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, lpfc_release_scsi_buf(phba, lpfc_cmd); } +/** + * lpfc_fcpcmd_to_iocb - copy the fcp_cmd data into the IOCB + * @data: A pointer to the immediate command data portion of the IOCB. + * @fcp_cmnd: The FCP Command that is provided by the SCSI layer. + * + * The routine copies the entire FCP command from @fcp_cmnd to @data while + * byte swapping the data to big endian format for transmission on the wire. + **/ +static void +lpfc_fcpcmd_to_iocb(uint8_t *data, struct fcp_cmnd *fcp_cmnd) +{ + int i, j; + for (i = 0, j = 0; i < sizeof(struct fcp_cmnd); + i += sizeof(uint32_t), j++) { + ((uint32_t *)data)[j] = cpu_to_be32(((uint32_t *)fcp_cmnd)[j]); + } +} + /** * lpfc_scsi_prep_cmnd - Wrapper func for convert scsi cmnd to FCP info unit * @vport: The virtual port for which this call is being executed. @@ -4225,6 +4223,9 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, fcp_cmnd->fcpCntl3 = 0; phba->fc4ControlRequests++; } + if (phba->sli_rev == 3 && + !(phba->sli3_options & LPFC_SLI3_BG_ENABLED)) + lpfc_fcpcmd_to_iocb(iocb_cmd->unsli3.fcp_ext.icd, fcp_cmnd); /* * Finish initializing those IOCB fields that are independent * of the scsi_cmnd request_buffer From 13bd817bb88499ce1dc1dfdaffcde17fa492aca5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 11 May 2015 11:56:01 +0530 Subject: [PATCH 077/481] powerpc/thp: Serialize pmd clear against a linux page table walk. Serialize against find_linux_pte_or_hugepte() which does lock-less lookup in page tables with local interrupts disabled. For huge pages it casts pmd_t to pte_t. Since the format of pte_t is different from pmd_t we want to prevent transit from pmd pointing to page table to pmd pointing to huge page (and back) while interrupts are disabled. We clear pmd to possibly replace it with page table pointer in different code paths. So make sure we wait for the parallel find_linux_pte_or_hugepage() to finish. Without this patch, a find_linux_pte_or_hugepte() running in parallel to __split_huge_zero_page_pmd() or do_huge_pmd_wp_page_fallback() or zap_huge_pmd() can run into the above issue. With __split_huge_zero_page_pmd() and do_huge_pmd_wp_page_fallback() we clear the hugepage pte before inserting the pmd entry with a regular pgtable address. Such a clear need to wait for the parallel find_linux_pte_or_hugepte() to finish. With zap_huge_pmd(), we can run into issues, with a hugepage pte getting zapped due to a MADV_DONTNEED while other cpu fault it in as small pages. Reported-by: Kirill A. Shutemov Signed-off-by: Aneesh Kumar K.V Reviewed-by: Kirill A. Shutemov Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable_64.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 59daa5eeec25..6bfadf1aa5cb 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -839,6 +839,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm, * hash fault look at them. */ memset(pgtable, 0, PTE_FRAG_SIZE); + /* + * Serialize against find_linux_pte_or_hugepte which does lock-less + * lookup in page tables with local interrupts disabled. 
For huge pages + * it casts pmd_t to pte_t. Since format of pte_t is different from + * pmd_t we want to prevent transit from pmd pointing to page table + * to pmd pointing to huge page (and back) while interrupts are disabled. + * We clear pmd to possibly replace it with page table pointer in + * different code paths. So make sure we wait for the parallel + * find_linux_pte_or_hugepage to finish. + */ + kick_all_cpus_sync(); return old_pmd; } From 7b868e81be38d5ad4f4aa4be819a5fa543cc5ee8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 11 May 2015 11:58:29 +0530 Subject: [PATCH 078/481] powerpc/mm: Return NULL for not present hugetlb page We need to check whether pte is present in follow_huge_addr() and properly return NULL if mapping is not present. Also use READ_ONCE when dereferencing pte_t address. Without this patch, we may wrongly return a zero pfn page in follow_huge_addr(). Reviewed-by: David Gibson Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hugetlbpage.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0ce968b00b7c..3385e3d0506e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -689,27 +689,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { - pte_t *ptep; - struct page *page; + pte_t *ptep, pte; unsigned shift; unsigned long mask, flags; + struct page *page = ERR_PTR(-EINVAL); + + local_irq_save(flags); + ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); + if (!ptep) + goto no_page; + pte = READ_ONCE(*ptep); /* + * Verify it is a huge page else bail. * Transparent hugepages are handled by generic code. We can skip them * here. */ - local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); + if (!shift || pmd_trans_huge(__pmd(pte_val(pte)))) + goto no_page; - /* Verify it is a huge page else bail. */ - if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) { - local_irq_restore(flags); - return ERR_PTR(-EINVAL); + if (!pte_present(pte)) { + page = NULL; + goto no_page; } mask = (1UL << shift) - 1; - page = pte_page(*ptep); + page = pte_page(pte); if (page) page += (address & mask) / PAGE_SIZE; +no_page: local_irq_restore(flags); return page; } From c7114b4e6c53111d415485875725b60213ffc675 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 11 May 2015 13:57:11 -0400 Subject: [PATCH 079/481] locking/qrwlock: Rename QUEUE_RWLOCK to QUEUED_RWLOCKS To be consistent with the queued spinlocks which use CONFIG_QUEUED_SPINLOCKS config parameter, the one for the queued rwlocks is now renamed to CONFIG_QUEUED_RWLOCKS. Signed-off-by: Waiman Long Cc: Borislav Petkov Cc: Douglas Hatch Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431367031-36697-1-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- kernel/Kconfig.locks | 6 +++--- kernel/locking/Makefile | 2 +- kernel/locking/qrwlock.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f8dc6abbe6ae..4e986e809861 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -128,7 +128,7 @@ config X86 select CLONE_BACKWARDS if X86_32 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_SPINLOCKS - select ARCH_USE_QUEUE_RWLOCK + select ARCH_USE_QUEUED_RWLOCKS select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 65d755b6a663..ebdb0043203a 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -242,9 +242,9 @@ config QUEUED_SPINLOCKS def_bool y if ARCH_USE_QUEUED_SPINLOCKS depends on SMP -config ARCH_USE_QUEUE_RWLOCK +config ARCH_USE_QUEUED_RWLOCKS bool -config QUEUE_RWLOCK - def_bool y if ARCH_USE_QUEUE_RWLOCK +config QUEUED_RWLOCKS + def_bool y if ARCH_USE_QUEUED_RWLOCKS depends on SMP diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 132aff9d3fbe..7dd5c9918e4c 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -26,5 +26,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o -obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o +obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index f956ede7f90d..00c12bb390b5 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -1,5 +1,5 @@ /* - * Queue read/write lock + * Queued read/write locks * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by From 6d86750ce62391f5a0a7985d5c050c2e3c823ab9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 4 May 2015 17:23:25 +0200 Subject: [PATCH 080/481] gpio: fix gpio leak in gpiochip_add error path Make sure to free any hogged gpios on errors in gpiochip_add. Also move all forward declarations to the top of the file. 
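A generic sketch of the unwind idiom the fix restores, with stand-in setup/undo functions rather than the real gpiolib calls: every resource acquired before the failing step must be released on the error path, in reverse order of acquisition, and a missing label leaks exactly the way the hogged GPIOs did.

#include <stdio.h>

static int  setup_a(void) { puts("setup a"); return 0; }
static int  setup_b(void) { puts("setup b"); return 0; }
static int  setup_c(void) { puts("setup c"); return -1; }	/* pretend this fails */
static void undo_a(void)  { puts("undo a"); }
static void undo_b(void)  { puts("undo b"); }

static int add_chip(void)
{
	int ret;

	ret = setup_a();
	if (ret)
		goto err_out;
	ret = setup_b();
	if (ret)
		goto err_undo_a;
	ret = setup_c();
	if (ret)
		goto err_undo_b;	/* without this label, b would leak */
	return 0;

err_undo_b:
	undo_b();
err_undo_a:
	undo_a();
err_out:
	return ret;
}

int main(void)
{
	printf("add_chip() = %d\n", add_chip());
	return 0;
}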
Fixes: f625d4601759 ("gpio: add GPIO hogging mechanism") Signed-off-by: Johan Hovold Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 59eaa23767d8..6bc612b8a49f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -53,6 +53,11 @@ static DEFINE_MUTEX(gpio_lookup_lock); static LIST_HEAD(gpio_lookup_list); LIST_HEAD(gpio_chips); + +static void gpiochip_free_hogs(struct gpio_chip *chip); +static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); + + static inline void desc_set_label(struct gpio_desc *d, const char *label) { d->label = label; @@ -297,6 +302,7 @@ int gpiochip_add(struct gpio_chip *chip) err_remove_chip: acpi_gpiochip_remove(chip); + gpiochip_free_hogs(chip); of_gpiochip_remove(chip); spin_lock_irqsave(&gpio_lock, flags); list_del(&chip->list); @@ -313,10 +319,6 @@ err_free_descs: } EXPORT_SYMBOL_GPL(gpiochip_add); -/* Forward-declaration */ -static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); -static void gpiochip_free_hogs(struct gpio_chip *chip); - /** * gpiochip_remove() - unregister a gpio_chip * @chip: the chip to unregister From ffb2d78eca08a1451137583d4e435aecfd6af809 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 12 May 2015 13:23:59 +1000 Subject: [PATCH 081/481] powerpc/mce: fix off by one errors in mce event handling Before 69111bac42f5 ("powerpc: Replace __get_cpu_var uses"), in save_mce_event, index got the value of mce_nest_count, and mce_nest_count was incremented *after* index was set. However, that patch changed the behaviour so that mce_nest count was incremented *before* setting index. This causes an off-by-one error, as get_mce_event sets index as mce_nest_count - 1 before reading mce_event. Thus get_mce_event reads bogus data, causing warnings like "Machine Check Exception, Unknown event version 0 !" and breaking MCEs handling. Restore the old behaviour and unbreak MCE handling by subtracting one from the newly incremented value. The same broken change occured in machine_check_queue_event (which set a queue read by machine_check_process_queued_event). Fix that too, unbreaking printing of MCE information. Fixes: 69111bac42f5 ("powerpc: Replace __get_cpu_var uses") CC: stable@vger.kernel.org CC: Mahesh Salgaonkar CC: Christoph Lameter Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 15c99b649b04..b2eb4686bd8f 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,7 +73,7 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __this_cpu_inc_return(mce_nest_count); + int index = __this_cpu_inc_return(mce_nest_count) - 1; struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); /* @@ -184,7 +184,7 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __this_cpu_inc_return(mce_queue_count); + index = __this_cpu_inc_return(mce_queue_count) - 1; /* If queue is full, just return for now. 
*/ if (index >= MAX_MC_EVT) { __this_cpu_dec(mce_queue_count); From f230e8ffc03f17bd9d6b90ea890b8252a8cc1821 Mon Sep 17 00:00:00 2001 From: Michael Brunner Date: Mon, 11 May 2015 12:46:49 +0200 Subject: [PATCH 082/481] gpio: gpio-kempld: Fix get_direction return value This patch fixes an inverted return value of the gpio get_direction function. The wrong value causes the direction sysfs entry and GPIO debugfs file to indicate incorrect GPIO direction settings. In some cases it also prevents setting GPIO output values. The problem is also present in all other stable kernel versions since linux-3.12. Cc: Stable # v3.12+ Reported-by: Jochen Henneberg Signed-off-by: Michael Brunner Reviewed-by: Guenter Roeck Signed-off-by: Linus Walleij --- drivers/gpio/gpio-kempld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-kempld.c b/drivers/gpio/gpio-kempld.c index 6b8115f34208..83f281dda1e0 100644 --- a/drivers/gpio/gpio-kempld.c +++ b/drivers/gpio/gpio-kempld.c @@ -117,7 +117,7 @@ static int kempld_gpio_get_direction(struct gpio_chip *chip, unsigned offset) = container_of(chip, struct kempld_gpio_data, chip); struct kempld_device_data *pld = gpio->pld; - return kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset); + return !kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset); } static int kempld_gpio_pincount(struct kempld_device_data *pld) From e6c906dedb8a332ece0e789980eef340fdcd9e20 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 12 May 2015 13:35:37 +0300 Subject: [PATCH 083/481] pinctrl: cherryview: Read triggering type from HW if not set when requested If a driver does not set interrupt triggering type when it calls request_irq(), it means use the pin as the hardware/firmware has configured it. There are some drivers doing this. One example is drivers/input/serio/i8042.c that requests the interrupt like: error = request_irq(I8042_KBD_IRQ, i8042_interrupt, IRQF_SHARED, "i8042", i8042_platform_device); It assumes the interrupt is already properly configured. This is true in case of interrupts connected to the IO-APIC. However, some Intel Braswell/Cherryview based machines use a GPIO here instead for the internal keyboard controller. This is a problem because even if the pin/interrupt is properly configured, the irqchip ->irq_set_type() will never be called as the triggering flags are 0. Because of that we do not have correct interrupt flow handler set for the interrupt. Fix this by adding a custom ->irq_startup() that checks if the interrupt has no triggering type set and in that case read the type directly from the hardware and install correct flow handler along with the mapping. Reported-by: Jagadish Krishnamoorthy Reported-by: Freddy Paul Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 82f691eeeec4..732ff757a95f 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1292,6 +1292,49 @@ static void chv_gpio_irq_unmask(struct irq_data *d) chv_gpio_irq_mask_unmask(d, false); } +static unsigned chv_gpio_irq_startup(struct irq_data *d) +{ + /* + * Check if the interrupt has been requested with 0 as triggering + * type. In that case it is assumed that the current values + * programmed to the hardware are used (e.g BIOS configured + * defaults). 
+ * + * In that case ->irq_set_type() will never be called so we need to + * read back the values from hardware now, set correct flow handler + * and update mappings before the interrupt is being used. + */ + if (irqd_get_trigger_type(d) == IRQ_TYPE_NONE) { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct chv_pinctrl *pctrl = gpiochip_to_pinctrl(gc); + unsigned offset = irqd_to_hwirq(d); + int pin = chv_gpio_offset_to_pin(pctrl, offset); + irq_flow_handler_t handler; + unsigned long flags; + u32 intsel, value; + + intsel = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0)); + intsel &= CHV_PADCTRL0_INTSEL_MASK; + intsel >>= CHV_PADCTRL0_INTSEL_SHIFT; + + value = readl(chv_padreg(pctrl, pin, CHV_PADCTRL1)); + if (value & CHV_PADCTRL1_INTWAKECFG_LEVEL) + handler = handle_level_irq; + else + handler = handle_edge_irq; + + spin_lock_irqsave(&pctrl->lock, flags); + if (!pctrl->intr_lines[intsel]) { + __irq_set_handler_locked(d->irq, handler); + pctrl->intr_lines[intsel] = offset; + } + spin_unlock_irqrestore(&pctrl->lock, flags); + } + + chv_gpio_irq_unmask(d); + return 0; +} + static int chv_gpio_irq_type(struct irq_data *d, unsigned type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1357,6 +1400,7 @@ static int chv_gpio_irq_type(struct irq_data *d, unsigned type) static struct irq_chip chv_gpio_irqchip = { .name = "chv-gpio", + .irq_startup = chv_gpio_irq_startup, .irq_ack = chv_gpio_irq_ack, .irq_mask = chv_gpio_irq_mask, .irq_unmask = chv_gpio_irq_unmask, From 2d94e5224e81c58986a8cf44a3bf4830ce5cb96e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 7 May 2015 14:26:34 -0700 Subject: [PATCH 084/481] HID: hid-sensor-hub: Fix debug lock warning When CONFIG_DEBUG_LOCK_ALLOC is defined, mutex magic is compared and warned for (l->magic != l), here l is the address of mutex pointer. In hid-sensor-hub as part of hsdev creation, a per hsdev mutex is initialized during MFD cell creation. This hsdev, which contains, mutex is part of platform data for the a cell. But platform_data is copied in platform_device_add_data() in platform.c. This copy will copy the whole hsdev structure including mutex. But once copied the magic will no longer match. So when client driver call sensor_hub_input_attr_get_raw_value, this will trigger mutex warning. So to avoid this allocate mutex dynamically. This will be same even after copy. 
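A toy model of the lock-debug check that fires here (struct fake_mutex is a stand-in, not the kernel's struct mutex): with lock debugging enabled an initialized mutex records its own address, so a byte-for-byte copy of the containing struct carries a stale self-pointer, while a pointer to a separately allocated mutex survives the copy unchanged.

#include <stdio.h>
#include <stdlib.h>

struct fake_mutex {
	void *magic;	/* set to the mutex's own address by fake_mutex_init() */
};

static void fake_mutex_init(struct fake_mutex *m) { m->magic = m; }
static int  fake_mutex_ok(struct fake_mutex *m)   { return m->magic == m; }

struct pdata_embedded { struct fake_mutex lock; };
struct pdata_pointer  { struct fake_mutex *lock; };

int main(void)
{
	struct pdata_embedded orig, copy;
	struct pdata_pointer  orig2, copy2;

	fake_mutex_init(&orig.lock);
	copy = orig;	/* what copying the platform data effectively does */
	printf("embedded mutex after copy: %s\n",
	       fake_mutex_ok(&copy.lock) ? "ok" : "magic mismatch");

	orig2.lock = malloc(sizeof(*orig2.lock));
	fake_mutex_init(orig2.lock);
	copy2 = orig2;	/* both copies point at the same, still valid mutex */
	printf("pointer mutex after copy:  %s\n",
	       fake_mutex_ok(copy2.lock) ? "ok" : "magic mismatch");

	free(orig2.lock);
	return 0;
}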
Signed-off-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-hub.c | 13 ++++++++++--- include/linux/hid-sensor-hub.h | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index c3f6f1e311ea..090a1ba0abb6 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -294,7 +294,7 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, if (!report) return -EINVAL; - mutex_lock(&hsdev->mutex); + mutex_lock(hsdev->mutex_ptr); if (flag == SENSOR_HUB_SYNC) { memset(&hsdev->pending, 0, sizeof(hsdev->pending)); init_completion(&hsdev->pending.ready); @@ -328,7 +328,7 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, kfree(hsdev->pending.raw_data); hsdev->pending.status = false; } - mutex_unlock(&hsdev->mutex); + mutex_unlock(hsdev->mutex_ptr); return ret_val; } @@ -667,7 +667,14 @@ static int sensor_hub_probe(struct hid_device *hdev, hsdev->vendor_id = hdev->vendor; hsdev->product_id = hdev->product; hsdev->usage = collection->usage; - mutex_init(&hsdev->mutex); + hsdev->mutex_ptr = devm_kzalloc(&hdev->dev, + sizeof(struct mutex), + GFP_KERNEL); + if (!hsdev->mutex_ptr) { + ret = -ENOMEM; + goto err_stop_hw; + } + mutex_init(hsdev->mutex_ptr); hsdev->start_collection_index = i; if (last_hsdev) last_hsdev->end_collection_index = i; diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 0408421d885f..0042bf330b99 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -74,7 +74,7 @@ struct sensor_hub_pending { * @usage: Usage id for this hub device instance. * @start_collection_index: Starting index for a phy type collection * @end_collection_index: Last index for a phy type collection - * @mutex: synchronizing mutex. + * @mutex_ptr: synchronizing mutex pointer. * @pending: Holds information of pending sync read request. */ struct hid_sensor_hub_device { @@ -84,7 +84,7 @@ struct hid_sensor_hub_device { u32 usage; int start_collection_index; int end_collection_index; - struct mutex mutex; + struct mutex *mutex_ptr; struct sensor_hub_pending pending; }; From fdb6eb0a12871d5bfaf266c5a0d5259a5437a72f Mon Sep 17 00:00:00 2001 From: Koro Chen Date: Mon, 11 May 2015 10:36:53 +0800 Subject: [PATCH 085/481] ASoC: dapm: Modify widget stream name according to prefix When there is prefix specified, currently we will add this prefix in widget->name, but not in widget->sname. it causes failure at snd_soc_dapm_link_dai_widgets: if (!w->sname || !strstr(w->sname, dai_w->name)) because dai_w->name has prefix added, but w->sname does not. 
We should also add prefix for stream name Signed-off-by: Koro Chen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/soc-dapm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index defe0f0082b5..158204d08924 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3100,11 +3100,16 @@ snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, } prefix = soc_dapm_prefix(dapm); - if (prefix) + if (prefix) { w->name = kasprintf(GFP_KERNEL, "%s %s", prefix, widget->name); - else + if (widget->sname) + w->sname = kasprintf(GFP_KERNEL, "%s %s", prefix, + widget->sname); + } else { w->name = kasprintf(GFP_KERNEL, "%s", widget->name); - + if (widget->sname) + w->sname = kasprintf(GFP_KERNEL, "%s", widget->sname); + } if (w->name == NULL) { kfree(w); return NULL; From 85e36a1f4a735d991ba5106781ea48e89a0b8901 Mon Sep 17 00:00:00 2001 From: Zidan Wang Date: Tue, 12 May 2015 14:58:36 +0800 Subject: [PATCH 086/481] ASoC: wm8960: fix "RINPUT3" audio route error It should be "RINPUT3" instead of "LINPUT3" route to "Right Input Mixer". Signed-off-by: Zidan Wang Acked-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm8960.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 3035d9856415..e97a7615df85 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -395,7 +395,7 @@ static const struct snd_soc_dapm_route audio_paths[] = { { "Right Input Mixer", "Boost Switch", "Right Boost Mixer", }, { "Right Input Mixer", NULL, "RINPUT1", }, /* Really Boost Switch */ { "Right Input Mixer", NULL, "RINPUT2" }, - { "Right Input Mixer", NULL, "LINPUT3" }, + { "Right Input Mixer", NULL, "RINPUT3" }, { "Left ADC", NULL, "Left Input Mixer" }, { "Right ADC", NULL, "Right Input Mixer" }, From 17fc2e0a3db11889e942c5ab15a1fcb876638f25 Mon Sep 17 00:00:00 2001 From: Zidan Wang Date: Tue, 12 May 2015 14:58:50 +0800 Subject: [PATCH 087/481] ASoC: wm8994: correct BCLK DIV 348 to 384 According to the RM of wm8958, BCLK DIV 348 doesn't exist, correct it to 384. Signed-off-by: Zidan Wang Acked-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm8994.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 4fbc7689339a..a1c04dab6684 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -2754,7 +2754,7 @@ static struct { }; static int fs_ratios[] = { - 64, 128, 192, 256, 348, 512, 768, 1024, 1408, 1536 + 64, 128, 192, 256, 384, 512, 768, 1024, 1408, 1536 }; static int bclk_divs[] = { From 336b7e1f230912cd8df2497be8dd7be4647d8fc8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 11 May 2015 14:06:32 -0400 Subject: [PATCH 088/481] block: remove export for blk_queue_bio With commit ff36ab345 ("dm: remove request-based logic from make_request_fn wrapper") DM no longer calls blk_queue_bio() directly, so remove its export. Doing so required a forward declaration in blk-core.c. 
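A minimal illustration, with placeholder names, of why the forward declaration becomes necessary once the function is made static: it is referenced before its definition appears later in the same file, and without the export there is no longer a header prototype to rely on.

#include <stdio.h>

struct queue { void (*make_request)(struct queue *q, int bio); };

static void queue_bio(struct queue *q, int bio);	/* forward declaration */

static void init_queue(struct queue *q)
{
	q->make_request = queue_bio;	/* used here, defined below */
}

static void queue_bio(struct queue *q, int bio)
{
	(void)q;
	printf("queued bio %d\n", bio);
}

int main(void)
{
	struct queue q;

	init_queue(&q);
	q.make_request(&q, 1);
	return 0;
}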
Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++-- include/linux/blkdev.h | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 7871603f0a29..03b5f8d77f37 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -734,6 +734,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) } EXPORT_SYMBOL(blk_init_queue_node); +static void blk_queue_bio(struct request_queue *q, struct bio *bio); + struct request_queue * blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, spinlock_t *lock) @@ -1578,7 +1580,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -void blk_queue_bio(struct request_queue *q, struct bio *bio) +static void blk_queue_bio(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; @@ -1686,7 +1688,6 @@ out_unlock: spin_unlock_irq(q->queue_lock); } } -EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */ /* * If bio->bi_dev is a partition, remap the location diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f9a516f24de..5d93a6645e88 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -821,8 +821,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern void blk_queue_bio(struct request_queue *q, struct bio *bio); - /* * A queue has just exitted congestion. Note this in the global counter of * congested queues, and wake up anyone who was waiting for requests to be From 64aa42338e9a88c139b89797163714f0f95f3c6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 13 May 2015 15:26:10 +0800 Subject: [PATCH 089/481] esp4: Use high-order sequence number bits for IV generation I noticed we were only using the low-order bits for IV generation when ESN is enabled. This is very bad because it means that the IV can repeat. We must use the full 64 bits. Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 421a80b09b62..30b544f025ac 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -256,7 +256,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) aead_givcrypt_set_crypt(req, sg, sg, clen, iv); aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output.low); + XFRM_SKB_CB(skb)->seq.output.low + + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); From 6d7258ca937027ae86d6d5938d7ae10b6d68f4a4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 13 May 2015 15:27:18 +0800 Subject: [PATCH 090/481] esp6: Use high-order sequence number bits for IV generation I noticed we were only using the low-order bits for IV generation when ESN is enabled. This is very bad because it means that the IV can repeat. We must use the full 64 bits. 
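Both ESP fixes combine the two 32-bit ESN halves the same way; a small stand-alone sketch of the arithmetic (names are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* The extended sequence number is kept as two 32-bit halves; only their
 * combination is unique across the full 64-bit space. */
static uint64_t esn_to_iv_seed(uint32_t lo, uint32_t hi)
{
	return (uint64_t)lo + ((uint64_t)hi << 32);
}

int main(void)
{
	/* Same low half, different high half: the low 32 bits alone repeat,
	 * the combined 64-bit value does not. */
	printf("lo only: %" PRIu32 " vs %" PRIu32 "\n", (uint32_t)7, (uint32_t)7);
	printf("full:    %" PRIu64 " vs %" PRIu64 "\n",
	       esn_to_iv_seed(7, 0), esn_to_iv_seed(7, 1));
	return 0;
}

Truncating to the low half would reuse the same IV seed after the high word increments, which is exactly the repetition the fix avoids.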
Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 31f1b5d5e2ef..7c07ce36aae2 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -248,7 +248,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) aead_givcrypt_set_crypt(req, sg, sg, clen, iv); aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output.low); + XFRM_SKB_CB(skb)->seq.output.low + + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); From f60b8d449d850ae4aa3ee4fdaefec6b7882c92f7 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Thu, 30 Apr 2015 15:36:48 +0200 Subject: [PATCH 091/481] s390/zcrypt: Fix invalid domain handling during ap module unload Added domain checking to prevent reset failures caused by invalid domains. Corrected removal sequence of bus attributes and device. Reviewed-by: Harald Freudenberger Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 6e506a88f43e..3ba611419759 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1950,7 +1950,7 @@ static void ap_reset_domain(void) { int i; - if (ap_domain_index != -1) + if ((ap_domain_index != -1) && (ap_test_config_domain(ap_domain_index))) for (i = 0; i < AP_DEVICES; i++) ap_reset_queue(AP_MKQID(i, ap_domain_index)); } @@ -2095,7 +2095,6 @@ void ap_module_exit(void) hrtimer_cancel(&ap_poll_timer); destroy_workqueue(ap_work_queue); tasklet_kill(&ap_tasklet); - root_device_unregister(ap_root_device); while ((dev = bus_find_device(&ap_bus_type, NULL, NULL, __ap_match_all))) { @@ -2104,6 +2103,7 @@ void ap_module_exit(void) } for (i = 0; ap_bus_attrs[i]; i++) bus_remove_file(&ap_bus_type, ap_bus_attrs[i]); + root_device_unregister(ap_root_device); bus_unregister(&ap_bus_type); unregister_reset_call(&ap_reset_call); if (ap_using_interrupts()) From c431761dddff87039edb3300b9b29d1f9e0af2c9 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 4 May 2015 13:27:49 +0200 Subject: [PATCH 092/481] s390/crypto: fix stckf loop The store-clock-fast loop in generate_entropy() mixes (exors) only the first 64 bytes of the initial page before doing the first SHA256. Fix the loop to mix the store-clock-fast values all over the page. 
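A stand-alone sketch of the arithmetic behind the bug: sizeof() applied to the expression PAGE_SIZE/sizeof(u64) yields the size of the expression's type (8 on a 64-bit build), not the number of u64 slots in a page, so the old loop mixed only 8 values, i.e. 64 bytes.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL
typedef uint64_t u64;

int main(void)
{
	size_t buggy   = sizeof(PAGE_SIZE / sizeof(u64));	/* sizeof(unsigned long) == 8 */
	size_t correct = PAGE_SIZE / sizeof(u64);		/* 512 u64 slots per page */

	printf("buggy bound:   %zu iterations -> %zu bytes mixed\n",
	       buggy, buggy * sizeof(u64));
	printf("correct bound: %zu iterations -> %zu bytes mixed\n",
	       correct, correct * sizeof(u64));
	return 0;
}

With the corrected bound every 64-bit word of the page gets a store-clock value mixed in before the hashing step.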
Signed-off-by: Harald Freudenberger Reported-by: David Binderman Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/prng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 1f374b39a4ec..9d5192c94963 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -125,7 +125,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes) /* fill page with urandom bytes */ get_random_bytes(pg, PAGE_SIZE); /* exor page with stckf values */ - for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) { + for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) { u64 *p = ((u64 *)pg) + n; *p ^= get_tod_clock_fast(); } From f7bcb70ebae0dcdb5a2d859b09e4465784d99029 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 8 May 2015 13:47:23 -0700 Subject: [PATCH 093/481] ktime: Fix ktime_divns to do signed division It was noted that the 32bit implementation of ktime_divns() was doing unsigned division and didn't properly handle negative values. And when a ktime helper was changed to utilize ktime_divns, it caused a regression on some IR blasters. See the following bugzilla for details: https://bugzilla.redhat.com/show_bug.cgi?id=1200353 This patch fixes the problem in ktime_divns by checking and preserving the sign bit, and then reapplying it if appropriate after the division, it also changes the return type to a s64 to make it more obvious this is expected. Nicolas also pointed out that negative dividers would cause infinite loops on 32bit systems, negative dividers is unlikely for users of this function, but out of caution this patch adds checks for negative dividers for both 32-bit (BUG_ON) and 64-bit(WARN_ON) versions to make sure no such use cases creep in. [ tglx: Hand an u64 to do_div() to avoid the compiler warning ] Fixes: 166afb64511e 'ktime: Sanitize ktime_to_us/ms conversion' Reported-and-tested-by: Trevor Cordes Signed-off-by: John Stultz Acked-by: Nicolas Pitre Cc: Ingo Molnar Cc: Josh Boyer Cc: One Thousand Gnomes Cc: Link: http://lkml.kernel.org/r/1431118043-23452-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/ktime.h | 27 +++++++++++++++++++++------ kernel/time/hrtimer.c | 14 ++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 5fc3d1083071..2b6a204bd8d4 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -166,19 +166,34 @@ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) } #if BITS_PER_LONG < 64 -extern u64 __ktime_divns(const ktime_t kt, s64 div); -static inline u64 ktime_divns(const ktime_t kt, s64 div) +extern s64 __ktime_divns(const ktime_t kt, s64 div); +static inline s64 ktime_divns(const ktime_t kt, s64 div) { + /* + * Negative divisors could cause an inf loop, + * so bug out here. + */ + BUG_ON(div < 0); if (__builtin_constant_p(div) && !(div >> 32)) { - u64 ns = kt.tv64; - do_div(ns, div); - return ns; + s64 ns = kt.tv64; + u64 tmp = ns < 0 ? -ns : ns; + + do_div(tmp, div); + return ns < 0 ? -tmp : tmp; } else { return __ktime_divns(kt, div); } } #else /* BITS_PER_LONG < 64 */ -# define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) +static inline s64 ktime_divns(const ktime_t kt, s64 div) +{ + /* + * 32-bit implementation cannot handle negative divisors, + * so catch them on 64bit as well. 
+ */ + WARN_ON(div < 0); + return kt.tv64 / div; +} #endif static inline s64 ktime_to_us(const ktime_t kt) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 76d4bd962b19..93ef7190bdea 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -266,21 +266,23 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) /* * Divide a ktime value by a nanosecond value */ -u64 __ktime_divns(const ktime_t kt, s64 div) +s64 __ktime_divns(const ktime_t kt, s64 div) { - u64 dclc; int sft = 0; + s64 dclc; + u64 tmp; dclc = ktime_to_ns(kt); + tmp = dclc < 0 ? -dclc : dclc; + /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; div >>= 1; } - dclc >>= sft; - do_div(dclc, (unsigned long) div); - - return dclc; + tmp >>= sft; + do_div(tmp, (unsigned long) div); + return dclc < 0 ? -tmp : tmp; } EXPORT_SYMBOL_GPL(__ktime_divns); #endif /* BITS_PER_LONG >= 64 */ From cede88418b385b50f6841e4b2f1586888b8ab924 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 25 Feb 2015 18:56:13 +0100 Subject: [PATCH 094/481] locking/rtmutex: Drop usage of __HAVE_ARCH_CMPXCHG The rtmutex code is the only user of __HAVE_ARCH_CMPXCHG and we have a few other user of cmpxchg() which do not care about __HAVE_ARCH_CMPXCHG. This define was first introduced in 23f78d4a0 ("[PATCH] pi-futex: rt mutex core") which is v2.6.18. The generic cmpxchg was introduced later in 068fbad288 ("Add cmpxchg_local to asm-generic for per cpu atomic operations") which is v2.6.25. Back then something was required to get rtmutex working with the fast path on architectures without cmpxchg and this seems to be the result. It popped up recently on rt-users because ARM (v6+) does not define __HAVE_ARCH_CMPXCHG (even that it implements it) which results in slower locking performance in the fast path. To put some numbers on it: preempt -RT, am335x, 10 loops of 100000 invocations of rt_spin_lock() + rt_spin_unlock() (time "total" is the average of the 10 loops for the 100000 invocations, "loop" is "total / 100000 * 1000"): cmpxchg | slowpath used || cmpxchg used | total | loop || total | loop --------|-----------|-------||------------|------- ARMv6 | 9129.4 us | 91 ns || 3311.9 us | 33 ns generic | 9360.2 us | 94 ns || 10834.6 us | 108 ns ----------------------------||-------------------- Forcing it to generic cmpxchg() made things worse for the slowpath and even worse in cmpxchg() path. It boils down to 14ns more per lock+unlock in a cache hot loop so it might not be that much in real world. The last test was a substitute for pre ARMv6 machine but then I was able to perform the comparison on imx28 which is ARMv5 and therefore is always is using the generic cmpxchg implementation. And the numbers: | total | loop -------- |----------- |-------- slowpath | 263937.2 us | 2639 ns cmpxchg | 16934.2 us | 169 ns -------------------------------- The numbers are larger since the machine is slower in general. However, letting rtmutex use cmpxchg() instead the slowpath seem to improve things. Since from the ARM (tested on am335x + imx28) point of view always using cmpxchg() in rt_mutex_lock() + rt_mutex_unlock() makes sense I would drop the define. 
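A user-space sketch of the fast path this change re-enables on ARM (a toy model using C11 atomics, not the real rtmutex code): an uncontended lock or unlock is a single compare-and-swap on the owner field, and only a failed CAS drops into the slow path.

#include <stdatomic.h>
#include <stdio.h>

struct toy_rtmutex { _Atomic(void *) owner; };

/* Placeholder slow path: the real one queues the waiter and may boost the
 * owner's priority; spinning is enough for the sketch. */
static void slowpath_lock(struct toy_rtmutex *l, void *task)
{
	void *expected;

	do {
		expected = NULL;
	} while (!atomic_compare_exchange_weak(&l->owner, &expected, task));
}

static void toy_lock(struct toy_rtmutex *l, void *task)
{
	void *expected = NULL;

	if (atomic_compare_exchange_strong(&l->owner, &expected, task))
		return;			/* fast path: one cmpxchg, no queueing */
	slowpath_lock(l, task);
}

static void toy_unlock(struct toy_rtmutex *l, void *task)
{
	void *expected = task;

	/* Fast path: hand the lock back with one cmpxchg; the real code
	 * falls into a slow path to wake waiters when this CAS fails. */
	atomic_compare_exchange_strong(&l->owner, &expected, NULL);
}

int main(void)
{
	struct toy_rtmutex lock = { .owner = NULL };
	int me;

	toy_lock(&lock, &me);
	toy_unlock(&lock, &me);
	puts("fast-path lock/unlock done");
	return 0;
}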
Signed-off-by: Sebastian Andrzej Siewior Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: will.deacon@arm.com Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20150225175613.GE6823@linutronix.de Signed-off-by: Thomas Gleixner --- include/asm-generic/cmpxchg.h | 3 --- kernel/locking/rtmutex.c | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h index 811fb1e9b061..3766ab34aa45 100644 --- a/include/asm-generic/cmpxchg.h +++ b/include/asm-generic/cmpxchg.h @@ -86,9 +86,6 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size) /* * Atomic compare and exchange. - * - * Do not define __HAVE_ARCH_CMPXCHG because we want to use it to check whether - * a cmpxchg primitive faster than repeated local irq save/restore exists. */ #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b73279367087..27dff663f9e4 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -70,10 +70,10 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) } /* - * We can speed up the acquire/release, if the architecture - * supports cmpxchg and if there's no debugging state to be set up + * We can speed up the acquire/release, if there's no debugging state to be + * set up. */ -#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) +#ifndef CONFIG_DEBUG_RT_MUTEXES # define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) { From a22e5f579b98f16e24b7184d01c35de26eb5a7f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 May 2015 10:54:25 +0200 Subject: [PATCH 095/481] arch: Remove __ARCH_HAVE_CMPXCHG We removed the only user of this define in the rtmutex code. Get rid of it. Signed-off-by: Thomas Gleixner Cc: Sebastian Andrzej Siewior --- arch/alpha/include/asm/cmpxchg.h | 2 -- arch/avr32/include/asm/cmpxchg.h | 2 -- arch/hexagon/include/asm/cmpxchg.h | 1 - arch/ia64/include/uapi/asm/cmpxchg.h | 2 -- arch/m32r/include/asm/cmpxchg.h | 2 -- arch/m68k/include/asm/cmpxchg.h | 1 - arch/metag/include/asm/cmpxchg.h | 2 -- arch/mips/include/asm/cmpxchg.h | 2 -- arch/parisc/include/asm/cmpxchg.h | 2 -- arch/powerpc/include/asm/cmpxchg.h | 1 - arch/s390/include/asm/cmpxchg.h | 2 -- arch/score/include/asm/cmpxchg.h | 2 -- arch/sh/include/asm/cmpxchg.h | 2 -- arch/sparc/include/asm/cmpxchg_32.h | 1 - arch/sparc/include/asm/cmpxchg_64.h | 2 -- arch/tile/include/asm/atomic_64.h | 3 --- arch/x86/include/asm/cmpxchg.h | 2 -- 17 files changed, 31 deletions(-) diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h index 429e8cd0d78e..e5117766529e 100644 --- a/arch/alpha/include/asm/cmpxchg.h +++ b/arch/alpha/include/asm/cmpxchg.h @@ -66,6 +66,4 @@ #undef __ASM__MB #undef ____cmpxchg -#define __HAVE_ARCH_CMPXCHG 1 - #endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h index 962a6aeab787..366bbeaeb405 100644 --- a/arch/avr32/include/asm/cmpxchg.h +++ b/arch/avr32/include/asm/cmpxchg.h @@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels( if something tries to do an invalid cmpxchg(). 
*/ extern void __cmpxchg_called_with_bad_pointer(void); -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) { diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index 9e7802911a57..a6e34e2acbba 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, * looks just like atomic_cmpxchg on our arch currently with a bunch of * variable casting. */ -#define __HAVE_ARCH_CMPXCHG 1 #define cmpxchg(ptr, old, new) \ ({ \ diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h index f35109b1d907..a0e3620f8f13 100644 --- a/arch/ia64/include/uapi/asm/cmpxchg.h +++ b/arch/ia64/include/uapi/asm/cmpxchg.h @@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void); * indicated by comparing RETURN with OLD. */ -#define __HAVE_ARCH_CMPXCHG 1 - /* * This function doesn't exist, so you'll get a linker error * if something tries to do an invalid cmpxchg(). diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h index de651db20b43..14bf9b739dd2 100644 --- a/arch/m32r/include/asm/cmpxchg.h +++ b/arch/m32r/include/asm/cmpxchg.h @@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size) ((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \ sizeof(*(ptr)))) -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new) { diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h index bc755bc620ad..83b1df80f0ac 100644 --- a/arch/m68k/include/asm/cmpxchg.h +++ b/arch/m68k/include/asm/cmpxchg.h @@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *, * indicated by comparing RETURN with OLD. 
*/ #ifdef CONFIG_RMW_INSNS -#define __HAVE_ARCH_CMPXCHG 1 static inline unsigned long __cmpxchg(volatile void *p, unsigned long old, unsigned long new, int size) diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h index b1bc1be8540f..be29e3e44321 100644 --- a/arch/metag/include/asm/cmpxchg.h +++ b/arch/metag/include/asm/cmpxchg.h @@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return old; } -#define __HAVE_ARCH_CMPXCHG 1 - #define cmpxchg(ptr, o, n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 412f945f1f5e..b71ab4a5fd50 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \ }) -#define __HAVE_ARCH_CMPXCHG 1 - #define __cmpxchg_asm(ld, st, m, old, new) \ ({ \ __typeof(*(m)) __ret; \ diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index dbd13354ec41..0a90b965cccb 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) #define xchg(ptr, x) \ ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) -#define __HAVE_ARCH_CMPXCHG 1 - /* bug catcher for when unsupported size is used - won't link */ extern void __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index d463c68fe7f0..ad6263cffb0f 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. */ -#define __HAVE_ARCH_CMPXCHG 1 static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 4eadec466b8c..411464f4c97a 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -32,8 +32,6 @@ __old; \ }) -#define __HAVE_ARCH_CMPXCHG - #define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ ({ \ register __typeof__(*(p1)) __old1 asm("2") = (o1); \ diff --git a/arch/score/include/asm/cmpxchg.h b/arch/score/include/asm/cmpxchg.h index f384839c3ee5..cc3f6420b71c 100644 --- a/arch/score/include/asm/cmpxchg.h +++ b/arch/score/include/asm/cmpxchg.h @@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m, (unsigned long)(o), \ (unsigned long)(n))) -#define __HAVE_ARCH_CMPXCHG 1 - #include #endif /* _ASM_SCORE_CMPXCHG_H */ diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h index f6bd1406b897..85c97b188d71 100644 --- a/arch/sh/include/asm/cmpxchg.h +++ b/arch/sh/include/asm/cmpxchg.h @@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void); * if something tries to do an invalid cmpxchg(). 
*/ extern void __cmpxchg_called_with_bad_pointer(void); -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, unsigned long new, int size) { diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h index d38b52dca216..83ffb83c5397 100644 --- a/arch/sparc/include/asm/cmpxchg_32.h +++ b/arch/sparc/include/asm/cmpxchg_32.h @@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int * * Cribbed from */ -#define __HAVE_ARCH_CMPXCHG 1 /* bug catcher for when unsupported size is used - won't link */ void __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index 0e1ed6cfbf68..faa2f61058c2 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, #include -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg_u32(volatile int *m, int old, int new) { diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index 7b11c5fadd42..0496970cef82 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) -/* Define this to indicate that cmpxchg is an efficient operation. */ -#define __HAVE_ARCH_CMPXCHG - #endif /* !__ASSEMBLY__ */ #endif /* _ASM_TILE_ATOMIC_64_H */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 99c105d78b7e..ad19841eddfe 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -4,8 +4,6 @@ #include #include /* Provides LOCK_PREFIX */ -#define __HAVE_ARCH_CMPXCHG 1 - /* * Non-existant functions to indicate usage errors at link time * (or compile-time if the compiler implements __compiletime_error(). From 849eca7b9dae0364e2fbe8afdf0fb610d12c9c8f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 12 May 2015 10:00:00 -0700 Subject: [PATCH 096/481] HID: usbhid: Add HID_QUIRK_NOGET for Aten DVI KVM switch Like other KVM switches, the Aten DVI KVM switch needs a quirk to avoid spewing errors: [791759.606542] usb 1-5.4: input irq status -75 received [791759.614537] usb 1-5.4: input irq status -75 received [791759.622542] usb 1-5.4: input irq status -75 received Add it. 
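For background, the quirk table touched below is just an array of (vendor, product, flags) entries that the driver scans when a device is probed. A stripped-down sketch of that lookup follows; the flag value, table layout and entries are simplified for the example and are not the real usbhid table.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define QUIRK_NOGET 0x01	/* made-up flag value for the example */

struct quirk {
	uint16_t vendor;
	uint16_t product;
	uint32_t flags;
};

static const struct quirk quirks[] = {
	{ 0x0557, 0x2204, QUIRK_NOGET },	/* illustrative 2-port KVM entry */
	{ 0x0557, 0x2213, QUIRK_NOGET },	/* illustrative CS682 entry */
};

/* Return the quirk flags for a device, or 0 if it has no entry. */
static uint32_t lookup_quirks(uint16_t vendor, uint16_t product)
{
	for (size_t i = 0; i < sizeof(quirks) / sizeof(quirks[0]); i++)
		if (quirks[i].vendor == vendor && quirks[i].product == product)
			return quirks[i].flags;
	return 0;
}

int main(void)
{
	printf("flags for 0557:2213 = %#x\n",
	       (unsigned int)lookup_quirks(0x0557, 0x2213));
	return 0;
}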
Signed-off-by: Laura Abbott Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 41f167e4d75f..7ce93d927f62 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -164,6 +164,7 @@ #define USB_DEVICE_ID_ATEN_2PORTKVM 0x2204 #define USB_DEVICE_ID_ATEN_4PORTKVM 0x2205 #define USB_DEVICE_ID_ATEN_4PORTKVMC 0x2208 +#define USB_DEVICE_ID_ATEN_CS682 0x2213 #define USB_VENDOR_ID_ATMEL 0x03eb #define USB_DEVICE_ID_ATMEL_MULTITOUCH 0x211c diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index a775143e6265..4696895eb708 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -61,6 +61,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS682, HID_QUIRK_NOGET }, { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FIGHTERSTICK, HID_QUIRK_NOGET }, { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_COMBATSTICK, HID_QUIRK_NOGET }, { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_ECLIPSE_YOKE, HID_QUIRK_NOGET }, From 3fd61b209977db8a9fe6c44d5a5a7aee7a255f64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2015 18:00:26 +0200 Subject: [PATCH 097/481] nvme: fix kernel memory corruption with short INQUIRY buffers If userspace asks for an INQUIRY buffer smaller than 36 bytes, the SCSI translation layer will happily write past the end of the INQUIRY buffer allocation. This is fairly easily reproducible by running the libiscsi test suite and then starting an xfstests run. Fixes: 4f1982 ("NVMe: Update SCSI Inquiry VPD 83h translation") Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-scsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 88f13c525712..44f2514fb775 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -2257,7 +2257,8 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr, page_code = GET_INQ_PAGE_CODE(cmd); alloc_len = GET_INQ_ALLOC_LENGTH(cmd); - inq_response = kmalloc(alloc_len, GFP_KERNEL); + inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH), + GFP_KERNEL); if (inq_response == NULL) { res = -ENOMEM; goto out_mem; From 22ffc3e42aa6a656192a45c7364fbb2de3214d93 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 4 May 2015 12:45:38 -0700 Subject: [PATCH 098/481] Input: sx8654 - fix memory allocation check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have been testing wrong variable when trying to make sure that input allocation succeeded. Reported by Coverity (CID 1295918). 
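The bug class is easy to restate: after every allocation, the NULL check must test the pointer that was just assigned, not an earlier one. A stripped-down user-space sketch of the pattern (names invented, not the driver code):

#include <stdio.h>
#include <stdlib.h>

struct ctx { int id; };
struct input_dev { char name[32]; };

static int probe(void)
{
	struct ctx *ctx = malloc(sizeof(*ctx));

	if (!ctx)
		return -1;

	struct input_dev *input = malloc(sizeof(*input));

	if (!input) {		/* must test 'input' here, not 'ctx' */
		free(ctx);
		return -1;
	}

	free(input);
	free(ctx);
	return 0;
}

int main(void)
{
	printf("probe() -> %d\n", probe());
	return 0;
}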
Acked-by: Sébastien Szymanski Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/sx8654.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/sx8654.c b/drivers/input/touchscreen/sx8654.c index aecb9ad2e701..642f4a53de50 100644 --- a/drivers/input/touchscreen/sx8654.c +++ b/drivers/input/touchscreen/sx8654.c @@ -187,7 +187,7 @@ static int sx8654_probe(struct i2c_client *client, return -ENOMEM; input = devm_input_allocate_device(&client->dev); - if (!sx8654) + if (!input) return -ENOMEM; input->name = "SX8654 I2C Touchscreen"; From 3c0213d17a09601e0c6c0ae0e27caf70d988290f Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 23 Apr 2015 09:08:43 -0700 Subject: [PATCH 099/481] Input: elantech - fix semi-mt protocol for v3 HW When the v3 hardware sees more than one finger, it uses the semi-mt protocol to report the touches. However, it currently works when num_fingers is 0, 1 or 2, but when it is 3 and above, it sends only 1 finger as if num_fingers was 1. This confuses userspace which knows how to deal with extra fingers when all the slots are used, but not when some are missing. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=90101 Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 991dc6b20a58..79363b687195 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -315,7 +315,7 @@ static void elantech_report_semi_mt_data(struct input_dev *dev, unsigned int x2, unsigned int y2) { elantech_set_slot(dev, 0, num_fingers != 0, x1, y1); - elantech_set_slot(dev, 1, num_fingers == 2, x2, y2); + elantech_set_slot(dev, 1, num_fingers >= 2, x2, y2); } /* From 6b19687563fa6f385ac314afa30b7579e8c3174c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 6 May 2015 18:26:58 -0400 Subject: [PATCH 100/481] nfs: stat(2) fails during cthon04 basic test5 on NFSv4.0 When running the Connectathon basic tests against a Solaris NFS server over NFSv4.0, test5 reports that stat(2) returns a file size of zero instead of 1MB. On success, nfs_commit_inode() can return a positive result; see other call sites such as nfs_file_fsync_commit() and nfs_commit_unstable_pages(). The call site recently added in nfs_wb_all() does not prevent that positive return value from leaking to its callers. If it leaks through nfs_sync_inode() back to nfs_getattr(), that causes stat(2) to return a positive return value to user space while also not filling in the passed-in struct stat. Additional clean up: the new logic in nfs_wb_all() is rewritten in bfields-normal form. Fixes: 5bb89b4702e2 ("NFSv4.1/pnfs: Separate out metadata . . 
.") Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d12a4be613a5..dfc19f1575a1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1845,12 +1845,15 @@ int nfs_wb_all(struct inode *inode) trace_nfs_writeback_inode_enter(inode); ret = filemap_write_and_wait(inode->i_mapping); - if (!ret) { - ret = nfs_commit_inode(inode, FLUSH_SYNC); - if (!ret) - pnfs_sync_inode(inode, true); - } + if (ret) + goto out; + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (ret < 0) + goto out; + pnfs_sync_inode(inode, true); + ret = 0; +out: trace_nfs_writeback_inode_exit(inode, ret); return ret; } From feaff8e5b2cfc3eae02cf65db7a400b0b9ffc596 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 12 May 2015 15:48:10 -0400 Subject: [PATCH 101/481] nfs: take extra reference to fl->fl_file when running a setlk We had a report of a crash while stress testing the NFS client: BUG: unable to handle kernel NULL pointer dereference at 0000000000000150 IP: [] locks_get_lock_context+0x8/0x90 PGD 0 Oops: 0000 [#1] SMP Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ebtable_nat ebtable_filter ebtable_broute bridge stp llc ebtables ip6table_security ip6table_mangle ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_raw ip6table_filter ip6_tables iptable_security iptable_mangle iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_raw coretemp crct10dif_pclmul ppdev crc32_pclmul crc32c_intel ghash_clmulni_intel vmw_balloon serio_raw vmw_vmci i2c_piix4 shpchp parport_pc acpi_cpufreq parport nfsd auth_rpcgss nfs_acl lockd grace sunrpc vmwgfx drm_kms_helper ttm drm mptspi scsi_transport_spi mptscsih mptbase e1000 ata_generic pata_acpi CPU: 1 PID: 399 Comm: kworker/1:1H Not tainted 4.1.0-0.rc1.git0.1.fc23.x86_64 #1 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/30/2013 Workqueue: rpciod rpc_async_schedule [sunrpc] task: ffff880036aea7c0 ti: ffff8800791f4000 task.ti: ffff8800791f4000 RIP: 0010:[] [] locks_get_lock_context+0x8/0x90 RSP: 0018:ffff8800791f7c00 EFLAGS: 00010293 RAX: ffff8800791f7c40 RBX: ffff88001f2ad8c0 RCX: ffffe8ffffc80305 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff8800791f7c88 R08: ffff88007fc971d8 R09: 279656d600000000 R10: 0000034a01000000 R11: 279656d600000000 R12: ffff88001f2ad918 R13: ffff88001f2ad8c0 R14: 0000000000000000 R15: 0000000100e73040 FS: 0000000000000000(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000150 CR3: 0000000001c0b000 CR4: 00000000000407e0 Stack: ffffffff8127c5b0 ffff8800791f7c18 ffffffffa0171e29 ffff8800791f7c58 ffffffffa0171ef8 ffff8800791f7c78 0000000000000246 ffff88001ea0ba00 ffff8800791f7c40 ffff8800791f7c40 00000000ff5d86a3 ffff8800791f7ca8 Call Trace: [] ? __posix_lock_file+0x40/0x760 [] ? rpc_make_runnable+0x99/0xa0 [sunrpc] [] ? rpc_wake_up_task_queue_locked.part.35+0xc8/0x250 [sunrpc] [] posix_lock_file_wait+0x4a/0x120 [] ? nfs41_wake_and_assign_slot+0x32/0x40 [nfsv4] [] ? nfs41_sequence_done+0xd8/0x2d0 [nfsv4] [] do_vfs_lock+0x2d/0x30 [nfsv4] [] nfs4_lock_done+0x1ad/0x210 [nfsv4] [] ? __rpc_sleep_on_priority+0x390/0x390 [sunrpc] [] ? __rpc_sleep_on_priority+0x390/0x390 [sunrpc] [] rpc_exit_task+0x2c/0xa0 [sunrpc] [] ? 
call_refreshresult+0x150/0x150 [sunrpc] [] __rpc_execute+0x90/0x460 [sunrpc] [] rpc_async_schedule+0x15/0x20 [sunrpc] [] process_one_work+0x1bb/0x410 [] worker_thread+0x53/0x480 [] ? process_one_work+0x410/0x410 [] ? process_one_work+0x410/0x410 [] kthread+0xd8/0xf0 [] ? kthread_worker_fn+0x180/0x180 [] ret_from_fork+0x42/0x70 [] ? kthread_worker_fn+0x180/0x180 Jean says: "Running locktests with a large number of iterations resulted in a client crash. The test run took a while and hasn't finished after close to 2 hours. The crash happened right after I gave up and killed the test (after 107m) with Ctrl+C." The crash happened because a NULL inode pointer got passed into locks_get_lock_context. The call chain indicates that file_inode(filp) returned NULL, which means that f_inode was NULL. Since that's zeroed out in __fput, that suggests that this filp pointer outlived the last reference. Looking at the code, that seems possible. We copy the struct file_lock that's passed in, but if the task is signalled at an inopportune time we can end up trying to use that file_lock in rpciod context after the process that requested it has already returned (and possibly put its filp reference). Fix this by taking an extra reference to the filp when we allocate the lock info, and put it in nfs4_lock_release. Reported-by: Jean Spector Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 45b35b9b1e36..55e1e3af23a3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -5604,6 +5605,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->server = server; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); + get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); return p; out_free_seqid: @@ -5716,6 +5718,7 @@ static void nfs4_lock_release(void *calldata) nfs_free_seqid(data->arg.lock_seqid); nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); + fput(data->fl.fl_file); kfree(data); dprintk("%s: done!\n", __func__); } From ca79f232054abd079648fdb4400c71a1310f7bc8 Mon Sep 17 00:00:00 2001 From: Wen-chien Jesse Sung Date: Wed, 13 May 2015 11:39:24 +0800 Subject: [PATCH 102/481] Bluetooth: ath3k: Add a new ID 0cf3:e006 to ath3k list Device info in /sys/kernel/debug/usb/devices: T: Bus=01 Lev=01 Prnt=01 Port=05 Cnt=02 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=e006 Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) 
MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Wen-chien Jesse Sung Signed-off-by: Marcel Holtmann --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 288547a3c566..9a05153e1224 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -104,6 +104,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0cf3, 0xe003) }, { USB_DEVICE(0x0CF3, 0xE004) }, { USB_DEVICE(0x0CF3, 0xE005) }, + { USB_DEVICE(0x0CF3, 0xE006) }, { USB_DEVICE(0x13d3, 0x3362) }, { USB_DEVICE(0x13d3, 0x3375) }, { USB_DEVICE(0x13d3, 0x3393) }, @@ -158,6 +159,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index d21f3b4176d3..13d2cae3e13d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -202,6 +202,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, From 2054111b107d9449393d96dce6b66731bbfea9ad Mon Sep 17 00:00:00 2001 From: Wen-chien Jesse Sung Date: Wed, 13 May 2015 11:39:25 +0800 Subject: [PATCH 103/481] Bluetooth: btusb: Add support for 0cf3:e007 Device 0cf3:e007 is one of the QCA ROME family. T: Bus=01 Lev=01 Prnt=01 Port=05 Cnt=02 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=e007 Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) 
MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Wen-chien Jesse Sung Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 13d2cae3e13d..7da7efc2cd67 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -219,6 +219,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* QCA ROME chipset */ + { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME }, From 6ce47fd961fa8fb206433789d7754c73cab3b5d0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 May 2015 22:49:12 +0200 Subject: [PATCH 104/481] rtmutex: Warn if trylock is called from hard/softirq context rt_mutex_trylock() must be called from thread context. It can be called from atomic regions (preemption or interrupts disabled), but not from hard/softirq/nmi context. Add a warning to alert abusers. The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Siewior --- kernel/locking/rtmutex.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 27dff663f9e4..5fa042be94d5 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1441,10 +1441,17 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); * * @lock: the rt_mutex to be locked * + * This function can only be called in thread context. It's safe to + * call it from atomic regions, but not from hard interrupt or soft + * interrupt context. 
+ * * Returns 1 on success and 0 on contention */ int __sched rt_mutex_trylock(struct rt_mutex *lock) { + if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq())) + return 0; + return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); } EXPORT_SYMBOL_GPL(rt_mutex_trylock); From ec0810d2ac1c932dad48f45da67e3adc5c5449a1 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Sat, 2 May 2015 13:36:58 +0300 Subject: [PATCH 105/481] Bluetooth: ath3k: add support of 04ca:300f AR3012 device BugLink: https://bugs.launchpad.net/bugs/1449730 T: Bus=01 Lev=01 Prnt=01 Port=04 Cnt=02 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=300f Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 9a05153e1224..8c81af6dbe06 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -88,6 +88,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04CA, 0x3007) }, { USB_DEVICE(0x04CA, 0x3008) }, { USB_DEVICE(0x04CA, 0x300b) }, + { USB_DEVICE(0x04CA, 0x300f) }, { USB_DEVICE(0x04CA, 0x3010) }, { USB_DEVICE(0x0930, 0x0219) }, { USB_DEVICE(0x0930, 0x0220) }, @@ -144,6 +145,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 7da7efc2cd67..3c10d4dfe9a7 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -186,6 +186,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, From a29ef819f3f34f89a1b9b6a939b4c1cdfe1e85ce Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Wed, 13 May 2015 00:18:26 +0200 Subject: [PATCH 106/481] ARM: dts: imx27: only map 4 Kbyte for fec registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the imx27 documentation, fec has a 4 Kbyte memory space map. Moreover, the actual 16 Kbyte mapping overlaps the SCC (Security Controller) memory register space. So, we reduce the memory register space to 4 Kbyte. 
Signed-off-by: Philippe Reynes Acked-by: Uwe Kleine-König Fixes: 9f0749e3eb88 ("ARM i.MX27: Add devicetree support") Cc: Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx27.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index 6951b66d1ab7..bc215e4b75fd 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -533,7 +533,7 @@ fec: ethernet@1002b000 { compatible = "fsl,imx27-fec"; - reg = <0x1002b000 0x4000>; + reg = <0x1002b000 0x1000>; interrupts = <50>; clocks = <&clks IMX27_CLK_FEC_IPG_GATE>, <&clks IMX27_CLK_FEC_AHB_GATE>; From be346ffaad9bc354075fba5cd009fc4519abdd64 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 12 May 2015 20:53:14 -0400 Subject: [PATCH 107/481] vlan: Correctly propagate promisc|allmulti flags in notifier. Currently vlan notifier handler will try to update all vlans for a device when that device comes up. A problem occurs, however, when the vlan device was set to promiscuous, but not by the user (ex: a bridge). In that case, dev->gflags are not updated. What results is that the lower device ends up with an extra promiscuity count. Here are the backtraces that prove this: [62852.052179] [] __dev_set_promiscuity+0x38/0x1e0 [62852.052186] [] ? _raw_spin_unlock_bh+0x1b/0x40 [62852.052188] [] ? dev_set_rx_mode+0x2e/0x40 [62852.052190] [] dev_set_promiscuity+0x24/0x50 [62852.052194] [] vlan_dev_open+0xd5/0x1f0 [8021q] [62852.052196] [] __dev_open+0xbf/0x140 [62852.052198] [] __dev_change_flags+0x9d/0x170 [62852.052200] [] dev_change_flags+0x29/0x60 The above comes from the setting the vlan device to IFF_UP state. [62852.053569] [] __dev_set_promiscuity+0x38/0x1e0 [62852.053571] [] ? vlan_dev_set_rx_mode+0x2b/0x30 [8021q] [62852.053573] [] __dev_change_flags+0xe5/0x170 [62852.053645] [] dev_change_flags+0x29/0x60 [62852.053647] [] vlan_device_event+0x18a/0x690 [8021q] [62852.053649] [] notifier_call_chain+0x4c/0x70 [62852.053651] [] raw_notifier_call_chain+0x16/0x20 [62852.053653] [] call_netdevice_notifiers+0x2d/0x60 [62852.053654] [] __dev_notify_flags+0x33/0xa0 [62852.053656] [] dev_change_flags+0x52/0x60 [62852.053657] [] do_setlink+0x397/0xa40 And this one comes from the notification code. What we end up with is a vlan with promiscuity count of 1 and and a physical device with a promiscuity count of 2. They should both have a count 1. To resolve this issue, vlan code can use dev_get_flags() api which correctly masks promiscuity and allmulti flags. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/8021q/vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 98a30a5b8664..59555f0f8fc8 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -443,7 +443,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_UP: /* Put all VLANs for this dev in the up state too. */ vlan_group_for_each_dev(grp, i, vlandev) { - flgs = vlandev->flags; + flgs = dev_get_flags(vlandev); if (flgs & IFF_UP) continue; From 5e95235ccd5442d4a4fe11ec4eb99ba1b7959368 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 14 May 2015 14:45:40 +1000 Subject: [PATCH 108/481] powerpc: Align TOC to 256 bytes Recent toolchains force the TOC to be 256 byte aligned. We need to enforce this alignment in our linker script, otherwise pointers to our TOC variables (__toc_start, __prom_init_toc_start) could be incorrect. 
If they are bad, we die a few hundred instructions into boot. Cc: stable@vger.kernel.org Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index f096e72262f4..1db685104ffc 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -213,6 +213,7 @@ SECTIONS *(.opd) } + . = ALIGN(256); .got : AT(ADDR(.got) - LOAD_OFFSET) { __toc_start = .; #ifndef CONFIG_RELOCATABLE From d377c5eb54dd05aeb3094b7740252d19ba7791f7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 14 May 2015 10:04:44 +0200 Subject: [PATCH 109/481] ovl: don't remove non-empty opaque directory When removing an opaque directory we can't just call rmdir() to check for emptiness, because the directory will need to be replaced with a whiteout. The replacement is done with RENAME_EXCHANGE, which doesn't check emptiness. Solution is just to check emptiness by reading the directory. In the future we could add a new rename flag to check for emptiness even for RENAME_EXCHANGE to optimize this case. Reported-by: Vincent Batts Signed-off-by: Miklos Szeredi Tested-by: Jordi Pujol Palomer Fixes: 263b4a0fee43 ("ovl: dont replace opaque dir") Cc: # v4.0+ --- fs/overlayfs/dir.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index d139405d2bfa..2578a0c0677d 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -506,11 +506,25 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) struct dentry *opaquedir = NULL; int err; - if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { - opaquedir = ovl_check_empty_and_clear(dentry); - err = PTR_ERR(opaquedir); - if (IS_ERR(opaquedir)) - goto out; + if (is_dir) { + if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { + opaquedir = ovl_check_empty_and_clear(dentry); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) + goto out; + } else { + LIST_HEAD(list); + + /* + * When removing an empty opaque directory, then it + * makes no sense to replace it with an exact replica of + * itself. But emptiness still needs to be checked. + */ + err = ovl_check_empty_dir(dentry, &list); + ovl_cache_free(&list); + if (err) + goto out; + } } err = ovl_lock_rename_workdir(workdir, upperdir); From 177d0506a911eb60b38b172215df5325ed94fa64 Mon Sep 17 00:00:00 2001 From: Wesley Kuo Date: Wed, 13 May 2015 10:33:15 +0800 Subject: [PATCH 110/481] Bluetooth: Fix remote name event return directly. This patch fixes hci_remote_name_evt dose not resolve name during discovery status is RESOLVING. Before simultaneous dual mode scan enabled, hci_check_pending_name will set discovery status to STOPPED eventually. Signed-off-by: Wesley Kuo Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4663c3dad3f5..c4802f3bd4c5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2854,9 +2854,11 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, * state. If we were running both LE and BR/EDR inquiry * simultaneously, and BR/EDR inquiry is already * finished, stop discovery, otherwise BR/EDR inquiry - * will stop discovery when finished. + * will stop discovery when finished. 
If we will resolve + * remote device name, do not change discovery state. */ - if (!test_bit(HCI_INQUIRY, &hdev->flags)) + if (!test_bit(HCI_INQUIRY, &hdev->flags) && + hdev->discovery.state != DISCOVERY_RESOLVING) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); } else { From 965278dcb8ab0b1f666cc47937933c4be4aea48d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 May 2015 15:07:54 +0100 Subject: [PATCH 111/481] ARM: 8356/1: mm: handle non-pmd-aligned end of RAM At boot time we round the memblock limit down to section size in an attempt to ensure that we will have mapped this RAM with section mappings prior to allocating from it. When mapping RAM we iterate over PMD-sized chunks, creating these section mappings. Section mappings are only created when the end of a chunk is aligned to section size. Unfortunately, with classic page tables (where PMD_SIZE is 2 * SECTION_SIZE) this means that if a chunk is between 1M and 2M in size the first 1M will not be mapped despite having been accounted for in the memblock limit. This has been observed to result in page tables being allocated from unmapped memory, causing boot-time hangs. This patch modifies the memblock limit rounding to always round down to PMD_SIZE instead of SECTION_SIZE. For classic MMU this means that we will round the memblock limit down to a 2M boundary, matching the limits on section mappings, and preventing allocations from unmapped memory. For LPAE there should be no change as PMD_SIZE == SECTION_SIZE. Signed-off-by: Mark Rutland Reported-by: Stefan Agner Tested-by: Stefan Agner Acked-by: Laura Abbott Tested-by: Hans de Goede Cc: Catalin Marinas Cc: Steve Capper Cc: stable@vger.kernel.org Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4e6ef896c619..7186382672b5 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void) } /* - * Find the first non-section-aligned page, and point + * Find the first non-pmd-aligned page, and point * memblock_limit at it. This relies on rounding the - * limit down to be section-aligned, which happens at - * the end of this function. + * limit down to be pmd-aligned, which happens at the + * end of this function. * * With this algorithm, the start or end of almost any - * bank can be non-section-aligned. The only exception - * is that the start of the bank 0 must be section- + * bank can be non-pmd-aligned. The only exception is + * that the start of the bank 0 must be section- * aligned, since otherwise memory would need to be * allocated when mapping the start of bank 0, which * occurs before any free memory is mapped. */ if (!memblock_limit) { - if (!IS_ALIGNED(block_start, SECTION_SIZE)) + if (!IS_ALIGNED(block_start, PMD_SIZE)) memblock_limit = block_start; - else if (!IS_ALIGNED(block_end, SECTION_SIZE)) + else if (!IS_ALIGNED(block_end, PMD_SIZE)) memblock_limit = arm_lowmem_limit; } @@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void) high_memory = __va(arm_lowmem_limit - 1) + 1; /* - * Round the memblock limit down to a section size. This + * Round the memblock limit down to a pmd size. This * helps to ensure that we will allocate memory from the - * last full section, which should be mapped. + * last full pmd, which should be mapped. 
*/ if (memblock_limit) - memblock_limit = round_down(memblock_limit, SECTION_SIZE); + memblock_limit = round_down(memblock_limit, PMD_SIZE); if (!memblock_limit) memblock_limit = arm_lowmem_limit; From fc99f97af2f79be02c5045c9a02c50bdcc0c8ff8 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 9 Apr 2015 16:39:51 +0200 Subject: [PATCH 112/481] drm/msm: Fix a couple of 64-bit build warnings Avoid casts from pointers to fixed-size integers to prevent the compiler from warning. Print virtual memory addresses using %p instead. Also turn a couple of %d/%x specifiers into %zu/%zd/%zx to avoid further warnings due to mismatched format strings. Signed-off-by: Thierry Reding Reviewed-by: Rob Clark --- drivers/gpu/drm/msm/edp/edp_aux.c | 4 ++-- drivers/gpu/drm/msm/msm_drv.c | 10 +++++----- drivers/gpu/drm/msm/msm_gem.c | 2 +- drivers/gpu/drm/msm/msm_iommu.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/msm/edp/edp_aux.c b/drivers/gpu/drm/msm/edp/edp_aux.c index 5f5a84f6074c..208f9d47f82e 100644 --- a/drivers/gpu/drm/msm/edp/edp_aux.c +++ b/drivers/gpu/drm/msm/edp/edp_aux.c @@ -132,7 +132,7 @@ ssize_t edp_aux_transfer(struct drm_dp_aux *drm_aux, struct drm_dp_aux_msg *msg) /* msg sanity check */ if ((native && (msg->size > AUX_CMD_NATIVE_MAX)) || (msg->size > AUX_CMD_I2C_MAX)) { - pr_err("%s: invalid msg: size(%d), request(%x)\n", + pr_err("%s: invalid msg: size(%zu), request(%x)\n", __func__, msg->size, msg->request); return -EINVAL; } @@ -155,7 +155,7 @@ ssize_t edp_aux_transfer(struct drm_dp_aux *drm_aux, struct drm_dp_aux_msg *msg) */ edp_write(aux->base + REG_EDP_AUX_TRANS_CTRL, 0); msm_edp_aux_ctrl(aux, 1); - pr_err("%s: aux timeout, %d\n", __func__, ret); + pr_err("%s: aux timeout, %zd\n", __func__, ret); goto unlock_exit; } DBG("completion"); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 47f4dd407671..cc5dc5299b8d 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -94,7 +94,7 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, } if (reglog) - printk(KERN_DEBUG "IO:region %s %08x %08lx\n", dbgname, (u32)ptr, size); + printk(KERN_DEBUG "IO:region %s %p %08lx\n", dbgname, ptr, size); return ptr; } @@ -102,7 +102,7 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, void msm_writel(u32 data, void __iomem *addr) { if (reglog) - printk(KERN_DEBUG "IO:W %08x %08x\n", (u32)addr, data); + printk(KERN_DEBUG "IO:W %p %08x\n", addr, data); writel(data, addr); } @@ -110,7 +110,7 @@ u32 msm_readl(const void __iomem *addr) { u32 val = readl(addr); if (reglog) - printk(KERN_ERR "IO:R %08x %08x\n", (u32)addr, val); + printk(KERN_ERR "IO:R %p %08x\n", addr, val); return val; } @@ -177,7 +177,7 @@ static int get_mdp_ver(struct platform_device *pdev) const struct of_device_id *match; match = of_match_node(match_types, dev->of_node); if (match) - return (int)match->data; + return (int)(unsigned long)match->data; #endif return 4; } @@ -216,7 +216,7 @@ static int msm_init_vram(struct drm_device *dev) if (ret) return ret; size = r.end - r.start; - DRM_INFO("using VRAM carveout: %lx@%08x\n", size, r.start); + DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start); } else #endif diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 479d8af72bcb..52839769eb6c 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -483,7 +483,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) 
uint64_t off = drm_vma_node_start(&obj->vma_node); WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - seq_printf(m, "%08x: %c(r=%u,w=%u) %2d (%2d) %08llx %p %d\n", + seq_printf(m, "%08x: %c(r=%u,w=%u) %2d (%2d) %08llx %p %zu\n", msm_obj->flags, is_active(msm_obj) ? 'A' : 'I', msm_obj->read_fence, msm_obj->write_fence, obj->name, obj->refcount.refcount.counter, diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 7acdaa5688b7..7ac2f1997e4a 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -60,7 +60,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint32_t iova, u32 pa = sg_phys(sg) - sg->offset; size_t bytes = sg->length + sg->offset; - VERB("map[%d]: %08x %08x(%x)", i, iova, pa, bytes); + VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes); ret = iommu_map(domain, da, pa, bytes, prot); if (ret) @@ -99,7 +99,7 @@ static int msm_iommu_unmap(struct msm_mmu *mmu, uint32_t iova, if (unmapped < bytes) return unmapped; - VERB("unmap[%d]: %08x(%x)", i, iova, bytes); + VERB("unmap[%d]: %08x(%zx)", i, iova, bytes); BUG_ON(!PAGE_ALIGNED(bytes)); From 981371f326235e07e33f62b4196d1e04603f6a49 Mon Sep 17 00:00:00 2001 From: Stephane Viau Date: Thu, 30 Apr 2015 10:39:26 -0400 Subject: [PATCH 113/481] drm/msm/dsi: Fix a couple more 64-bit build warnings Avoid such errors at compilation time: format '%d' expects argument of type 'int', but argument 3 has type 'size_t' Signed-off-by: Stephane Viau --- drivers/gpu/drm/msm/dsi/dsi_host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 956b22492c9a..28e712792159 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1023,7 +1023,7 @@ static int dsi_short_read1_resp(u8 *buf, const struct mipi_dsi_msg *msg) *data = buf[1]; /* strip out dcs type */ return 1; } else { - pr_err("%s: read data does not match with rx_buf len %d\n", + pr_err("%s: read data does not match with rx_buf len %zu\n", __func__, msg->rx_len); return -EINVAL; } @@ -1040,7 +1040,7 @@ static int dsi_short_read2_resp(u8 *buf, const struct mipi_dsi_msg *msg) data[1] = buf[2]; return 2; } else { - pr_err("%s: read data does not match with rx_buf len %d\n", + pr_err("%s: read data does not match with rx_buf len %zu\n", __func__, msg->rx_len); return -EINVAL; } From a2ca77898e9b81f6d6b3bfa1ff46713e697e8af7 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Mon, 23 Feb 2015 15:59:21 +0530 Subject: [PATCH 114/481] drm: msm: Fix build when legacy fbdev support isn't set The DRM_KMS_FB_HELPER config is selected only when DRM_MSM_FBDEV config is selected. The driver accesses drm_fb_helper_* functions even when legacy fbdev support is disabled in msm. Wrap around these functions with #ifdef checks to prevent build break. 
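The sketch below shows the general pattern, assuming a made-up CONFIG_EXAMPLE_FBDEV option: either guard each call site with #ifdef, as the patch does, or provide a static inline no-op stub so the callers stay unconditional. All names here are invented for the illustration.

#include <stdio.h>

#ifdef CONFIG_EXAMPLE_FBDEV
static void fbdev_hotplug_event(void)
{
	printf("fbdev: hotplug event handled\n");
}
#else
/* Alternative to sprinkling #ifdef at every call site: a no-op stub
 * keeps the callers unconditional when the option is disabled. */
static inline void fbdev_hotplug_event(void) { }
#endif

static void output_poll_changed(void)
{
	/* With a stub available, this caller needs no #ifdef of its own. */
	fbdev_hotplug_event();
}

int main(void)
{
	output_poll_changed();
	return 0;
}

The stub variant avoids repeating the guard at every caller; the patch instead keeps the guards local to the two call sites it touches.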
Signed-off-by: Archit Taneja --- drivers/gpu/drm/msm/msm_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index cc5dc5299b8d..72f2b962efd4 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -21,9 +21,11 @@ static void msm_fb_output_poll_changed(struct drm_device *dev) { +#ifdef CONFIG_DRM_MSM_FBDEV struct msm_drm_private *priv = dev->dev_private; if (priv->fbdev) drm_fb_helper_hotplug_event(priv->fbdev); +#endif } static const struct drm_mode_config_funcs mode_config_funcs = { @@ -419,9 +421,11 @@ static void msm_preclose(struct drm_device *dev, struct drm_file *file) static void msm_lastclose(struct drm_device *dev) { +#ifdef CONFIG_DRM_MSM_FBDEV struct msm_drm_private *priv = dev->dev_private; if (priv->fbdev) drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev); +#endif } static irqreturn_t msm_irq(int irq, void *arg) From 6128f1bec447cdaa42b61093e351697dbe1c72ed Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 5 Apr 2015 14:06:31 +0200 Subject: [PATCH 115/481] drm/msm/dp: fix error return code Return a negative error code on failure. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e1,e2; @@ ( if (\(ret < 0\|ret != 0\)) { ... return ret; } | ret = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall --- drivers/gpu/drm/msm/edp/edp_ctrl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/edp/edp_ctrl.c b/drivers/gpu/drm/msm/edp/edp_ctrl.c index 0ec5abdba5c4..29e52d7c61c0 100644 --- a/drivers/gpu/drm/msm/edp/edp_ctrl.c +++ b/drivers/gpu/drm/msm/edp/edp_ctrl.c @@ -1149,12 +1149,13 @@ int msm_edp_ctrl_init(struct msm_edp *edp) ctrl->aux = msm_edp_aux_init(dev, ctrl->base, &ctrl->drm_aux); if (!ctrl->aux || !ctrl->drm_aux) { pr_err("%s:failed to init aux\n", __func__); - return ret; + return -ENOMEM; } ctrl->phy = msm_edp_phy_init(dev, ctrl->base); if (!ctrl->phy) { pr_err("%s:failed to init phy\n", __func__); + ret = -ENOMEM; goto err_destory_aux; } From 651ad3f52b9d0b11b8ebe94c3810ac89f9f75653 Mon Sep 17 00:00:00 2001 From: Hai Li Date: Wed, 29 Apr 2015 11:38:59 -0400 Subject: [PATCH 116/481] drm/msm/dsi: Fixup missing *break* statement during cmd rx Signed-off-by: Hai Li --- drivers/gpu/drm/msm/dsi/dsi_host.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 28e712792159..a3ff0e296604 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1797,6 +1797,7 @@ int msm_dsi_host_cmd_rx(struct mipi_dsi_host *host, case MIPI_DSI_RX_ACKNOWLEDGE_AND_ERROR_REPORT: pr_err("%s: rx ACK_ERR_PACLAGE\n", __func__); ret = 0; + break; case MIPI_DSI_RX_GENERIC_SHORT_READ_RESPONSE_1BYTE: case MIPI_DSI_RX_DCS_SHORT_READ_RESPONSE_1BYTE: ret = dsi_short_read1_resp(buf, msg); From fe34464df5e8bd4b09db170477f32db4eade0444 Mon Sep 17 00:00:00 2001 From: Stephane Viau Date: Thu, 30 Apr 2015 13:45:52 -0400 Subject: [PATCH 117/481] drm/msm/mdp5: Fix iteration on INTF config array The current iteration in get_dsi_id_from_intf() is wrong: instead of iterating until hw_cfg->intf.count, we need to iterate until MDP5_INTF_NUM_MAX here. 
Let's take the example of msm8x16: hw_cfg->intf.count = 1 intfs[0] = INTF_Disabled intfs[1] = INTF_DSI If we stop iterating once i reaches hw_cfg->intf.count (== 1), we will miss the test for intfs[1]. Actually, this hw_cfg->intf.count entry is quite confusing and is not (or *should not be*) used anywhere else; let's remove it. Signed-off-by: Stephane Viau --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c | 34 ++++++++++++------------- drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h | 9 ++++--- drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 12 ++++----- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index e001e6b2296a..8b9a7931b162 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -72,14 +72,13 @@ const struct mdp5_cfg_hw msm8x74_config = { .base = { 0x12d00, 0x12e00, 0x12f00 }, }, .intf = { - .count = 4, .base = { 0x12500, 0x12700, 0x12900, 0x12b00 }, - }, - .intfs = { - [0] = INTF_eDP, - [1] = INTF_DSI, - [2] = INTF_DSI, - [3] = INTF_HDMI, + .connect = { + [0] = INTF_eDP, + [1] = INTF_DSI, + [2] = INTF_DSI, + [3] = INTF_HDMI, + }, }, .max_clk = 200000000, }; @@ -142,14 +141,13 @@ const struct mdp5_cfg_hw apq8084_config = { .base = { 0x12f00, 0x13000, 0x13100, 0x13200 }, }, .intf = { - .count = 5, .base = { 0x12500, 0x12700, 0x12900, 0x12b00, 0x12d00 }, - }, - .intfs = { - [0] = INTF_eDP, - [1] = INTF_DSI, - [2] = INTF_DSI, - [3] = INTF_HDMI, + .connect = { + [0] = INTF_eDP, + [1] = INTF_DSI, + [2] = INTF_DSI, + [3] = INTF_HDMI, + }, }, .max_clk = 320000000, }; @@ -196,10 +194,12 @@ const struct mdp5_cfg_hw msm8x16_config = { }, .intf = { - .count = 1, /* INTF_1 */ - .base = { 0x6B800 }, + .base = { 0x00000, 0x6b800 }, + .connect = { + [0] = INTF_DISABLED, + [1] = INTF_DSI, + }, }, - /* TODO enable .intfs[] with [1] = INTF_DSI, once DSI is implemented */ .max_clk = 320000000, }; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h index 3a551b0892d8..69349abe59f2 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h @@ -59,6 +59,11 @@ struct mdp5_smp_block { #define MDP5_INTF_NUM_MAX 5 +struct mdp5_intf_block { + uint32_t base[MAX_BASES]; + u32 connect[MDP5_INTF_NUM_MAX]; /* array of enum mdp5_intf_type */ +}; + struct mdp5_cfg_hw { char *name; @@ -72,9 +77,7 @@ struct mdp5_cfg_hw { struct mdp5_sub_block dspp; struct mdp5_sub_block ad; struct mdp5_sub_block pp; - struct mdp5_sub_block intf; - - u32 intfs[MDP5_INTF_NUM_MAX]; /* array of enum mdp5_intf_type */ + struct mdp5_intf_block intf; uint32_t max_clk; }; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index dfa8beb9343a..bbacf9d2b738 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -206,8 +206,8 @@ static struct drm_encoder *construct_encoder(struct mdp5_kms *mdp5_kms, static int get_dsi_id_from_intf(const struct mdp5_cfg_hw *hw_cfg, int intf_num) { - const int intf_cnt = hw_cfg->intf.count; - const u32 *intfs = hw_cfg->intfs; + const enum mdp5_intf_type *intfs = hw_cfg->intf.connect; + const int intf_cnt = ARRAY_SIZE(hw_cfg->intf.connect); int id = 0, i; for (i = 0; i < intf_cnt; i++) { @@ -228,7 +228,7 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num) struct msm_drm_private *priv = dev->dev_private; const struct mdp5_cfg_hw *hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); - enum mdp5_intf_type 
intf_type = hw_cfg->intfs[intf_num]; + enum mdp5_intf_type intf_type = hw_cfg->intf.connect[intf_num]; struct drm_encoder *encoder; int ret = 0; @@ -365,7 +365,7 @@ static int modeset_init(struct mdp5_kms *mdp5_kms) /* Construct encoders and modeset initialize connector devices * for each external display interface. */ - for (i = 0; i < ARRAY_SIZE(hw_cfg->intfs); i++) { + for (i = 0; i < ARRAY_SIZE(hw_cfg->intf.connect); i++) { ret = modeset_init_intf(mdp5_kms, i); if (ret) goto fail; @@ -514,8 +514,8 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) */ mdp5_enable(mdp5_kms); for (i = 0; i < MDP5_INTF_NUM_MAX; i++) { - if (!config->hw->intf.base[i] || - mdp5_cfg_intf_is_virtual(config->hw->intfs[i])) + if (mdp5_cfg_intf_is_virtual(config->hw->intf.connect[i]) || + !config->hw->intf.base[i]) continue; mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(i), 0); } From 7194b62c8c99727e3f145b00b8390a905252370e Mon Sep 17 00:00:00 2001 From: Stephane Viau Date: Tue, 5 May 2015 09:47:57 -0400 Subject: [PATCH 118/481] drm/msm: fix unbalanced DRM framebuffer init/destroy When msm_framebuffer_init() fails before calling drm_framebuffer_init(), drm_framebuffer_cleanup() [called in msm_framebuffer_destroy()] is still being called even though drm_framebuffer_init() was not called for that buffer. Thus a NULL pointer derefencing: [ 247.529691] Unable to handle kernel NULL pointer dereference at virtual address 0000027c ... [ 247.563996] PC is at __mutex_lock_slowpath+0x94/0x3a8 ... [ 247.823025] [] (__mutex_lock_slowpath) from [] (mutex_lock+0x20/0x3c) [ 247.831186] [] (mutex_lock) from [] (drm_framebuffer_cleanup+0x18/0x38) [ 247.839520] [] (drm_framebuffer_cleanup) from [] (msm_framebuffer_destroy+0x48/0x100) [ 247.849066] [] (msm_framebuffer_destroy) from [] (msm_framebuffer_init+0x1e8/0x228) [ 247.858439] [] (msm_framebuffer_init) from [] (msm_framebuffer_create+0x70/0x134) [ 247.867642] [] (msm_framebuffer_create) from [] (internal_framebuffer_create+0x67c/0x7b4) [ 247.877537] [] (internal_framebuffer_create) from [] (drm_mode_addfb2+0x20/0x98) [ 247.886650] [] (drm_mode_addfb2) from [] (drm_ioctl+0x240/0x420) [ 247.894378] [] (drm_ioctl) from [] (do_vfs_ioctl+0x4e4/0x5a4) ... Signed-off-by: Stephane Viau [plus initialize msm_fb to NULL to -Rob] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_fb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index 6b573e612f27..121713281417 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -172,8 +172,8 @@ struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, { struct msm_drm_private *priv = dev->dev_private; struct msm_kms *kms = priv->kms; - struct msm_framebuffer *msm_fb; - struct drm_framebuffer *fb = NULL; + struct msm_framebuffer *msm_fb = NULL; + struct drm_framebuffer *fb; const struct msm_format *format; int ret, i, n; unsigned int hsub, vsub; @@ -239,8 +239,7 @@ struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, return fb; fail: - if (fb) - msm_framebuffer_destroy(fb); + kfree(msm_fb); return ERR_PTR(ret); } From ff431fa4d96fc34568454aae4cc264a7760636a8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 7 May 2015 15:19:02 -0400 Subject: [PATCH 119/481] drm/msm/dsi: use pr_err_ratelimited When things go badly we can get a lot of these error irqs. Let's not DoS the user. 
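pr_err_ratelimited() caps how often a given message is emitted. Here is a minimal user-space sketch of the same idea, using a crude one-second window and an arbitrary limit of 10 messages; the kernel's real implementation uses a configurable interval/burst ratelimit state rather than this simplification.

#include <stdio.h>
#include <time.h>

static void err_ratelimited(const char *msg)
{
	static time_t window_start;
	static int printed, suppressed;
	time_t now = time(NULL);

	if (now != window_start) {
		/* New one-second window: report what was dropped, then reset. */
		if (suppressed)
			fprintf(stderr, "(%d messages suppressed)\n", suppressed);
		window_start = now;
		printed = 0;
		suppressed = 0;
	}

	if (printed < 10) {
		fprintf(stderr, "%s\n", msg);
		printed++;
	} else {
		suppressed++;
	}
}

int main(void)
{
	for (int i = 0; i < 1000; i++)
		err_ratelimited("dsi error: status=0x4");
	return 0;
}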
Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index a3ff0e296604..47a7f0036478 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1215,7 +1215,7 @@ static void dsi_err_worker(struct work_struct *work) container_of(work, struct msm_dsi_host, err_work); u32 status = msm_host->err_work_state; - pr_err("%s: status=%x\n", __func__, status); + pr_err_ratelimited("%s: status=%x\n", __func__, status); if (status & DSI_ERR_STATE_MDP_FIFO_UNDERFLOW) dsi_sw_reset_restore(msm_host); From 13f15565f7887a028b3442bbd763ff6d07b48479 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 7 May 2015 15:20:13 -0400 Subject: [PATCH 120/481] drm/msm: setup vram after component_bind_all() First of all, we don't want -EPROBE_DEFER when trying to bind children to cause us to forget to free our vram. And second we don't want vram allocation fail to trigger _unbind_all() before _bind_all(). Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 72f2b962efd4..cc6485ef2949 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -285,10 +285,6 @@ static int msm_load(struct drm_device *dev, unsigned long flags) drm_mode_config_init(dev); - ret = msm_init_vram(dev); - if (ret) - goto fail; - platform_set_drvdata(pdev, dev); /* Bind all our sub-components: */ @@ -296,6 +292,10 @@ static int msm_load(struct drm_device *dev, unsigned long flags) if (ret) return ret; + ret = msm_init_vram(dev); + if (ret) + goto fail; + switch (get_mdp_ver(pdev)) { case 4: kms = mdp4_kms_init(dev); From db7c727402b3f6a604f0c52be5f6df8ca3797030 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 4 May 2015 11:43:31 -0700 Subject: [PATCH 121/481] mtd: readtest: don't clobber error reports Commit 2a6a28e7922c ("mtd: Make MTD tests cancelable") accidentally clobbered any read failure reports. Coverity CID #1296020 Signed-off-by: Brian Norris --- drivers/mtd/tests/readtest.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/tests/readtest.c b/drivers/mtd/tests/readtest.c index a3196b750a22..58df07acdbdb 100644 --- a/drivers/mtd/tests/readtest.c +++ b/drivers/mtd/tests/readtest.c @@ -191,9 +191,11 @@ static int __init mtd_readtest_init(void) err = ret; } - err = mtdtest_relax(); - if (err) + ret = mtdtest_relax(); + if (ret) { + err = ret; goto out; + } } if (err) From 6f6b287968681f660e151c202765da9f58d3dcba Mon Sep 17 00:00:00 2001 From: Hai Li Date: Thu, 23 Apr 2015 14:13:21 -0400 Subject: [PATCH 122/481] drm/msm: Attach assigned encoder to eDP and DSI connectors drm_mode_connector_attach_encoder() function call is missing during eDP and DSI connector initialization. As a result, no encoder is returned by DRM_IOCTL_MODE_GETCONNECTOR system call. This change is to fix this issue. 
Signed-off-by: Hai Li --- drivers/gpu/drm/msm/dsi/dsi.c | 10 +++++----- drivers/gpu/drm/msm/dsi/dsi_manager.c | 6 +++++- drivers/gpu/drm/msm/edp/edp_connector.c | 2 ++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 28d1f95a90cc..ad50b80225f5 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -177,6 +177,11 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, goto fail; } + for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) { + encoders[i]->bridge = msm_dsi->bridge; + msm_dsi->encoders[i] = encoders[i]; + } + msm_dsi->connector = msm_dsi_manager_connector_init(msm_dsi->id); if (IS_ERR(msm_dsi->connector)) { ret = PTR_ERR(msm_dsi->connector); @@ -185,11 +190,6 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, goto fail; } - for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) { - encoders[i]->bridge = msm_dsi->bridge; - msm_dsi->encoders[i] = encoders[i]; - } - priv->bridges[priv->num_bridges++] = msm_dsi->bridge; priv->connectors[priv->num_connectors++] = msm_dsi->connector; diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index ee3ebcaa33f5..0a40f3c64e8b 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -462,7 +462,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id) struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); struct drm_connector *connector = NULL; struct dsi_connector *dsi_connector; - int ret; + int ret, i; dsi_connector = devm_kzalloc(msm_dsi->dev->dev, sizeof(*dsi_connector), GFP_KERNEL); @@ -495,6 +495,10 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id) if (ret) goto fail; + for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) + drm_mode_connector_attach_encoder(connector, + msm_dsi->encoders[i]); + return connector; fail: diff --git a/drivers/gpu/drm/msm/edp/edp_connector.c b/drivers/gpu/drm/msm/edp/edp_connector.c index d8812e84da54..b4d1b469862a 100644 --- a/drivers/gpu/drm/msm/edp/edp_connector.c +++ b/drivers/gpu/drm/msm/edp/edp_connector.c @@ -151,6 +151,8 @@ struct drm_connector *msm_edp_connector_init(struct msm_edp *edp) if (ret) goto fail; + drm_mode_connector_attach_encoder(connector, edp->encoder); + return connector; fail: From ec1936eb099bb8c1c7a32b9ac4be128e1e68e2d9 Mon Sep 17 00:00:00 2001 From: Hai Li Date: Wed, 29 Apr 2015 11:39:00 -0400 Subject: [PATCH 123/481] drm/msm/dsi: Simplify the code to get the number of read byte During cmd rx, only new versions of H/W provide register to read back the real number of byte returned by panel. For the old versions, reading this register will not get the right number. In fact, we only need to assume the returned data is the same size as we expected, because later we will check the data type to detect error. 
Signed-off-by: Hai Li --- drivers/gpu/drm/msm/dsi/dsi_host.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 47a7f0036478..649d20d29f92 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1093,7 +1093,6 @@ static int dsi_cmd_dma_rx(struct msm_dsi_host *msm_host, { u32 *lp, *temp, data; int i, j = 0, cnt; - bool ack_error = false; u32 read_cnt; u8 reg[16]; int repeated_bytes = 0; @@ -1105,15 +1104,10 @@ static int dsi_cmd_dma_rx(struct msm_dsi_host *msm_host, if (cnt > 4) cnt = 4; /* 4 x 32 bits registers only */ - /* Calculate real read data count */ - read_cnt = dsi_read(msm_host, 0x1d4) >> 16; - - ack_error = (rx_byte == 4) ? - (read_cnt == 8) : /* short pkt + 4-byte error pkt */ - (read_cnt == (pkt_size + 6 + 4)); /* long pkt+4-byte error pkt*/ - - if (ack_error) - read_cnt -= 4; /* Remove 4 byte error pkt */ + if (rx_byte == 4) + read_cnt = 4; + else + read_cnt = pkt_size + 6; /* * In case of multiple reads from the panel, after the first read, there From 91dd93f956b9ea9ecf47fd4b9acd2d2e7f980303 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 May 2015 17:24:50 -0700 Subject: [PATCH 124/481] netlink: move nl_table in read_mostly section netlink sockets creation and deletion heavily modify nl_table_users and nl_table_lock. If nl_table is sharing one cache line with one of them, netlink performance is really bad on SMP. ffffffff81ff5f00 B nl_table ffffffff81ff5f0c b nl_table_users Putting nl_table in read_mostly section increased performance of my open/delete netlink sockets test by about 80 % This came up while diagnosing a getaddrinfo() problem. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index daa0b818174b..dbe885901b34 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -89,7 +89,7 @@ static inline int netlink_is_kernel(struct sock *sk) return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } -struct netlink_table *nl_table; +struct netlink_table *nl_table __read_mostly; EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); From e87a468eb97da35d8dc00e8fa9828b4de4ab69d0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 14 May 2015 20:34:08 -0400 Subject: [PATCH 125/481] ipv6: Fix udp checksums with raw sockets It was reported that trancerout6 would cause a kernel to crash when trying to compute checksums on raw UDP packets. The cause was the check in __ip6_append_data that would attempt to use partial checksums on the packet. However, raw sockets do not initialize partial checksum fields so partial checksums can't be used. Solve this the same way IPv4 does it. raw sockets pass transhdrlen value of 0 to ip_append_data which causes the checksum to be computed in software. Use the same check in ip6_append_data (check transhdrlen). Reported-by: Wolfgang Walter CC: Wolfgang Walter CC: Eric Dumazet Signed-off-by: Vladislav Yasevich Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c21777565c58..bc09cb97b840 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1300,8 +1300,10 @@ emsgsize: /* If this is the first and only packet and device * supports checksum offloading, let's use it. + * Use transhdrlen, same as IPv4, because partial + * sums only work when transhdrlen is set. */ - if (!skb && sk->sk_protocol == IPPROTO_UDP && + if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && length + fragheaderlen < mtu && rt->dst.dev->features & NETIF_F_V6_CSUM && !exthdrlen) From c1c52db16e26a26b545821abae303310a074350f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 14 May 2015 18:17:08 -0500 Subject: [PATCH 126/481] net/mlx4: Avoid 'may be used uninitialized' warnings With a cross-compiler based on gcc-4.9, I see warnings like the following: drivers/net/ethernet/mellanox/mlx4/resource_tracker.c: In function 'mlx4_SW2HW_CQ_wrapper': drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:3048:10: error: 'cq' may be used uninitialized in this function [-Werror=maybe-uninitialized] cq->mtt = mtt; I think the warning is spurious because we only use cq when cq_res_start_move_to() returns zero, and it always initializes *cq in that case. The srq case is similar. But maybe gcc isn't smart enough to figure that out. Initialize cq and srq explicitly to avoid the warnings. Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 92fce1b98558..bafe2180cf0c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3187,7 +3187,7 @@ int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave, int cqn = vhcr->in_modifier; struct mlx4_cq_context *cqc = inbox->buf; int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz; - struct res_cq *cq; + struct res_cq *cq = NULL; struct res_mtt *mtt; err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq); @@ -3223,7 +3223,7 @@ int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave, { int err; int cqn = vhcr->in_modifier; - struct res_cq *cq; + struct res_cq *cq = NULL; err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_ALLOCATED, &cq); if (err) @@ -3362,7 +3362,7 @@ int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave, int err; int srqn = vhcr->in_modifier; struct res_mtt *mtt; - struct res_srq *srq; + struct res_srq *srq = NULL; struct mlx4_srq_context *srqc = inbox->buf; int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz; @@ -3406,7 +3406,7 @@ int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave, { int err; int srqn = vhcr->in_modifier; - struct res_srq *srq; + struct res_srq *srq = NULL; err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_ALLOCATED, &srq); if (err) From eea39946a1f36e8a5a47c86e7ecfca6076868505 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 13 May 2015 21:17:41 -0700 Subject: [PATCH 127/481] rename RTNH_F_EXTERNAL to RTNH_F_OFFLOAD RTNH_F_EXTERNAL today is printed as "offload" in iproute2 output. This patch renames the flag to be consistent with what the user sees. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/uapi/linux/rtnetlink.h | 2 +- net/ipv4/fib_trie.c | 2 +- net/switchdev/switchdev.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 974db03f7b1a..17fb02f488da 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -337,7 +337,7 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ -#define RTNH_F_EXTERNAL 8 /* Route installed externally */ +#define RTNH_F_OFFLOAD 8 /* offloaded route */ /* Macros to handle hexthops */ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e13fcc602da2..64c2076ced54 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1764,7 +1764,7 @@ void fib_table_flush_external(struct fib_table *tb) /* record local slen */ slen = fa->fa_slen; - if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL)) + if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) continue; netdev_switch_fib_ipv4_del(n->key, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 46568b85c333..055453d48668 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -338,7 +338,7 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, fi, tos, type, nlflags, tb_id); if (!err) - fi->fib_flags |= RTNH_F_EXTERNAL; + fi->fib_flags |= RTNH_F_OFFLOAD; } return err; @@ -364,7 +364,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, const struct swdev_ops *ops; int err = 0; - if (!(fi->fib_flags & RTNH_F_EXTERNAL)) + if (!(fi->fib_flags & RTNH_F_OFFLOAD)) return 0; dev = netdev_switch_get_dev_by_nhs(fi); @@ -376,7 +376,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len, fi, tos, type, tb_id); if (!err) - fi->fib_flags &= ~RTNH_F_EXTERNAL; + fi->fib_flags &= ~RTNH_F_OFFLOAD; } return err; From 63509c60bbc62120fb0e3b287c86ac036b893d90 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 13 May 2015 09:17:54 +0200 Subject: [PATCH 128/481] target: Fix bidi command handling The function transport_complete_qf() must call either queue_data_in() or queue_status() but not both. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 3fe5cb240b6f..99a38cf320ff 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1957,8 +1957,7 @@ static void transport_complete_qf(struct se_cmd *cmd) case DMA_TO_DEVICE: if (cmd->se_cmd_flags & SCF_BIDI) { ret = cmd->se_tfo->queue_data_in(cmd); - if (ret < 0) - break; + break; } /* Fall through for DMA_TO_DEVICE */ case DMA_NONE: From 6c2faeaa0ecc67098106771cba8b7ed1e99a1b5f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 13 May 2015 09:19:02 +0200 Subject: [PATCH 129/481] target: Add missing parentheses Code like " &= ~CMD_T_BUSY | ..." only clears CMD_T_BUSY but not the other flag. Modify these statements such that both flags are cleared. 
(Fix fuzz for target_write_prot_action code in mainline - nab) Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 99a38cf320ff..90c92f04a586 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1770,7 +1770,7 @@ static int target_write_prot_action(struct se_cmd *cmd) sectors, 0, NULL, 0); if (unlikely(cmd->pi_err)) { spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT; + cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); spin_unlock_irq(&cmd->t_state_lock); transport_generic_request_failure(cmd, cmd->pi_err); return -1; @@ -1868,7 +1868,7 @@ void target_execute_cmd(struct se_cmd *cmd) if (target_handle_task_attr(cmd)) { spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT; + cmd->transport_state &= ~(CMD_T_BUSY | CMD_T_SENT); spin_unlock_irq(&cmd->t_state_lock); return; } From ed65918735a50e1002d856749d3f1f5072f92cfa Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 30 Apr 2015 09:29:06 +0300 Subject: [PATCH 130/481] iwlwifi: 7000: modify the firmware name for 3165 3165 really needs to load iwlwifi-7265D-13.ucode. This device is supported starting from -13.ucode, update the MIN and OK defines accordingly. While at it, add 3165 to the device list in the Kconfig file. Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/Kconfig | 1 + drivers/net/wireless/iwlwifi/iwl-7000.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig index ab019b45551b..f89f446e5c8a 100644 --- a/drivers/net/wireless/iwlwifi/Kconfig +++ b/drivers/net/wireless/iwlwifi/Kconfig @@ -21,6 +21,7 @@ config IWLWIFI Intel 7260 Wi-Fi Adapter Intel 3160 Wi-Fi Adapter Intel 7265 Wi-Fi Adapter + Intel 3165 Wi-Fi Adapter This driver uses the kernel's mac80211 subsystem. 
diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c index 36e786f0387b..74ad278116be 100644 --- a/drivers/net/wireless/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/iwlwifi/iwl-7000.c @@ -70,15 +70,14 @@ /* Highest firmware API version supported */ #define IWL7260_UCODE_API_MAX 13 -#define IWL3160_UCODE_API_MAX 13 /* Oldest version we won't warn about */ #define IWL7260_UCODE_API_OK 12 -#define IWL3160_UCODE_API_OK 12 +#define IWL3165_UCODE_API_OK 13 /* Lowest firmware API version supported */ #define IWL7260_UCODE_API_MIN 10 -#define IWL3160_UCODE_API_MIN 10 +#define IWL3165_UCODE_API_MIN 13 /* NVM versions */ #define IWL7260_NVM_VERSION 0x0a1d @@ -104,9 +103,6 @@ #define IWL3160_FW_PRE "iwlwifi-3160-" #define IWL3160_MODULE_FIRMWARE(api) IWL3160_FW_PRE __stringify(api) ".ucode" -#define IWL3165_FW_PRE "iwlwifi-3165-" -#define IWL3165_MODULE_FIRMWARE(api) IWL3165_FW_PRE __stringify(api) ".ucode" - #define IWL7265_FW_PRE "iwlwifi-7265-" #define IWL7265_MODULE_FIRMWARE(api) IWL7265_FW_PRE __stringify(api) ".ucode" @@ -248,8 +244,13 @@ static const struct iwl_ht_params iwl7265_ht_params = { const struct iwl_cfg iwl3165_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 3165", - .fw_name_pre = IWL3165_FW_PRE, + .fw_name_pre = IWL7265D_FW_PRE, IWL_DEVICE_7000, + /* sparse doens't like the re-assignment but it is safe */ +#ifndef __CHECKER__ + .ucode_api_ok = IWL3165_UCODE_API_OK, + .ucode_api_min = IWL3165_UCODE_API_MIN, +#endif .ht_params = &iwl7000_ht_params, .nvm_ver = IWL3165_NVM_VERSION, .nvm_calib_ver = IWL3165_TX_POWER_VERSION, @@ -325,6 +326,5 @@ const struct iwl_cfg iwl7265d_n_cfg = { MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL3160_UCODE_API_OK)); -MODULE_FIRMWARE(IWL3165_MODULE_FIRMWARE(IWL3160_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); From 1aa02b5a33506387828f77eb607342a7dfa6beff Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 29 Apr 2015 05:11:10 +0300 Subject: [PATCH 131/481] iwlwifi: pcie: don't disable the busmaster DMA clock for family 8000 Disabling the clocks is a standard procedure while stopping the device. On family 8000 however, disabling the bus master DMA clock increases the NIC's power consumption. To fix this, skip this call if the device family is IWL_DEVICE_FAMILY_8000. 
Signed-off-by: Avri Altman Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 47bbf573fdc8..d6f6515fe663 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1049,9 +1049,11 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) iwl_pcie_rx_stop(trans); /* Power-down device's busmaster DMA clocks */ - iwl_write_prph(trans, APMG_CLK_DIS_REG, - APMG_CLK_VAL_DMA_CLK_RQT); - udelay(5); + if (trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) { + iwl_write_prph(trans, APMG_CLK_DIS_REG, + APMG_CLK_VAL_DMA_CLK_RQT); + udelay(5); + } } /* Make sure (redundant) we've released our request to stay awake */ From ba830b3d093580f424fd31b7cd693046a86cf030 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 14 May 2015 12:11:38 +0300 Subject: [PATCH 132/481] iwlwifi: mvm: fix MLME trigger A few triggers have status = MLME_SUCCESS and they are still interesting. E.g. if we want to collect data upon deauth, the status will be MLME_SUCCESS. Fix that. Fixes: d42f53503406 ("iwlwifi: mvm: add trigger for firmware dump upon MLME failures") Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 40265b9c66ae..dda9f7b5f342 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -3995,9 +3995,6 @@ static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw, if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_MLME)) return; - if (event->u.mlme.status == MLME_SUCCESS) - return; - trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_MLME); trig_mlme = (void *)trig->data; if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig)) From 1b97937246d8b97c0760d16d8992c7937bdf5e6a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 15 May 2015 11:02:23 +0100 Subject: [PATCH 133/481] ARM: fix missing syscall trace exit Josh Stone reports: I've discovered a case where both arm and arm64 will miss a ptrace syscall-exit that they should report. If the syscall is entered without TIF_SYSCALL_TRACE set, then it goes on the fast path. It's then possible to have TIF_SYSCALL_TRACE added in the middle of the syscall, but ret_fast_syscall doesn't check this flag again. Fix this by always checking for a syscall trace in the fast exit path. 
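In rough C-style pseudocode (for orientation only; the real change below is in entry-common.S assembly, where the same control flow is expressed with tst/bne), the fast return path now does:

	disable_irqs();
	flags = current_thread_info()->flags;	/* re-read after the syscall body ran */
	if (flags & _TIF_SYSCALL_WORK)
		goto __sys_trace_return;	/* a tracer attached mid-syscall: take the traced exit */
	if (flags & _TIF_WORK_MASK)
		goto fast_work_pending;
	/* otherwise continue with the fast return to user space */

_TIF_SYSCALL_WORK, _TIF_WORK_MASK and the __sys_trace_return and fast_work_pending labels are the ones visible in the hunk; the C wrappers around them are invented for readability.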
Reported-by: Josh Stone Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f8ccc21fa032..4e7f40c577e6 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -33,7 +33,9 @@ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq @ disable interrupts - ldr r1, [tsk, #TI_FLAGS] + ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing + tst r1, #_TIF_SYSCALL_WORK + bne __sys_trace_return tst r1, #_TIF_WORK_MASK bne fast_work_pending asm_trace_hardirqs_on From 774449ebcb18bae146e2b6f6d012b46e64a095b9 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 15 May 2015 09:19:36 -0400 Subject: [PATCH 134/481] drm/msm: fix locking inconsistencies in gpu->destroy() In error paths, this was being called without struct_mutex held. Leading to panics like: msm 1a00000.qcom,mdss_mdp: No memory protection without IOMMU Kernel panic - not syncing: BUG! CPU: 0 PID: 1409 Comm: cat Not tainted 4.0.0-dirty #4 Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) Call trace: [] dump_backtrace+0x0/0x118 [] show_stack+0x10/0x20 [] dump_stack+0x84/0xc4 [] panic+0xd0/0x210 [] drm_gem_object_free+0x5c/0x60 [] adreno_gpu_cleanup+0x60/0x80 [] a3xx_destroy+0x20/0x70 [] a3xx_gpu_init+0x84/0x108 [] adreno_load_gpu+0x58/0x190 [] msm_open+0x74/0x88 [] drm_open+0x168/0x400 [] drm_stub_open+0xa8/0x118 [] chrdev_open+0x94/0x198 [] do_dentry_open+0x208/0x310 [] vfs_open+0x44/0x50 [] do_last.isra.14+0x2c4/0xc10 [] path_openat+0x80/0x5e8 [] do_filp_open+0x2c/0x98 [] do_sys_open+0x13c/0x228 [] SyS_openat+0xc/0x18 CPU1: stopping But there isn't any particularly good reason to hold struct_mutex for teardown, so just standardize on calling it without the mutex held and use the _unlocked() versions for GEM obj unref'ing Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- drivers/gpu/drm/msm/msm_drv.c | 2 +- drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 94a5bee69fe7..bbdcab0a56c1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -384,7 +384,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu) if (gpu->memptrs_bo) { if (gpu->memptrs_iova) msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id); - drm_gem_object_unreference(gpu->memptrs_bo); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); } release_firmware(gpu->pm4); release_firmware(gpu->pfp); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index cc6485ef2949..c80a6bee2b18 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -145,8 +145,8 @@ static int msm_unload(struct drm_device *dev) if (gpu) { mutex_lock(&dev->struct_mutex); gpu->funcs->pm_suspend(gpu); - gpu->funcs->destroy(gpu); mutex_unlock(&dev->struct_mutex); + gpu->funcs->destroy(gpu); } if (priv->vram.paddr) { diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 8171537dd7d1..1f14b908b221 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -56,6 +56,6 @@ fail: void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) { if (ring->bo) - drm_gem_object_unreference(ring->bo); + drm_gem_object_unreference_unlocked(ring->bo); kfree(ring); } From 
86b5e7de07aa1abc87ecc40d2aca6070348e4469 Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Wed, 13 May 2015 17:01:36 -0500 Subject: [PATCH 135/481] net: macb: Add better comment for RXUBR handling Describe the handler for RXUBR better with a new comment. Signed-off-by: Nathan Sullivan Reviewied-by: Josh Cartwright Reviewied-by: Ben Shelton Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 61aa570aad9a..5f10dfc631d7 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1037,6 +1037,12 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) * add that if/when we get our hands on a full-blown MII PHY. */ + /* There is a hardware issue under heavy load where DMA can + * stop, this causes endless "used buffer descriptor read" + * interrupts but it can be cleared by re-enabling RX. See + * the at91 manual, section 41.3.1 or the Zynq manual + * section 16.7.4 for details. + */ if (status & MACB_BIT(RXUBR)) { ctrl = macb_readl(bp, NCR); macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE)); From 595ca5880b37d4aa3c292d75531577175d36b225 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 May 2015 14:15:58 +0200 Subject: [PATCH 136/481] netfilter: avoid build error if TPROXY/SOCKET=y && NF_DEFRAG_IPV6=m With TPROXY=y but DEFRAG_IPV6=m we get build failure: net/built-in.o: In function `tproxy_tg_init': net/netfilter/xt_TPROXY.c:588: undefined reference to `nf_defrag_ipv6_enable' If DEFRAG_IPV6 is modular, TPROXY must be too. (or both must be builtin). This enforces =m for both. Reported-and-tested-by: Liu Hua Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f70e34a68f70..a0f3e6a3c7d1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -863,6 +863,7 @@ config NETFILTER_XT_TARGET_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED depends on (IPV6 || IPV6=n) + depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) depends on IP_NF_MANGLE select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES @@ -1356,6 +1357,7 @@ config NETFILTER_XT_MATCH_SOCKET depends on NETFILTER_ADVANCED depends on !NF_CONNTRACK || NF_CONNTRACK depends on (IPV6 || IPV6=n) + depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES help From b3cad287d13b5f6695c6b4aab72969cd64bf0171 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 7 May 2015 14:54:16 +0200 Subject: [PATCH 137/481] conntrack: RFC5961 challenge ACK confuse conntrack LAST-ACK transition In compliance with RFC5961, the network stack send challenge ACK in response to spurious SYN packets, since commit 0c228e833c88 ("tcp: Restore RFC5961-compliant behavior for SYN packets"). This pose a problem for netfilter conntrack in state LAST_ACK, because this challenge ACK is (falsely) seen as ACKing last FIN, causing a false state transition (into TIME_WAIT). The challenge ACK is hard to distinguish from real last ACK. Thus, solution introduce a flag that tracks the potential for seeing a challenge ACK, in case a SYN packet is let through and current state is LAST_ACK. When conntrack transition LAST_ACK to TIME_WAIT happens, this flag is used for determining if we are expecting a challenge ACK. 
Scapy based reproducer script avail here: https://github.com/netoptimizer/network-testing/blob/master/scapy/tcp_hacks_3WHS_LAST_ACK.py Fixes: 0c228e833c88 ("tcp: Restore RFC5961-compliant behavior for SYN packets") Signed-off-by: Jesper Dangaard Brouer Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- .../uapi/linux/netfilter/nf_conntrack_tcp.h | 3 ++ net/netfilter/nf_conntrack_proto_tcp.c | 35 +++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_conntrack_tcp.h b/include/uapi/linux/netfilter/nf_conntrack_tcp.h index 9993a421201c..ef9f80f0f529 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tcp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tcp.h @@ -42,6 +42,9 @@ enum tcp_conntrack { /* The field td_maxack has been set */ #define IP_CT_TCP_FLAG_MAXACK_SET 0x20 +/* Marks possibility for expected RFC5961 challenge ACK */ +#define IP_CT_EXP_CHALLENGE_ACK 0x40 + struct nf_ct_tcp_flags { __u8 flags; __u8 mask; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 5caa0c41bf26..70383de72054 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -202,7 +202,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. * sCW -> sCW - * sLA -> sTW Last ACK detected. + * sLA -> sTW Last ACK detected (RFC5961 challenged) * sTW -> sTW Retransmitted last ACK. Remain in the same state. * sCL -> sCL */ @@ -261,7 +261,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. * sCW -> sCW - * sLA -> sTW Last ACK detected. + * sLA -> sTW Last ACK detected (RFC5961 challenged) * sTW -> sTW Retransmitted last ACK. * sCL -> sCL */ @@ -906,6 +906,7 @@ static int tcp_packet(struct nf_conn *ct, 1 : ct->proto.tcp.last_win; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = ct->proto.tcp.last_wscale; + ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; memset(&ct->proto.tcp.seen[dir], 0, @@ -923,7 +924,9 @@ static int tcp_packet(struct nf_conn *ct, * may be in sync but we are not. In that case, we annotate * the TCP options and let the packet go through. If it is a * valid SYN packet, the server will reply with a SYN/ACK, and - * then we'll get in sync. Otherwise, the server ignores it. */ + * then we'll get in sync. Otherwise, the server potentially + * responds with a challenge ACK if implementing RFC5961. + */ if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { struct ip_ct_tcp_state seen = {}; @@ -939,6 +942,13 @@ static int tcp_packet(struct nf_conn *ct, ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_SACK_PERM; } + /* Mark the potential for RFC5961 challenge ACK, + * this pose a special problem for LAST_ACK state + * as ACK is intrepretated as ACKing last FIN. + */ + if (old_state == TCP_CONNTRACK_LAST_ACK) + ct->proto.tcp.last_flags |= + IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) @@ -970,6 +980,25 @@ static int tcp_packet(struct nf_conn *ct, nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; + case TCP_CONNTRACK_TIME_WAIT: + /* RFC5961 compliance cause stack to send "challenge-ACK" + * e.g. in response to spurious SYNs. Conntrack MUST + * not believe this ACK is acking last FIN. 
+ */ + if (old_state == TCP_CONNTRACK_LAST_ACK && + index == TCP_ACK_SET && + ct->proto.tcp.last_dir != dir && + ct->proto.tcp.last_index == TCP_SYN_SET && + (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) { + /* Detected RFC5961 challenge ACK */ + ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; + spin_unlock_bh(&ct->lock); + if (LOG_INVALID(net, IPPROTO_TCP)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: challenge-ACK ignored "); + return NF_ACCEPT; /* Don't change state */ + } + break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) From 8947e396a8296c5297928b60043f35dfa56baa05 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 14 May 2015 10:32:53 -0700 Subject: [PATCH 138/481] Documentation: dt: mtd: replace "nor-jedec" binding with "jedec, spi-nor" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 8ff16cf77ce3 ("Documentation: devicetree: m25p80: add "nor-jedec" binding"), we added a generic "nor-jedec" binding to catch all mostly-compatible SPI NOR flash which can be detected via the READ ID opcode (0x9F). This was discussed and reviewed at the time, however objections have come up since then as part of this discussion: http://lkml.kernel.org/g/20150511224646.GJ32500@ld-irv-0074 It seems the parties involved agree that "jedec,spi-nor" does a better job of capturing the fact that this is SPI-specific, not just any NOR flash. This binding was only merged for v4.1-rc1, so it's still OK to change the naming. At the same time, let's move the documentation to a better name. Next up: stop referring to code (drivers/mtd/devices/m25p80.c) from the documentation. Signed-off-by: Brian Norris Cc: Marek Vasut Cc: Rafał Miłecki Cc: Rob Herring Cc: Pawel Moll Cc: Ian Campbell Cc: Kumar Gala Cc: devicetree@vger.kernel.org Acked-by: Stephen Warren Acked-by: Geert Uytterhoeven Acked-by: Mark Rutland --- .../bindings/mtd/{m25p80.txt => jedec,spi-nor.txt} | 6 +++--- drivers/mtd/devices/m25p80.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) rename Documentation/devicetree/bindings/mtd/{m25p80.txt => jedec,spi-nor.txt} (85%) diff --git a/Documentation/devicetree/bindings/mtd/m25p80.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt similarity index 85% rename from Documentation/devicetree/bindings/mtd/m25p80.txt rename to Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt index f20b111b502a..2bee68103b01 100644 --- a/Documentation/devicetree/bindings/mtd/m25p80.txt +++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt @@ -8,8 +8,8 @@ Required properties: is not Linux-only, but in case of Linux, see the "m25p_ids" table in drivers/mtd/devices/m25p80.c for the list of supported chips. - Must also include "nor-jedec" for any SPI NOR flash that can be - identified by the JEDEC READ ID opcode (0x9F). + Must also include "jedec,spi-nor" for any SPI NOR flash that can + be identified by the JEDEC READ ID opcode (0x9F). 
- reg : Chip-Select number - spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at @@ -25,7 +25,7 @@ Example: flash: m25p80@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "spansion,m25p80", "nor-jedec"; + compatible = "spansion,m25p80", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <40000000>; m25p,fast-read; diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 7c8b1694a134..3af137f49ac9 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -223,7 +223,7 @@ static int m25p_probe(struct spi_device *spi) */ if (data && data->type) flash_name = data->type; - else if (!strcmp(spi->modalias, "nor-jedec")) + else if (!strcmp(spi->modalias, "spi-nor")) flash_name = NULL; /* auto-detect */ else flash_name = spi->modalias; @@ -255,7 +255,7 @@ static int m25p_remove(struct spi_device *spi) * since most of these flash are compatible to some extent, and their * differences can often be differentiated by the JEDEC read-ID command, we * encourage new users to add support to the spi-nor library, and simply bind - * against a generic string here (e.g., "nor-jedec"). + * against a generic string here (e.g., "jedec,spi-nor"). * * Many flash names are kept here in this list (as well as in spi-nor.c) to * keep them available as module aliases for existing platforms. @@ -305,7 +305,7 @@ static const struct spi_device_id m25p_ids[] = { * Generic support for SPI NOR that can be identified by the JEDEC READ * ID opcode (0x9F). Use this, if possible. */ - {"nor-jedec"}, + {"spi-nor"}, { }, }; MODULE_DEVICE_TABLE(spi, m25p_ids); From 960bd2c26421d321e890f1936938196ead41976f Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Fri, 15 May 2015 21:15:29 +0200 Subject: [PATCH 139/481] netfilter: nf_tables: fix bogus warning in nft_data_uninit() The values 0x00000000-0xfffffeff are reserved for userspace datatype. When, deleting set elements with maps, a bogus warning is triggered. WARNING: CPU: 0 PID: 11133 at net/netfilter/nf_tables_api.c:4481 nft_data_uninit+0x35/0x40 [nf_tables]() This fixes the check accordingly to enum definition in include/linux/netfilter/nf_tables.h Fixes: https://bugzilla.netfilter.org/show_bug.cgi?id=1013 Signed-off-by: Mirek Kratochvil Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ad9d11fb29fd..34ded09317e7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4472,9 +4472,9 @@ EXPORT_SYMBOL_GPL(nft_data_init); */ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) { - switch (type) { - case NFT_DATA_VALUE: + if (type < NFT_DATA_VERDICT) return; + switch (type) { case NFT_DATA_VERDICT: return nft_verdict_uninit(data); default: From 1f9993f6825f9cb93f75f794b66e7428dfc72467 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 15 May 2015 12:53:21 +0800 Subject: [PATCH 140/481] rocker: fix a neigh entry leak issue Once we get a neighbour through looking up arp cache or creating a new one in rocker_port_ipv4_resolve(), the neighbour's refcount is already taken. But as we don't put the refcount again after it's used, this makes the neighbour entry leaked. Suggested-by: Eric Dumazet Acked-by: Jiri Pirko Acked-by: Eric Dumazet Signed-off-by: Ying Xue Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/rocker/rocker.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index ec251531bd9f..cf98cc9bbc8d 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -2921,10 +2921,11 @@ static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port, struct neighbour *n = __ipv4_neigh_lookup(dev, (__force u32)ip_addr); int err = 0; - if (!n) + if (!n) { n = neigh_create(&arp_tbl, &ip_addr, dev); - if (!n) - return -ENOMEM; + if (IS_ERR(n)) + return IS_ERR(n); + } /* If the neigh is already resolved, then go ahead and * install the entry, otherwise start the ARP process to @@ -2936,6 +2937,7 @@ static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port, else neigh_event_send(n, NULL); + neigh_release(n); return err; } From 7e14069651591c81046ffaec13c3dac8cb70f5fb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 15 May 2015 16:30:41 -0700 Subject: [PATCH 141/481] net: phy: Allow EEE for all RGMII variants RGMII interfaces come in multiple flavors: RGMII with transmit or receive internal delay, no delays at all, or delays in both direction. This change extends the initial check for PHY_INTERFACE_MODE_RGMII to cover all of these variants since EEE should be allowed for any of these modes, since it is a property of the RGMII, hence Gigabit PHY capability more than the RGMII electrical interface and its delays. Fixes: a59a4d192166 ("phy: add the EEE support and the way to access to the MMD registers") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 52cd8db2c57d..757f28a4284c 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1053,13 +1053,14 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) { /* According to 802.3az,the EEE is supported only in full duplex-mode. * Also EEE feature is active when core is operating with MII, GMII - * or RGMII. Internal PHYs are also allowed to proceed and should - * return an error if they do not support EEE. + * or RGMII (all kinds). Internal PHYs are also allowed to proceed and + * should return an error if they do not support EEE. */ if ((phydev->duplex == DUPLEX_FULL) && ((phydev->interface == PHY_INTERFACE_MODE_MII) || (phydev->interface == PHY_INTERFACE_MODE_GMII) || - (phydev->interface == PHY_INTERFACE_MODE_RGMII) || + (phydev->interface >= PHY_INTERFACE_MODE_RGMII && + phydev->interface <= PHY_INTERFACE_MODE_RGMII_TXID) || phy_is_internal(phydev))) { int eee_lp, eee_cap, eee_adv; u32 lp, cap, adv; From c0bb07df7d981e4091432754e30c9c720e2c0c78 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 16 May 2015 21:50:28 +0800 Subject: [PATCH 142/481] netlink: Reset portid after netlink_insert failure The commit c5adde9468b0714a051eac7f9666f23eb10b61f7 ("netlink: eliminate nl_sk_hash_lock") breaks the autobind retry mechanism because it doesn't reset portid after a failed netlink_insert. This means that should autobind fail the first time around, then the socket will be stuck in limbo as it can never be bound again since it already has a non-zero portid. Fixes: c5adde9468b0 ("netlink: eliminate nl_sk_hash_lock") Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index dbe885901b34..bf6e76643f78 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1081,6 +1081,7 @@ static int netlink_insert(struct sock *sk, u32 portid) if (err) { if (err == -EEXIST) err = -EADDRINUSE; + nlk_sk(sk)->portid = 0; sock_put(sk); } From db9683fb412d4af33f66b9fe3d8dace1c6d113c9 Mon Sep 17 00:00:00 2001 From: Tim Beale Date: Wed, 13 May 2015 13:55:04 +1200 Subject: [PATCH 143/481] net: phy: Make sure PHY_RESUMING state change is always processed If phy_start_aneg() was called while the phydev is in the PHY_RESUMING state, then its state would immediately transition to PHY_AN (or PHY_FORCING). This meant the phy_state_machine() never processed the PHY_RESUMING state change, which meant interrupts weren't enabled for the PHY. If the PHY used low-power mode (i.e. using BMCR_PDOWN), then the physical link wouldn't get powered up again. There seems no point for phy_start_aneg() to make the PHY_RESUMING --> PHY_AN transition, as the state machine will do this anyway. I'm not sure about the case where autoneg is disabled, as my patch will change behaviour so that the PHY goes to PHY_NOLINK instead of PHY_FORCING. An alternative solution would be to move the phy_config_interrupt() and phy_resume() work out of the state machine and into phy_start(). The background behind this: we're running linux v3.16.7 and from user-space we want to enable the eth port (i.e. do a SIOCSIFFLAGS ioctl with the IFF_UP flag) and immediately afterward set the interface's speed/duplex. Enabling the interface calls .ndo_open() then phy_start() and the PHY transitions PHY_HALTED --> PHY_RESUMING. Setting the speed/duplex ends up calling phy_ethtool_sset(), which calls phy_start_aneg() (meanwhile the phy_state_machine() hasn't processed the PHY_RESUMING state change yet). Signed-off-by: Tim Beale Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 757f28a4284c..710696d1af97 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -465,7 +465,7 @@ int phy_start_aneg(struct phy_device *phydev) if (err < 0) goto out_unlock; - if (phydev->state != PHY_HALTED) { + if (phydev->state != PHY_HALTED && phydev->state != PHY_RESUMING) { if (AUTONEG_ENABLE == phydev->autoneg) { phydev->state = PHY_AN; phydev->link_timeout = PHY_AN_TIMEOUT; From 07ee0722bf941960fb3888f9c9b5839473372fd1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 15 May 2015 11:30:47 +0800 Subject: [PATCH 144/481] rhashtable: Add cap on number of elements in hash table We currently have no limit on the number of elements in a hash table. This is a problem because some users (tipc) set a ceiling on the maximum table size and when that is reached the hash table may degenerate. Others may encounter OOM when growing and if we allow insertions when that happens the hash table perofrmance may also suffer. This patch adds a new paramater insecure_max_entries which becomes the cap on the table. If unset it defaults to max_size * 2. If it is also zero it means that there is no cap on the number of elements in the table. However, the table will grow whenever the utilisation hits 100% and if that growth fails, you will get ENOMEM on insertion. As allowing oversubscription is potentially dangerous, the name contains the word insecure. 
Note that the cap is not a hard limit. This is done for performance reasons as enforcing a hard limit will result in use of atomic ops that are heavier than the ones we currently use. The reasoning is that we're only guarding against a gross over- subscription of the table, rather than a small breach of the limit. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 19 +++++++++++++++++++ lib/rhashtable.c | 11 +++++++++++ 2 files changed, 30 insertions(+) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index dbcbcc59aa92..843ceca9a21e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -17,6 +17,7 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H +#include #include #include #include @@ -100,6 +101,7 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed + * @insecure_max_entries: Maximum number of entries (may be exceeded) * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker @@ -115,6 +117,7 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; + unsigned int insecure_max_entries; unsigned int max_size; unsigned int min_size; u32 nulls_base; @@ -286,6 +289,18 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht, (!ht->p.max_size || tbl->size < ht->p.max_size); } +/** + * rht_grow_above_max - returns true if table is above maximum + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_max(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return ht->p.insecure_max_entries && + atomic_read(&ht->nelems) >= ht->p.insecure_max_entries; +} + /* The bucket lock is selected based on the hash and protects mutations * on a group of hash buckets. * @@ -589,6 +604,10 @@ restart: goto out; } + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto out; + if (unlikely(rht_grow_above_100(ht, tbl))) { slow_path: spin_unlock_bh(lock); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b28df4019ade..4396434e4715 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -14,6 +14,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -446,6 +447,10 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key, if (key && rhashtable_lookup_fast(ht, key, ht->p)) goto exit; + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto exit; + err = -EAGAIN; if (rhashtable_check_elasticity(ht, tbl, hash) || rht_grow_above_100(ht, tbl)) @@ -738,6 +743,12 @@ int rhashtable_init(struct rhashtable *ht, if (params->max_size) ht->p.max_size = rounddown_pow_of_two(params->max_size); + if (params->insecure_max_entries) + ht->p.insecure_max_entries = + rounddown_pow_of_two(params->insecure_max_entries); + else + ht->p.insecure_max_entries = ht->p.max_size * 2; + ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); /* The maximum (not average) chain length grows with the From ed2a80ab7b76f11af0b2c6255709c4ebf164b667 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 13 May 2015 14:19:42 +0200 Subject: [PATCH 145/481] rtnl/bond: don't send rtnl msg for unregistered iface Before the patch, the command 'ip link add bond2 type bond mode 802.3ad' causes the kernel to send a rtnl message for the bond2 interface, with an ifindex 0. 
'ip monitor' shows: 0: bond2: mtu 1500 state DOWN group default link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff 9: bond2@NONE: mtu 1500 qdisc noop state DOWN group default link/ether ea:3e:1f:53:92:7b brd ff:ff:ff:ff:ff:ff [snip] The patch fixes the spotted bug by checking in bond driver if the interface is registered before calling the notifier chain. It also adds a check in rtmsg_ifinfo() to prevent this kind of bug in the future. Fixes: d4261e565000 ("bonding: create netlink event when bonding option is changed") CC: Jiri Pirko Reported-by: Julien Meunier Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 2 +- net/core/rtnetlink.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 4df28943d222..e8d3c1d35453 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -624,7 +624,7 @@ int __bond_opt_set(struct bonding *bond, out: if (ret) bond_opt_error_interpret(bond, opt, ret, val); - else + else if (bond->dev->reg_state == NETREG_REGISTERED) call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev); return ret; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 666e0928ba40..8de36824018d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2416,6 +2416,9 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, { struct sk_buff *skb; + if (dev->reg_state != NETREG_REGISTERED) + return; + skb = rtmsg_ifinfo_build_skb(type, dev, change, flags); if (skb) rtmsg_ifinfo_send(skb, dev, flags); From 21858cd02dabcf290564cbf4769b101eba54d7bb Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Sat, 16 May 2015 00:24:59 +0200 Subject: [PATCH 146/481] tcp/ipv6: fix flow label setting in TIME_WAIT state commit 1d13a96c74fc ("ipv6: tcp: fix flowlabel value in ACK messages send from TIME_WAIT") added the flow label in the last TCP packets. Unfortunately, it was not casted properly. This patch replace the buggy shift with be32_to_cpu/cpu_to_be32. Fixes: 1d13a96c74fc ("ipv6: tcp: fix flowlabel value in ACK messages") Reported-by: Eric Dumazet Signed-off-by: Florent Fourcot Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e5d7649136fc..b5732a54f2ad 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -300,7 +300,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; - tw->tw_flowlabel = np->flow_label >> 12; + tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK); tw->tw_ipv6only = sk->sk_ipv6only; } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b6575d665568..3adffb300238 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -914,7 +914,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), - tw->tw_tclass, (tw->tw_flowlabel << 12)); + tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel)); inet_twsk_put(tw); } From 7b2a18e05feb86f9be25602abfa9604a6b977f79 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Fri, 15 May 2015 10:18:37 -0700 Subject: [PATCH 147/481] crypto: algif_aead - fix invalid sgl linking This patch fixes it. Also minor updates to comments. Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- crypto/algif_aead.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 00a6fe166fed..69abada22373 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -33,7 +33,7 @@ struct aead_ctx { /* * RSGL_MAX_ENTRIES is an artificial limit where user space at maximum * can cause the kernel to allocate RSGL_MAX_ENTRIES * ALG_MAX_PAGES - * bytes + * pages */ #define RSGL_MAX_ENTRIES ALG_MAX_PAGES struct af_alg_sgl rsgl[RSGL_MAX_ENTRIES]; @@ -435,11 +435,10 @@ static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored, if (err < 0) goto unlock; usedpages += err; - /* chain the new scatterlist with initial list */ + /* chain the new scatterlist with previous one */ if (cnt) - scatterwalk_crypto_chain(ctx->rsgl[0].sg, - ctx->rsgl[cnt].sg, 1, - sg_nents(ctx->rsgl[cnt-1].sg)); + af_alg_link_sg(&ctx->rsgl[cnt-1], &ctx->rsgl[cnt]); + /* we do not need more iovecs as we have sufficient memory */ if (outlen <= usedpages) break; From 1f8b46cdc806dfd76386f8442d9a6d0ae69abb25 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Tue, 12 May 2015 14:38:15 +0200 Subject: [PATCH 148/481] ALSA: hda - Fix headset mic and mic-in for a Dell desktop ALC662 does not need any special verbs to change the jack functionality, and enables mic in through the headphone jack mode by changing the direction of the headphone pin node. 
BugLink: https://bugs.launchpad.net/bugs/1454235 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 46 ++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 37c2c9f91df0..4202a80db2ce 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3674,6 +3674,10 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, alc_process_coef_fw(codec, coef0293); snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50); break; + case 0x10ec0662: + snd_hda_set_pin_ctl_cache(codec, hp_pin, 0); + snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50); + break; case 0x10ec0668: alc_write_coef_idx(codec, 0x11, 0x0001); snd_hda_set_pin_ctl_cache(codec, hp_pin, 0); @@ -4012,7 +4016,7 @@ static void alc_update_headset_mode(struct hda_codec *codec) if (new_headset_mode != ALC_HEADSET_MODE_MIC) { snd_hda_set_pin_ctl_cache(codec, hp_pin, AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN); - if (spec->headphone_mic_pin) + if (spec->headphone_mic_pin && spec->headphone_mic_pin != hp_pin) snd_hda_set_pin_ctl_cache(codec, spec->headphone_mic_pin, PIN_VREFHIZ); } @@ -4215,6 +4219,18 @@ static void alc_fixup_dell_xps13(struct hda_codec *codec, } } +static void alc_fixup_headset_mode_alc662(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; + spec->gen.hp_mic = 1; /* Mic-in is same pin as headphone */ + } else + alc_fixup_headset_mode(codec, fix, action); +} + static void alc_fixup_headset_mode_alc668(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -6087,7 +6103,9 @@ enum { ALC662_FIXUP_NO_JACK_DETECT, ALC662_FIXUP_ZOTAC_Z68, ALC662_FIXUP_INV_DMIC, + ALC662_FIXUP_DELL_MIC_NO_PRESENCE, ALC668_FIXUP_DELL_MIC_NO_PRESENCE, + ALC662_FIXUP_HEADSET_MODE, ALC668_FIXUP_HEADSET_MODE, ALC662_FIXUP_BASS_MODE4_CHMAP, ALC662_FIXUP_BASS_16, @@ -6280,6 +6298,20 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC668_FIXUP_DELL_MIC_NO_PRESENCE }, + [ALC662_FIXUP_DELL_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a1113c }, /* use as headset mic, without its own jack detect */ + /* headphone mic by setting pin control of 0x1b (headphone out) to in + vref_50 */ + { } + }, + .chained = true, + .chain_id = ALC662_FIXUP_HEADSET_MODE + }, + [ALC662_FIXUP_HEADSET_MODE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_mode_alc662, + }, [ALC668_FIXUP_DELL_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -6431,6 +6463,18 @@ static const struct hda_model_fixup alc662_fixup_models[] = { }; static const struct snd_hda_pin_quirk alc662_pin_fixup_tbl[] = { + SND_HDA_PIN_QUIRK(0x10ec0662, 0x1028, "Dell", ALC662_FIXUP_DELL_MIC_NO_PRESENCE, + {0x12, 0x4004c000}, + {0x14, 0x01014010}, + {0x15, 0x411111f0}, + {0x16, 0x411111f0}, + {0x18, 0x01a19020}, + {0x19, 0x411111f0}, + {0x1a, 0x0181302f}, + {0x1b, 0x0221401f}, + {0x1c, 0x411111f0}, + {0x1d, 0x4054c601}, + {0x1e, 0x411111f0}), SND_HDA_PIN_QUIRK(0x10ec0668, 0x1028, "Dell", ALC668_FIXUP_AUTO_MUTE, {0x12, 0x99a30130}, {0x14, 0x90170110}, From 6ffc0898b29a2811a6c0569c5dd9b581980110df Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Wed, 13 May 2015 13:28:54 +0200 Subject: [PATCH 149/481] ALSA: hda - Add 
Conexant codecs CX20721, CX20722, CX20723 and CX20724 This patch adds support for Conexant HD Audio codecs CX20721, CX20722, CX20723 and CX20724. Cc: stable@vger.kernel.org BugLink: https://bugs.launchpad.net/bugs/1454656 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index f8f0dfbef149..78b719b5b34d 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -968,6 +968,14 @@ static const struct hda_codec_preset snd_hda_preset_conexant[] = { .patch = patch_conexant_auto }, { .id = 0x14f150b9, .name = "CX20665", .patch = patch_conexant_auto }, + { .id = 0x14f150f1, .name = "CX20721", + .patch = patch_conexant_auto }, + { .id = 0x14f150f2, .name = "CX20722", + .patch = patch_conexant_auto }, + { .id = 0x14f150f3, .name = "CX20723", + .patch = patch_conexant_auto }, + { .id = 0x14f150f4, .name = "CX20724", + .patch = patch_conexant_auto }, { .id = 0x14f1510f, .name = "CX20751/2", .patch = patch_conexant_auto }, { .id = 0x14f15110, .name = "CX20751/2", @@ -1002,6 +1010,10 @@ MODULE_ALIAS("snd-hda-codec-id:14f150ab"); MODULE_ALIAS("snd-hda-codec-id:14f150ac"); MODULE_ALIAS("snd-hda-codec-id:14f150b8"); MODULE_ALIAS("snd-hda-codec-id:14f150b9"); +MODULE_ALIAS("snd-hda-codec-id:14f150f1"); +MODULE_ALIAS("snd-hda-codec-id:14f150f2"); +MODULE_ALIAS("snd-hda-codec-id:14f150f3"); +MODULE_ALIAS("snd-hda-codec-id:14f150f4"); MODULE_ALIAS("snd-hda-codec-id:14f1510f"); MODULE_ALIAS("snd-hda-codec-id:14f15110"); MODULE_ALIAS("snd-hda-codec-id:14f15111"); From 09ea997677cd44ebe7f42573119aaf46b775c683 Mon Sep 17 00:00:00 2001 From: Ansgar Hegerfeld Date: Thu, 14 May 2015 12:31:32 +0200 Subject: [PATCH 150/481] ALSA: hda/realtek - ALC292 dock fix for Thinkpad L450 The Lenovo ThinkPad L450 requires the ALC292_FIXUP_TPT440_DOCK fix in order to get sound output on the docking stations audio port. This patch was tested using a ThinkPad L450 (20DSS00B00) using kernel 4.0.3 and a ThinkPad Pro Dock. Signed-off-by: Ansgar Hegerfeld Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4202a80db2ce..2e246fe495f6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5165,6 +5165,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), From 60c8f783a18feb95ad967c87e9660caf09fb4700 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Wed, 6 May 2015 15:16:46 +0200 Subject: [PATCH 151/481] mmc: atmel-mci: fix bad variable type for clkdiv clkdiv is declared as an u32 but it can be set to a negative value causing a huge divisor value. Change its type to int to avoid this case. 
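To make the overflow concrete (the clock numbers are illustrative, not taken from the patch): the driver computes clkdiv = DIV_ROUND_UP(host->bus_hz, clock_min) - 2, so a request for a clock at or above bus_hz gives, for example,

	DIV_ROUND_UP(50000000, 50000000) - 2 = 1 - 2 = -1

and storing -1 in a u32 wraps it to 0xffffffff. The old code then takes the "clock %u too slow" branch and ends up with the slowest clock, bus_hz / (511 + 2), when the caller actually asked for the fastest one. With clkdiv declared as int the value stays negative, so the new "clkdiv < 0" branch below can spot the too-fast request and fall back to divider 0, i.e. bus_hz / 2.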
Signed-off-by: Ludovic Desroches Cc: # 3.4 and later Signed-off-by: Ulf Hansson --- drivers/mmc/host/atmel-mci.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 03d7c7521d97..9a39e0b7e583 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1304,7 +1304,7 @@ static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (ios->clock) { unsigned int clock_min = ~0U; - u32 clkdiv; + int clkdiv; spin_lock_bh(&host->lock); if (!host->mode_reg) { @@ -1328,7 +1328,12 @@ static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* Calculate clock divider */ if (host->caps.has_odd_clk_div) { clkdiv = DIV_ROUND_UP(host->bus_hz, clock_min) - 2; - if (clkdiv > 511) { + if (clkdiv < 0) { + dev_warn(&mmc->class_dev, + "clock %u too fast; using %lu\n", + clock_min, host->bus_hz / 2); + clkdiv = 0; + } else if (clkdiv > 511) { dev_warn(&mmc->class_dev, "clock %u too slow; using %lu\n", clock_min, host->bus_hz / (511 + 2)); From 17fea54bf0ab34fa09a06bbde2f58ed7bbdf9299 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 18 May 2015 10:07:17 +0200 Subject: [PATCH 152/481] x86/mce: Fix MCE severity messages Derek noticed that a critical MCE gets reported with the wrong error type description: [Hardware Error]: CPU 34: Machine Check Exception: 5 Bank 9: f200003f000100b0 [Hardware Error]: RIP !INEXACT! 10: {intel_idle+0xb1/0x170} [Hardware Error]: TSC 49587b8e321cb [Hardware Error]: PROCESSOR 0:306e4 TIME 1431561296 SOCKET 1 APIC 29 [Hardware Error]: Some CPUs didn't answer in synchronization [Hardware Error]: Machine check: Invalid ^^^^^^^ The last line with 'Invalid' should have printed the high level MCE error type description we get from mce_severity, i.e. something like: [Hardware Error]: Machine check: Action required: data load error in a user process this happens due to the fact that mce_no_way_out() iterates over all MCA banks and possibly overwrites the @msg argument which is used in the panic printing later. Change behavior to take the message of only and the (last) critical MCE it detects. Reported-by: Derek Signed-off-by: Borislav Petkov Cc: Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1431936437-25286-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e535533d5ab8..20190bdac9d5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, struct pt_regs *regs) { int i, ret = 0; + char *tmp; for (i = 0; i < mca_cfg.banks; i++) { m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); @@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); } - if (mce_severity(m, mca_cfg.tolerant, msg, true) >= - MCE_PANIC_SEVERITY) + + if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + *msg = tmp; ret = 1; + } } return ret; } From ea8e080b604e2c8221af778221d83a59b6529c5b Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Mon, 18 May 2015 10:07:16 +0200 Subject: [PATCH 153/481] x86/Documentation: Update the contact email for L3 cache index disable functionality The mailing list discuss@x86-64.org is now defunct. Use the lkml in its place. Signed-off-by: Aravind Gopalakrishnan Signed-off-by: Borislav Petkov Cc: Greg KH Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: sboyd@codeaurora.org Cc: sudeep.holla@arm.com Link: http://lkml.kernel.org/r/1431125098-9470-1-git-send-email-Aravind.Gopalakrishnan@amd.com Link: http://lkml.kernel.org/r/1431936437-25286-2-git-send-email-bp@alien8.de [ Changed the contact email to lkml. ] Signed-off-by: Ingo Molnar --- Documentation/ABI/testing/sysfs-devices-system-cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 99983e67c13c..da95513571ea 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -162,7 +162,7 @@ Description: Discover CPUs in the same CPU frequency coordination domain What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1} Date: August 2008 KernelVersion: 2.6.27 -Contact: discuss@x86-64.org +Contact: Linux kernel mailing list Description: Disable L3 cache indices These files exist in every CPU's cache/index3 directory. Each From ffb7dbed47da6ac4460b606a3feee295bbe4d9e2 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 7 May 2015 12:55:23 -0400 Subject: [PATCH 154/481] xen/arm: Define xen_arch_suspend() Commit 2b953a5e994c ("xen: Suspend ticks on all CPUs during suspend") introduced xen_arch_suspend() routine but did so only for x86, breaking ARM builds. We need to add it to ARM as well. 
Signed-off-by: Boris Ostrovsky Reported-by: Michal Suchanek Tested-by: Stefano Stabellini Signed-off-by: David Vrabel --- arch/arm/xen/enlighten.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 224081ccc92f..7d0f07020c80 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -272,6 +272,7 @@ void xen_arch_pre_suspend(void) { } void xen_arch_post_suspend(int suspend_cancelled) { } void xen_timer_resume(void) { } void xen_arch_resume(void) { } +void xen_arch_suspend(void) { } /* In the hypervisor.S file. */ From 314fdf4473bc05fa9a1f33b27b98eb0f0c836f71 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 19 May 2015 11:32:32 +0530 Subject: [PATCH 155/481] RDMA/ocrdma: Fix EQ destroy failure during driver unload Changing the destroy sequence of mailbox queue and event queues. FW expects mailbox queue to be destroyed before desroying the EQs. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 0c9e95909a64..3a5ea5afc2a2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -3147,9 +3147,9 @@ void ocrdma_cleanup_hw(struct ocrdma_dev *dev) ocrdma_free_pd_pool(dev); ocrdma_mbx_delete_ah_tbl(dev); - /* cleanup the eqs */ - ocrdma_destroy_eqs(dev); - /* cleanup the control path */ ocrdma_destroy_mq(dev); + + /* cleanup the eqs */ + ocrdma_destroy_eqs(dev); } From 5e6f9237f8e676e8a9110b4dafed36b1dd0b5d84 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 19 May 2015 11:32:33 +0530 Subject: [PATCH 156/481] RDMA/ocrdma: Report EQ full fatal error Detect when Event Queue (EQ) becomes full and print a warning message. 
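The overflow indication comes from a major-code field that the firmware packs into the low bits of the EQE's id_valid word, alongside the valid bit; a major code of 1 is the sentinel meaning the queue overflowed. A standalone decode sketch using the mask and shift values this patch adds (the sample words below are invented):

    #include <stdint.h>
    #include <stdio.h>

    #define OCRDMA_EQE_VALID_MASK       0x01u  /* BIT(0)   */
    #define OCRDMA_EQE_MAJOR_CODE_MASK  0x0Eu  /* bits 3:1 */
    #define OCRDMA_EQE_MAJOR_CODE_SHIFT 1
    #define OCRDMA_MAJOR_CODE_SENTINAL  0x01u  /* EQ overflowed */

    static void decode_eqe(uint32_t id_valid)
    {
        uint32_t mcode = (id_valid & OCRDMA_EQE_MAJOR_CODE_MASK)
                                >> OCRDMA_EQE_MAJOR_CODE_SHIFT;

        if (mcode == OCRDMA_MAJOR_CODE_SENTINAL)
            printf("EQ full, eqe = 0x%x\n", id_valid);
        else if (id_valid & OCRDMA_EQE_VALID_MASK)
            printf("normal completion entry, eqe = 0x%x\n", id_valid);
        else
            printf("invalid entry, stop polling\n");
    }

    int main(void)
    {
        decode_eqe(0x00010003); /* hypothetical: valid + sentinel major code */
        decode_eqe(0x00010001); /* hypothetical: ordinary valid completion   */
        decode_eqe(0x00000000); /* empty slot                                */
        return 0;
    }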
Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 6 ++++++ drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 3a5ea5afc2a2..65759acb12dc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -933,12 +933,18 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle) struct ocrdma_eqe eqe; struct ocrdma_eqe *ptr; u16 cq_id; + u8 mcode; int budget = eq->cq_cnt; do { ptr = ocrdma_get_eqe(eq); eqe = *ptr; ocrdma_le32_to_cpu(&eqe, sizeof(eqe)); + mcode = (eqe.id_valid & OCRDMA_EQE_MAJOR_CODE_MASK) + >> OCRDMA_EQE_MAJOR_CODE_SHIFT; + if (mcode == OCRDMA_MAJOR_CODE_SENTINAL) + pr_err("EQ full on eqid = 0x%x, eqe = 0x%x\n", + eq->q.id, eqe.id_valid); if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0) break; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 243c87c8bd65..baf9b8a0f278 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1624,12 +1624,19 @@ struct ocrdma_delete_ah_tbl_rsp { enum { OCRDMA_EQE_VALID_SHIFT = 0, OCRDMA_EQE_VALID_MASK = BIT(0), + OCRDMA_EQE_MAJOR_CODE_MASK = 0x0E, + OCRDMA_EQE_MAJOR_CODE_SHIFT = 0x01, OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE, OCRDMA_EQE_RESOURCE_ID_SHIFT = 16, OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF << OCRDMA_EQE_RESOURCE_ID_SHIFT, }; +enum major_code { + OCRDMA_MAJOR_CODE_COMPLETION = 0x00, + OCRDMA_MAJOR_CODE_SENTINAL = 0x01 +}; + struct ocrdma_eqe { u32 id_valid; }; From fe48822bc6d5b455aab963038a9c776d641645cc Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 19 May 2015 11:32:34 +0530 Subject: [PATCH 157/481] RDMA/ocrdma: Fix QP state transition in destroy_qp Don't move QP to error state, if QP is in reset state during QP destroy operation. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 877175563634..06e8ab7825b9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1721,18 +1721,20 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) struct ocrdma_qp *qp; struct ocrdma_dev *dev; struct ib_qp_attr attrs; - int attr_mask = IB_QP_STATE; + int attr_mask; unsigned long flags; qp = get_ocrdma_qp(ibqp); dev = get_ocrdma_dev(ibqp->device); - attrs.qp_state = IB_QPS_ERR; pd = qp->pd; /* change the QP state to ERROR */ - _ocrdma_modify_qp(ibqp, &attrs, attr_mask); - + if (qp->state != OCRDMA_QPS_RST) { + attrs.qp_state = IB_QPS_ERR; + attr_mask = IB_QP_STATE; + _ocrdma_modify_qp(ibqp, &attrs, attr_mask); + } /* ensure that CQEs for newly created QP (whose id may be same with * one which just getting destroyed are same), dont get * discarded until the old CQEs are discarded. From 6f5deab0bed6bcfad0dbafcb40a1e269a01ab01d Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 19 May 2015 11:32:35 +0530 Subject: [PATCH 158/481] RDMA/ocrdma: Use VID 0 if PFC is enabled and vlan is not configured If the adapter ports are in PFC mode and VLAN is not configured, use vlan tag 0 for RoCE traffic. Also, log an advisory message in system logs. 
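VLAN ID 0 works here because PFC priorities travel in the PCP bits of the 802.1Q tag, not in the VLAN ID: a frame tagged with VID 0 is only "priority tagged", so the fabric still sees the priority needed for flow control without assigning the port to a VLAN. A small sketch of how such a tag control field is put together (standard 802.1Q layout; the service level value is invented):

    #include <stdint.h>
    #include <stdio.h>

    #define VLAN_ETH_TYPE   0x8100u  /* 802.1Q tag follows               */
    #define PCP_SHIFT       13       /* priority code point, bits 15:13  */
    #define VID_MASK        0x0FFFu  /* VLAN ID, bits 11:0               */

    static uint16_t build_tci(uint16_t vid, uint8_t sl)
    {
        /* VID 0 means "priority tagged": switches honour the PCP bits
         * (used by PFC) without putting the frame into a VLAN. */
        return ((sl & 0x07) << PCP_SHIFT) | (vid & VID_MASK);
    }

    int main(void)
    {
        uint8_t sl = 3;                  /* hypothetical RoCE service level */
        uint16_t tci = build_tci(0, sl);

        printf("ethertype 0x%04x, tci 0x%04x (pcp=%u vid=%u)\n",
               VLAN_ETH_TYPE, tci, tci >> PCP_SHIFT, tci & VID_MASK);
        return 0;
    }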
Signed-off-by: Padmanabh Ratnakar Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 8 +++++++- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 12 +++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index d812904f3984..2a5993b75651 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -56,7 +56,13 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, vlan_tag = attr->vlan_id; if (!vlan_tag || (vlan_tag > 0xFFF)) vlan_tag = dev->pvid; - if (vlan_tag && (vlan_tag < 0x1000)) { + if (vlan_tag || dev->pfc_state) { + if (!vlan_tag) { + pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", + dev->id); + pr_err("ocrdma%d:Using VLAN 0 for this connection\n", + dev->id); + } eth.eth_type = cpu_to_be16(0x8100); eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 65759acb12dc..da688d78fc02 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2434,7 +2434,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; union ib_gid sgid, zgid; - u32 vlan_id; + u32 vlan_id = 0xFFFF; u8 mac_addr[6]; struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); @@ -2474,12 +2474,22 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); if (attr_mask & IB_QP_VID) { vlan_id = attrs->vlan_id; + } else if (dev->pfc_state) { + vlan_id = 0; + pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", + dev->id); + pr_err("ocrdma%d:Using VLAN 0 for this connection\n", + dev->id); + } + + if (vlan_id < 0x1000) { cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; cmd->params.rnt_rc_sl_fl |= (dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT; } + return 0; } From 038ab8b7432c0280f064a47173dc5b6412243674 Mon Sep 17 00:00:00 2001 From: Mitesh Ahuja Date: Tue, 19 May 2015 11:32:36 +0530 Subject: [PATCH 159/481] RDMA/ocrdma: Fix the request length for RDMA_QUERY_QP mailbox command to FW. Fix ocrdma_query_qp to pass correct mailbox request length to FW. 
Signed-off-by: Mitesh Ahuja Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index da688d78fc02..5636eb6f9307 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2412,7 +2412,7 @@ int ocrdma_mbx_query_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, struct ocrdma_query_qp *cmd; struct ocrdma_query_qp_rsp *rsp; - cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*cmd)); + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*rsp)); if (!cmd) return status; cmd->qp_id = qp->id; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index baf9b8a0f278..02ad0aee99af 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1176,6 +1176,8 @@ struct ocrdma_query_qp_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; struct ocrdma_qp_params params; + u32 dpp_credits_cqid; + u32 rbq_id; }; enum { From 59582d86e23f30466413a04cc4db678b09f937ea Mon Sep 17 00:00:00 2001 From: Mitesh Ahuja Date: Tue, 19 May 2015 11:32:37 +0530 Subject: [PATCH 160/481] RDMA/ocrdma: Prevent allocation of DPP PDs if FW doesnt support it If DPP PDs are not supported by the FW, allocate only normal PDs. Signed-off-by: Mitesh Ahuja Signed-off-by: Padmanabh Ratnakar Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 51 ++++++++++----------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 5636eb6f9307..ac7347ad90be 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1440,27 +1440,30 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev) struct ocrdma_alloc_pd_range_rsp *rsp; /* Pre allocate the DPP PDs */ - cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD_RANGE, sizeof(*cmd)); - if (!cmd) - return -ENOMEM; - cmd->pd_count = dev->attr.max_dpp_pds; - cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP; - status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; - rsp = (struct ocrdma_alloc_pd_range_rsp *)cmd; + if (dev->attr.max_dpp_pds) { + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD_RANGE, + sizeof(*cmd)); + if (!cmd) + return -ENOMEM; + cmd->pd_count = dev->attr.max_dpp_pds; + cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + rsp = (struct ocrdma_alloc_pd_range_rsp *)cmd; - if ((rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) && rsp->pd_count) { - dev->pd_mgr->dpp_page_index = rsp->dpp_page_pdid >> - OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT; - dev->pd_mgr->pd_dpp_start = rsp->dpp_page_pdid & - OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK; - dev->pd_mgr->max_dpp_pd = rsp->pd_count; - pd_bitmap_size = BITS_TO_LONGS(rsp->pd_count) * sizeof(long); - dev->pd_mgr->pd_dpp_bitmap = kzalloc(pd_bitmap_size, - GFP_KERNEL); + if (!status && (rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) && + rsp->pd_count) { + dev->pd_mgr->dpp_page_index = rsp->dpp_page_pdid >> + OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT; + dev->pd_mgr->pd_dpp_start = rsp->dpp_page_pdid & + OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK; + 
dev->pd_mgr->max_dpp_pd = rsp->pd_count; + pd_bitmap_size = + BITS_TO_LONGS(rsp->pd_count) * sizeof(long); + dev->pd_mgr->pd_dpp_bitmap = kzalloc(pd_bitmap_size, + GFP_KERNEL); + } + kfree(cmd); } - kfree(cmd); cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD_RANGE, sizeof(*cmd)); if (!cmd) @@ -1468,10 +1471,8 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev) cmd->pd_count = dev->attr.max_pd - dev->attr.max_dpp_pds; status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; rsp = (struct ocrdma_alloc_pd_range_rsp *)cmd; - if (rsp->pd_count) { + if (!status && rsp->pd_count) { dev->pd_mgr->pd_norm_start = rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK; dev->pd_mgr->max_normal_pd = rsp->pd_count; @@ -1479,15 +1480,13 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev) dev->pd_mgr->pd_norm_bitmap = kzalloc(pd_bitmap_size, GFP_KERNEL); } + kfree(cmd); if (dev->pd_mgr->pd_norm_bitmap || dev->pd_mgr->pd_dpp_bitmap) { /* Enable PD resource manager */ dev->pd_mgr->pd_prealloc_valid = true; - } else { - return -ENOMEM; + return 0; } -mbx_err: - kfree(cmd); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 06e8ab7825b9..9dcb66077d6c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -365,7 +365,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, if (!pd) return ERR_PTR(-ENOMEM); - if (udata && uctx) { + if (udata && uctx && dev->attr.max_dpp_pds) { pd->dpp_enabled = ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; pd->num_dpp_qp = From d27b2f15eb535b4f02a75070108c08cefb540257 Mon Sep 17 00:00:00 2001 From: Mitesh Ahuja Date: Tue, 19 May 2015 11:32:38 +0530 Subject: [PATCH 161/481] RDMA/ocrdma: Fix dmac resolution for link local address rdma_addr_find_dmac_by_grh fails to resolve dmac for link local address. Use rdma_get_ll_mac to resolve the link local address. 
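rdma_get_ll_mac() works for these destinations because a link-local GID embeds the peer's MAC address directly as a modified EUI-64 (first octet with the universal/local bit flipped, ff:fe stuffed into the middle), so no neighbour lookup is required. A standalone sketch of recovering the MAC, using a made-up example GID:

    #include <stdint.h>
    #include <stdio.h>

    /* Recover the MAC embedded in a link-local IPv6/RoCE GID,
     * i.e. the inverse of the modified EUI-64 construction. */
    static void ll_gid_to_mac(const uint8_t gid[16], uint8_t mac[6])
    {
        mac[0] = gid[8] ^ 0x02;  /* undo the universal/local bit flip */
        mac[1] = gid[9];
        mac[2] = gid[10];
        /* gid[11] == 0xff and gid[12] == 0xfe are the EUI-64 filler */
        mac[3] = gid[13];
        mac[4] = gid[14];
        mac[5] = gid[15];
    }

    int main(void)
    {
        /* Hypothetical fe80:: GID for MAC 00:11:22:33:44:55 */
        const uint8_t gid[16] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
                                  0x02, 0x11, 0x22, 0xff, 0xfe, 0x33, 0x44, 0x55 };
        uint8_t mac[6];

        ll_gid_to_mac(gid, mac);
        printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
               mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
        return 0;
    }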
Signed-off-by: Padmanabh Ratnakar Signed-off-by: Mitesh Ahuja Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma.h | 2 ++ drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index c9780d919769..ee9e3351a64c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -515,6 +515,8 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev, memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); if (rdma_is_multicast_addr(&in6)) rdma_get_mcast_mac(&in6, mac_addr); + else if (rdma_link_local_addr(&in6)) + rdma_get_ll_mac(&in6, mac_addr); else memcpy(mac_addr, ah_attr->dmac, ETH_ALEN); return 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 2a5993b75651..f5a5ea836dbd 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -127,7 +127,9 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) goto av_conf_err; } - if (pd->uctx) { + if ((pd->uctx) && + (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && + (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) { status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid, attr->dmac, &attr->vlan_id); if (status) { From 72d8a013d75fc845a34451bdce3d3a3dd97488f1 Mon Sep 17 00:00:00 2001 From: Naga Irrinki Date: Tue, 19 May 2015 11:32:39 +0530 Subject: [PATCH 162/481] RDMA/ocrdma: Fail connection for MTU lesser than 512 HW currently restricts the IB MTU range between 512 and 4096. Fail connection for MTUs lesser than 512. 
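path_mtu in the verbs API is an enum rather than a byte count, so the tightened check compares enum values and the new log line converts to bytes only for readability. A small sketch of the mapping and the check (enum values as defined by the IB verbs headers; the rejected request is invented):

    #include <stdio.h>

    enum ib_mtu { IB_MTU_256 = 1, IB_MTU_512, IB_MTU_1024, IB_MTU_2048, IB_MTU_4096 };

    static int ib_mtu_enum_to_int(enum ib_mtu mtu)
    {
        return 128 << mtu;   /* same values as the kernel helper: 1 -> 256 ... 5 -> 4096 */
    }

    static int check_path_mtu(enum ib_mtu mtu)
    {
        if (mtu < IB_MTU_512 || mtu > IB_MTU_4096) {
            printf("IB MTU %d is not supported\n", ib_mtu_enum_to_int(mtu));
            return -1;   /* -EINVAL in the driver */
        }
        return 0;
    }

    int main(void)
    {
        check_path_mtu(IB_MTU_256);   /* rejected: below the hardware minimum */
        return check_path_mtu(IB_MTU_1024) ? 1 : 0;
    }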
Signed-off-by: Naga Irrinki Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index ac7347ad90be..47615ff33bc6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2534,8 +2534,10 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID; } if (attr_mask & IB_QP_PATH_MTU) { - if (attrs->path_mtu < IB_MTU_256 || + if (attrs->path_mtu < IB_MTU_512 || attrs->path_mtu > IB_MTU_4096) { + pr_err("ocrdma%d: IB MTU %d is not supported\n", + dev->id, ib_mtu_enum_to_int(attrs->path_mtu)); status = -EINVAL; goto pmtu_err; } From 235dfcd47e4442b2d8766bed3ebaf655b36c7f1c Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 19 May 2015 11:32:40 +0530 Subject: [PATCH 163/481] RDMA/ocrdma: Update ocrdma version number Updating the driver version to 10.6.0.0 Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index ee9e3351a64c..b396344fae16 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -40,7 +40,7 @@ #include #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DRV_VERSION "10.4.205.0u" +#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0" #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" From ab992dc38f9ae40b3ab996d68449692d464c98cf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 May 2015 11:31:50 +0200 Subject: [PATCH 164/481] watchdog: Fix merge 'conflict' Two watchdog changes that came through different trees had a non conflicting conflict, that is, one changed the semantics of a variable but no actual code conflict happened. So the merge appeared fine, but the resulting code did not behave as expected. Commit 195daf665a62 ("watchdog: enable the new user interface of the watchdog mechanism") changes the semantics of watchdog_user_enabled, which thereafter is only used by the functions introduced by b3738d293233 ("watchdog: Add watchdog enable/disable all functions"). There further appears to be a distinct lack of serialization between setting and using watchdog_enabled, so perhaps we should wrap the {en,dis}able_all() things in watchdog_proc_mutex. This patch fixes a s2r failure reported by Michal; which I cannot readily explain. But this does make the code internally consistent again. 
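The underlying point is that watchdog_enabled is now a two-bit mask, one bit for the soft lockup detector and one for the NMI watchdog, so a plain "user enabled" boolean can no longer say whether the NMI side should be touched, and reads of the mask want the same mutex the proc handlers take. A userspace-flavoured sketch of the check (bit positions and the pthread mutex are stand-ins for the kernel definitions):

    #include <stdio.h>
    #include <pthread.h>

    #define NMI_WATCHDOG_ENABLED   (1 << 0)   /* hard lockup detector, illustrative bit */
    #define SOFT_WATCHDOG_ENABLED  (1 << 1)   /* soft lockup detector, illustrative bit */

    static unsigned long watchdog_enabled = SOFT_WATCHDOG_ENABLED; /* NMI part off */
    static pthread_mutex_t watchdog_proc_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void watchdog_nmi_enable_all(void)
    {
        pthread_mutex_lock(&watchdog_proc_mutex);
        /* Only the NMI bit matters here; a boolean "user enabled" flag
         * cannot distinguish "soft only" from "soft + NMI". */
        if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
            printf("re-arming NMI watchdog on every CPU\n");
        else
            printf("NMI watchdog disabled, nothing to do\n");
        pthread_mutex_unlock(&watchdog_proc_mutex);
    }

    int main(void)
    {
        watchdog_nmi_enable_all();
        return 0;
    }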
Reported-and-tested-by: Michal Hocko Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 2316f50b07a4..506edcc500c4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -41,6 +41,8 @@ #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) +static DEFINE_MUTEX(watchdog_proc_mutex); + #ifdef CONFIG_HARDLOCKUP_DETECTOR static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else @@ -608,26 +610,36 @@ void watchdog_nmi_enable_all(void) { int cpu; - if (!watchdog_user_enabled) - return; + mutex_lock(&watchdog_proc_mutex); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_enable(cpu); put_online_cpus(); + +unlock: + mutex_lock(&watchdog_proc_mutex); } void watchdog_nmi_disable_all(void) { int cpu; + mutex_lock(&watchdog_proc_mutex); + if (!watchdog_running) - return; + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_disable(cpu); put_online_cpus(); + +unlock: + mutex_unlock(&watchdog_proc_mutex); } #else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } @@ -744,8 +756,6 @@ static int proc_watchdog_update(void) } -static DEFINE_MUTEX(watchdog_proc_mutex); - /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * From e26081808edadfd257c6c9d81014e3b25e9a6118 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 18 May 2015 10:13:47 -0700 Subject: [PATCH 165/481] Linux 4.1-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index eae539d69bf3..dc20bcb9b271 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* From 74856fbf441929918c49ff262ace9835048e4e6a Mon Sep 17 00:00:00 2001 From: Mark Hounschell Date: Wed, 13 May 2015 10:49:09 +0200 Subject: [PATCH 166/481] sd: Disable support for 256 byte/sector disks 256 bytes per sector support has been broken since 2.6.X, and no-one stepped up to fix this. So disable support for it. Signed-off-by: Mark Hounschell Signed-off-by: Hannes Reinecke Cc: stable@vger.kernel.org Signed-off-by: James Bottomley --- drivers/scsi/sd.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 79beebf53302..7f9d65fe4fd9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1600,6 +1600,7 @@ static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) { u64 start_lba = blk_rq_pos(scmd->request); u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512); + u64 factor = scmd->device->sector_size / 512; u64 bad_lba; int info_valid; /* @@ -1621,16 +1622,9 @@ static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) if (scsi_bufflen(scmd) <= scmd->device->sector_size) return 0; - if (scmd->device->sector_size < 512) { - /* only legitimate sector_size here is 256 */ - start_lba <<= 1; - end_lba <<= 1; - } else { - /* be careful ... don't want any overflows */ - unsigned int factor = scmd->device->sector_size / 512; - do_div(start_lba, factor); - do_div(end_lba, factor); - } + /* be careful ... 
don't want any overflows */ + do_div(start_lba, factor); + do_div(end_lba, factor); /* The bad lba was reported incorrectly, we have no idea where * the error is. @@ -2188,8 +2182,7 @@ got_data: if (sector_size != 512 && sector_size != 1024 && sector_size != 2048 && - sector_size != 4096 && - sector_size != 256) { + sector_size != 4096) { sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n", sector_size); /* @@ -2244,8 +2237,6 @@ got_data: sdkp->capacity <<= 2; else if (sector_size == 1024) sdkp->capacity <<= 1; - else if (sector_size == 256) - sdkp->capacity >>= 1; blk_queue_physical_block_size(sdp->request_queue, sdkp->physical_block_size); From 4627de932d5528ede89ee3ea84ef6339a906e58d Mon Sep 17 00:00:00 2001 From: Minh Tran Date: Thu, 14 May 2015 23:16:17 -0700 Subject: [PATCH 167/481] MAINTAINERS, be2iscsi: change email domain be2iscsi change of ownership from Emulex to Avago Technologies recently. We like to get the following updates in: changed "Emulex" to "Avago Technologies", changed email addresses from "emulex.com" to "avagotech.com", updated MAINTAINER list for be2iscsi driver. Signed-off-by: Minh Tran Signed-off-by: Jayamohan Kallickal Signed-off-by: James Bottomley --- MAINTAINERS | 6 ++++-- drivers/scsi/be2iscsi/be.h | 6 +++--- drivers/scsi/be2iscsi/be_cmds.c | 6 +++--- drivers/scsi/be2iscsi/be_cmds.h | 6 +++--- drivers/scsi/be2iscsi/be_iscsi.c | 8 ++++---- drivers/scsi/be2iscsi/be_iscsi.h | 8 ++++---- drivers/scsi/be2iscsi/be_main.c | 12 ++++++------ drivers/scsi/be2iscsi/be_main.h | 10 +++++----- drivers/scsi/be2iscsi/be_mgmt.c | 8 ++++---- drivers/scsi/be2iscsi/be_mgmt.h | 8 ++++---- 10 files changed, 40 insertions(+), 38 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2e5bbc0d68b2..2c6926d2776a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8794,9 +8794,11 @@ F: drivers/misc/phantom.c F: include/uapi/linux/phantom.h SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER -M: Jayamohan Kallickal +M: Jayamohan Kallickal +M: Minh Tran +M: John Soni Jose L: linux-scsi@vger.kernel.org -W: http://www.emulex.com +W: http://www.avagotech.com S: Supported F: drivers/scsi/be2iscsi/ diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h index 81e83a65a193..32070099c333 100644 --- a/drivers/scsi/be2iscsi/be.h +++ b/drivers/scsi/be2iscsi/be.h @@ -1,5 +1,5 @@ /** - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Avago Technologies * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -8,9 +8,9 @@ * Public License is included in this distribution in the file called COPYING. * * Contact Information: - * linux-drivers@emulex.com + * linux-drivers@avagotech.com * - * Emulex + * Avago Technologies * 3333 Susan Street * Costa Mesa, CA 92626 */ diff --git a/drivers/scsi/be2iscsi/be_cmds.c b/drivers/scsi/be2iscsi/be_cmds.c index 1028760b8a22..447cf7ce606e 100644 --- a/drivers/scsi/be2iscsi/be_cmds.c +++ b/drivers/scsi/be2iscsi/be_cmds.c @@ -1,5 +1,5 @@ /** - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Avago Technologies * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -8,9 +8,9 @@ * Public License is included in this distribution in the file called COPYING. 
* * Contact Information: - * linux-drivers@emulex.com + * linux-drivers@avagotech.com * - * Emulex + * Avago Technologies * 3333 Susan Street * Costa Mesa, CA 92626 */ diff --git a/drivers/scsi/be2iscsi/be_cmds.h b/drivers/scsi/be2iscsi/be_cmds.h index 98897434bcb4..f11d325fe696 100644 --- a/drivers/scsi/be2iscsi/be_cmds.h +++ b/drivers/scsi/be2iscsi/be_cmds.h @@ -1,5 +1,5 @@ /** - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Avago Technologies * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -8,9 +8,9 @@ * Public License is included in this distribution in the file called COPYING. * * Contact Information: - * linux-drivers@emulex.com + * linux-drivers@avagotech.com * - * Emulex + * Avago Technologies * 3333 Susan Street * Costa Mesa, CA 92626 */ diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c index b7391a3f9f0b..2f0700796842 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.c +++ b/drivers/scsi/be2iscsi/be_iscsi.c @@ -1,5 +1,5 @@ /** - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Avago Technologies * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -7,12 +7,12 @@ * as published by the Free Software Foundation. The full GNU General * Public License is included in this distribution in the file called COPYING. * - * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com) + * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com) * * Contact Information: - * linux-drivers@emulex.com + * linux-drivers@avagotech.com * - * Emulex + * Avago Technologies * 3333 Susan Street * Costa Mesa, CA 92626 */ diff --git a/drivers/scsi/be2iscsi/be_iscsi.h b/drivers/scsi/be2iscsi/be_iscsi.h index e0b3b2d1f27a..0c84e1c0763a 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.h +++ b/drivers/scsi/be2iscsi/be_iscsi.h @@ -1,5 +1,5 @@ /** - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Avago Technologies * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -7,12 +7,12 @@ * as published by the Free Software Foundation. The full GNU General * Public License is included in this distribution in the file called COPYING. * - * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com) + * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com) * * Contact Information: - * linux-drivers@emulex.com + * linux-drivers@avagotech.com * - * Emulex + * Avago Technologies * 3333 Susan Street * Costa Mesa, CA 92626 */ diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 923a2b5a2439..1f74760ce86c 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -1,5 +1,5 @@ /** - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Avago Technologies * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -7,12 +7,12 @@ * as published by the Free Software Foundation. The full GNU General * Public License is included in this distribution in the file called COPYING. 
* - * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com) + * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com) * * Contact Information: - * linux-drivers@emulex.com + * linux-drivers@avagotech.com * - * Emulex + * Avago Technologies * 3333 Susan Street * Costa Mesa, CA 92626 */ @@ -50,7 +50,7 @@ static unsigned int enable_msix = 1; MODULE_DESCRIPTION(DRV_DESC " " BUILD_STR); MODULE_VERSION(BUILD_STR); -MODULE_AUTHOR("Emulex Corporation"); +MODULE_AUTHOR("Avago Technologies"); MODULE_LICENSE("GPL"); module_param(be_iopoll_budget, int, 0); module_param(enable_msix, int, 0); @@ -552,7 +552,7 @@ MODULE_DEVICE_TABLE(pci, beiscsi_pci_id_table); static struct scsi_host_template beiscsi_sht = { .module = THIS_MODULE, - .name = "Emulex 10Gbe open-iscsi Initiator Driver", + .name = "Avago Technologies 10Gbe open-iscsi Initiator Driver", .proc_name = DRV_NAME, .queuecommand = iscsi_queuecommand, .change_queue_depth = scsi_change_queue_depth, diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h index 7ee0ffc38514..e70ea26bbc2b 100644 --- a/drivers/scsi/be2iscsi/be_main.h +++ b/drivers/scsi/be2iscsi/be_main.h @@ -1,5 +1,5 @@ /** - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Avago Technologies * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -7,12 +7,12 @@ * as published by the Free Software Foundation. The full GNU General * Public License is included in this distribution in the file called COPYING. * - * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com) + * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com) * * Contact Information: - * linux-drivers@emulex.com + * linux-drivers@avagotech.com * - * Emulex + * Avago Technologies * 3333 Susan Street * Costa Mesa, CA 92626 */ @@ -37,7 +37,7 @@ #define DRV_NAME "be2iscsi" #define BUILD_STR "10.4.114.0" -#define BE_NAME "Emulex OneConnect" \ +#define BE_NAME "Avago Technologies OneConnect" \ "Open-iSCSI Driver version" BUILD_STR #define DRV_DESC BE_NAME " " "Driver" diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c index 681d4e8f003a..c2c4d6975fb7 100644 --- a/drivers/scsi/be2iscsi/be_mgmt.c +++ b/drivers/scsi/be2iscsi/be_mgmt.c @@ -1,5 +1,5 @@ /** - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Avago Technologies * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -7,12 +7,12 @@ * as published by the Free Software Foundation. The full GNU General * Public License is included in this distribution in the file called COPYING. * - * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com) + * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com) * * Contact Information: - * linux-drivers@emulex.com + * linux-drivers@avagotech.com * - * Emulex + * Avago Technologies * 3333 Susan Street * Costa Mesa, CA 92626 */ diff --git a/drivers/scsi/be2iscsi/be_mgmt.h b/drivers/scsi/be2iscsi/be_mgmt.h index bd81446936fc..9356b9a86b66 100644 --- a/drivers/scsi/be2iscsi/be_mgmt.h +++ b/drivers/scsi/be2iscsi/be_mgmt.h @@ -1,5 +1,5 @@ /** - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Avago Technologies * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -7,12 +7,12 @@ * as published by the Free Software Foundation. The full GNU General * Public License is included in this distribution in the file called COPYING. 
* - * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com) + * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com) * * Contact Information: - * linux-drivers@emulex.com + * linux-drivers@avagotech.com * - * Emulex + * Avago Technologies * 3333 Susan Street * Costa Mesa, CA 92626 */ From 32505876c0947a4cecc409dfbef1fc58ced6138d Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 15 May 2015 13:10:58 -0400 Subject: [PATCH 168/481] MAINTAINERS: Revise lpfc maintainers for Avago Technologies ownership of Emulex The old email addresses will go away very soon. Revising with new addresses. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: James Bottomley --- MAINTAINERS | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2c6926d2776a..d42115a6de2b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3803,10 +3803,11 @@ M: David Woodhouse L: linux-embedded@vger.kernel.org S: Maintained -EMULEX LPFC FC SCSI DRIVER -M: James Smart +EMULEX/AVAGO LPFC FC/FCOE SCSI DRIVER +M: James Smart +M: Dick Kennedy L: linux-scsi@vger.kernel.org -W: http://sourceforge.net/projects/lpfcxxxx +W: http://www.avagotech.com S: Supported F: drivers/scsi/lpfc/ From 8d2812849acbc13c07bdad8a0a55a342ec1ce3a4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 14 May 2015 18:07:44 +0100 Subject: [PATCH 169/481] ARM: 8357/1: perf: fix memory leak when probing PMU PPIs Commit 338d9dd3e2ae ("ARM: 8351/1: perf: don't warn about missing interrupt-affinity property for PPIs") added a check for PPIs so that we avoid parsing the interrupt-affinity property for these naturally affine interrupts. Unfortunately, this check can trigger an early (successful) return and we will leak the irqs array. This patch fixes the issue by reordering the code so that the check is performed before any independent allocation. Reported-by: David Binderman Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event_cpu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 213919ba326f..3b8c2833c537 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -304,16 +304,17 @@ static int probe_current_pmu(struct arm_pmu *pmu) static int of_pmu_irq_cfg(struct platform_device *pdev) { int i, irq; - int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - - if (!irqs) - return -ENOMEM; + int *irqs; /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) return 0; + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + for (i = 0; i < pdev->num_resources; ++i) { struct device_node *dn; int cpu; From 13c3ed6a92724d8c8cb148a14b0ae190ddfe7413 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 18 May 2015 13:51:24 -0400 Subject: [PATCH 170/481] vxlan: correct typo in call to unregister_netdevice_queue By inspection, this appears to be a typo. The gating comparison involves vxlan->dev rather than dev. In fact, dev is the iterator in the preceding loop above but it is actually constant in the 2nd loop. Use of dev seems to be a bad cut-n-paste from the prior call to unregister_netdevice_queue. Change dev to vxlan->dev, since that is what is actually being checked. Signed-off-by: John W. Linville Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 27a5f954f8e9..21a0fbf1ed94 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2961,7 +2961,7 @@ static void __net_exit vxlan_exit_net(struct net *net) * to the list by the previous loop. */ if (!net_eq(dev_net(vxlan->dev), net)) - unregister_netdevice_queue(dev, &list); + unregister_netdevice_queue(vxlan->dev, &list); } unregister_netdevice_many(&list); From 10d784eae2b41e25d8fc6a88096cd27286093c84 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 8 May 2015 10:51:29 -0700 Subject: [PATCH 171/481] sched: always use blk_schedule_flush_plug in io_schedule_out block plug callback could sleep, so we introduce a parameter 'from_schedule' and corresponding drivers can use it to destinguish a schedule plug flush or a plug finish. Unfortunately io_schedule_out still uses blk_flush_plug(). This causes below output (Note, I added a might_sleep() in raid1_unplug to make it trigger faster, but the whole thing doesn't matter if I add might_sleep). In raid1/10, this can cause deadlock. This patch makes io_schedule_out always uses blk_schedule_flush_plug. This should only impact drivers (as far as I know, raid 1/10) which are sensitive to the 'from_schedule' parameter. [ 370.817949] ------------[ cut here ]------------ [ 370.817960] WARNING: CPU: 7 PID: 145 at ../kernel/sched/core.c:7306 __might_sleep+0x7f/0x90() [ 370.817969] do not call blocking ops when !TASK_RUNNING; state=2 set at [] prepare_to_wait+0x2f/0x90 [ 370.817971] Modules linked in: raid1 [ 370.817976] CPU: 7 PID: 145 Comm: kworker/u16:9 Tainted: G W 4.0.0+ #361 [ 370.817977] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153802- 04/01/2014 [ 370.817983] Workqueue: writeback bdi_writeback_workfn (flush-9:1) [ 370.817985] ffffffff81cd83be ffff8800ba8cb298 ffffffff819dd7af 0000000000000001 [ 370.817988] ffff8800ba8cb2e8 ffff8800ba8cb2d8 ffffffff81051afc ffff8800ba8cb2c8 [ 370.817990] ffffffffa00061a8 000000000000041e 0000000000000000 ffff8800ba8cba28 [ 370.817993] Call Trace: [ 370.817999] [] dump_stack+0x4f/0x7b [ 370.818002] [] warn_slowpath_common+0x8c/0xd0 [ 370.818004] [] warn_slowpath_fmt+0x46/0x50 [ 370.818006] [] ? prepare_to_wait+0x2f/0x90 [ 370.818008] [] ? prepare_to_wait+0x2f/0x90 [ 370.818010] [] __might_sleep+0x7f/0x90 [ 370.818014] [] raid1_unplug+0xd3/0x170 [raid1] [ 370.818024] [] blk_flush_plug_list+0x8a/0x1e0 [ 370.818028] [] ? bit_wait+0x50/0x50 [ 370.818031] [] io_schedule_timeout+0x130/0x140 [ 370.818033] [] bit_wait_io+0x36/0x50 [ 370.818034] [] __wait_on_bit+0x65/0x90 [ 370.818041] [] ? ext4_read_block_bitmap_nowait+0xbc/0x630 [ 370.818043] [] ? bit_wait+0x50/0x50 [ 370.818045] [] out_of_line_wait_on_bit+0x72/0x80 [ 370.818047] [] ? autoremove_wake_function+0x40/0x40 [ 370.818050] [] __wait_on_buffer+0x44/0x50 [ 370.818053] [] ext4_wait_block_bitmap+0xe0/0xf0 [ 370.818058] [] ext4_mb_init_cache+0x206/0x790 [ 370.818062] [] ? lru_cache_add+0x1c/0x50 [ 370.818064] [] ext4_mb_init_group+0x11e/0x200 [ 370.818066] [] ext4_mb_load_buddy+0x341/0x360 [ 370.818068] [] ext4_mb_find_by_goal+0x93/0x2f0 [ 370.818070] [] ? ext4_mb_normalize_request+0x1e4/0x5b0 [ 370.818072] [] ext4_mb_regular_allocator+0x67/0x460 [ 370.818074] [] ? ext4_mb_normalize_request+0x1e4/0x5b0 [ 370.818076] [] ext4_mb_new_blocks+0x4cb/0x620 [ 370.818079] [] ext4_ext_map_blocks+0x4c6/0x14d0 [ 370.818081] [] ? 
ext4_es_lookup_extent+0x4e/0x290 [ 370.818085] [] ext4_map_blocks+0x14d/0x4f0 [ 370.818088] [] ext4_writepages+0x76d/0xe50 [ 370.818094] [] do_writepages+0x21/0x50 [ 370.818097] [] __writeback_single_inode+0x60/0x490 [ 370.818099] [] writeback_sb_inodes+0x2da/0x590 [ 370.818103] [] ? trylock_super+0x1b/0x50 [ 370.818105] [] ? trylock_super+0x1b/0x50 [ 370.818107] [] __writeback_inodes_wb+0x9f/0xd0 [ 370.818109] [] wb_writeback+0x34b/0x3c0 [ 370.818111] [] bdi_writeback_workfn+0x23f/0x550 [ 370.818116] [] process_one_work+0x1c8/0x570 [ 370.818117] [] ? process_one_work+0x14b/0x570 [ 370.818119] [] worker_thread+0x11b/0x470 [ 370.818121] [] ? process_one_work+0x570/0x570 [ 370.818124] [] kthread+0xf8/0x110 [ 370.818126] [] ? kthread_create_on_node+0x210/0x210 [ 370.818129] [] ret_from_fork+0x42/0x70 [ 370.818131] [] ? kthread_create_on_node+0x210/0x210 [ 370.818132] ---[ end trace 7b4deb71e68b6605 ]--- V2: don't change ->in_iowait Cc: NeilBrown Signed-off-by: Shaohua Li Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- kernel/sched/core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fe22f7510bce..cfeebb499e79 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4387,10 +4387,7 @@ long __sched io_schedule_timeout(long timeout) long ret; current->in_iowait = 1; - if (old_iowait) - blk_schedule_flush_plug(current); - else - blk_flush_plug(current); + blk_schedule_flush_plug(current); delayacct_blkio_start(); rq = raw_rq(); From a94ef4ed710e0864c9720ff3f30ceb051aeb8377 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 27 Apr 2015 16:37:39 +0200 Subject: [PATCH 172/481] hwmon: Update the location of my quilt tree This new location was supposed to be temporary, but a couple years have elapsed and it's still there, so apparently it's there to stay. Signed-off-by: Jean Delvare Signed-off-by: Guenter Roeck --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f8e0afb708b4..60d24f224271 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4536,7 +4536,7 @@ M: Jean Delvare M: Guenter Roeck L: lm-sensors@lm-sensors.org W: http://www.lm-sensors.org/ -T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/ +T: quilt http://jdelvare.nerim.net/devel/linux/jdelvare-hwmon/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git S: Maintained F: Documentation/hwmon/ From ab3f02fc237211f0583c1e7ba3bf504747be9b8d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 May 2015 10:52:27 +0200 Subject: [PATCH 173/481] locking/arch: Add WRITE_ONCE() to set_mb() Since we assume set_mb() to result in a single store followed by a full memory barrier, employ WRITE_ONCE(). 
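The motivation is that the assignment inside set_mb() is a store to a variable shared with other CPUs, so it should be a single access the compiler cannot tear, split, or re-issue; WRITE_ONCE() guarantees that by routing the store through a volatile access. A freestanding sketch of the resulting pattern (the volatile cast and __sync_synchronize() are stand-ins for the kernel's WRITE_ONCE() and smp_mb(), and the state value is only illustrative):

    #include <stdio.h>

    /* Stand-ins for the kernel primitives, for illustration only. */
    #define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
    #define smp_mb()           __sync_synchronize()

    #define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

    static int task_state;   /* shared with a hypothetical waker thread */

    int main(void)
    {
        /* Typical use: publish a state change, then a full barrier so a
         * following load of the wakeup condition cannot move before it. */
        set_mb(task_state, 2 /* e.g. a sleeping-state constant */);
        printf("state = %d\n", task_state);
        return 0;
    }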
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/barrier.h | 2 +- arch/arm64/include/asm/barrier.h | 2 +- arch/ia64/include/asm/barrier.h | 2 +- arch/metag/include/asm/barrier.h | 2 +- arch/mips/include/asm/barrier.h | 2 +- arch/powerpc/include/asm/barrier.h | 2 +- arch/s390/include/asm/barrier.h | 2 +- arch/sparc/include/asm/barrier_64.h | 2 +- arch/x86/include/asm/barrier.h | 2 +- arch/x86/um/asm/barrier.h | 2 +- include/asm-generic/barrier.h | 2 +- include/linux/compiler.h | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index d2f81e6b8c1c..993150aea681 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 71f19c4dc0de..ff7de78d01b8 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -114,7 +114,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define nop() asm volatile("nop"); #define smp_mb__before_atomic() smp_mb() diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index f6769eb2bbf9..03117e7b2ab8 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -82,7 +82,7 @@ do { \ * acquire vs release semantics but we can't discuss this stuff with * Linus just yet. Grrr... 
*/ -#define set_mb(var, value) do { (var) = (value); mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) /* * The group barrier in front of the rsm & ssm are necessary to ensure diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index d703d8e26a65..97eb018a2933 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -84,7 +84,7 @@ static inline void fence(void) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 2b8bbbcb9be0..cff1bbdaa74a 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -113,7 +113,7 @@ #endif #define set_mb(var, value) \ - do { var = value; smp_mb(); } while (0) + do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index a3bf5be111ff..2a072e48780d 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -34,7 +34,7 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 8d724718ec21..b66cd53d35fc 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 76648941fea7..125fec7512f4 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -41,7 +41,7 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define dma_wmb() wmb() #define set_mb(__var, __value) \ - do { __var = __value; membar_safe("#StoreLoad"); } while(0) + do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0) #ifdef CONFIG_SMP #define smp_mb() mb() diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 959e45b81fe2..9de5cde133a1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -40,7 +40,7 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #endif /* SMP */ #define read_barrier_depends() do { } while (0) diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 7e8a1a650435..cc0cb01f346d 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -39,7 +39,7 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) +#define 
set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index f5c40b0fadc2..3938716b44d7 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -67,7 +67,7 @@ #endif #ifndef set_mb -#define set_mb(var, value) do { (var) = (value); mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #endif #ifndef smp_mb__before_atomic diff --git a/include/linux/compiler.h b/include/linux/compiler.h index a7c0941d10da..03e227ba481c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -250,7 +250,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) #define WRITE_ONCE(x, val) \ - ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) #endif /* __KERNEL__ */ From b92b8b35a2e38bde319fd1d68ec84628c1f1b0fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 May 2015 10:51:55 +0200 Subject: [PATCH 174/481] locking/arch: Rename set_mb() to smp_store_mb() Since set_mb() is really about an smp_mb() -- not a IO/DMA barrier like mb() rename it to match the recent smp_load_acquire() and smp_store_release(). Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- Documentation/memory-barriers.txt | 6 +++--- arch/arm/include/asm/barrier.h | 2 +- arch/arm64/include/asm/barrier.h | 2 +- arch/ia64/include/asm/barrier.h | 7 +------ arch/metag/include/asm/barrier.h | 2 +- arch/mips/include/asm/barrier.h | 2 +- arch/powerpc/include/asm/barrier.h | 2 +- arch/s390/include/asm/barrier.h | 2 +- arch/sh/include/asm/barrier.h | 2 +- arch/sparc/include/asm/barrier_64.h | 2 +- arch/x86/include/asm/barrier.h | 4 ++-- arch/x86/um/asm/barrier.h | 3 ++- fs/select.c | 6 +++--- include/asm-generic/barrier.h | 4 ++-- include/linux/sched.h | 8 ++++---- kernel/futex.c | 2 +- kernel/locking/qspinlock_paravirt.h | 2 +- kernel/sched/wait.c | 4 ++-- 18 files changed, 29 insertions(+), 33 deletions(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f95746189b5d..fe4020e4b468 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1662,7 +1662,7 @@ CPU from reordering them. There are some more advanced barrier functions: - (*) set_mb(var, value) + (*) smp_store_mb(var, value) This assigns the value to the variable and then inserts a full memory barrier after it, depending on the function. 
It isn't guaranteed to @@ -1975,7 +1975,7 @@ after it has altered the task state: CPU 1 =============================== set_current_state(); - set_mb(); + smp_store_mb(); STORE current->state LOAD event_indicated @@ -2016,7 +2016,7 @@ between the STORE to indicate the event and the STORE to set TASK_RUNNING: CPU 1 CPU 2 =============================== =============================== set_current_state(); STORE event_indicated - set_mb(); wake_up(); + smp_store_mb(); wake_up(); STORE current->state STORE current->state LOAD event_indicated diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 993150aea681..6c2327e1c732 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index ff7de78d01b8..0fa47c4275cb 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -114,7 +114,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define nop() asm volatile("nop"); #define smp_mb__before_atomic() smp_mb() diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index 03117e7b2ab8..843ba435e43b 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -77,12 +77,7 @@ do { \ ___p1; \ }) -/* - * XXX check on this ---I suspect what Linus really wants here is - * acquire vs release semantics but we can't discuss this stuff with - * Linus just yet. Grrr... 
- */ -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) /* * The group barrier in front of the rsm & ssm are necessary to ensure diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index 97eb018a2933..5a696e507930 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -84,7 +84,7 @@ static inline void fence(void) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index cff1bbdaa74a..7ecba84656d4 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -112,7 +112,7 @@ #define __WEAK_LLSC_MB " \n" #endif -#define set_mb(var, value) \ +#define smp_store_mb(var, value) \ do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 2a072e48780d..39505d660a70 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -34,7 +34,7 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index b66cd53d35fc..e6f8615a11eb 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h index 43715308b068..bf91037db4e0 100644 --- a/arch/sh/include/asm/barrier.h +++ b/arch/sh/include/asm/barrier.h @@ -32,7 +32,7 @@ #define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop") #endif -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) #include diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 125fec7512f4..809941e33e12 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -40,7 +40,7 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define dma_rmb() rmb() #define dma_wmb() wmb() -#define set_mb(__var, __value) \ +#define smp_store_mb(__var, __value) \ do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0) #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 9de5cde133a1..e51a8f803f55 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -35,12 +35,12 @@ #define smp_mb() mb() #define smp_rmb() dma_rmb() #define smp_wmb() barrier() -#define set_mb(var, value) do { (void)xchg(&var, value); } 
while (0) +#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #endif /* SMP */ #define read_barrier_depends() do { } while (0) diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index cc0cb01f346d..b9531d343134 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -39,7 +39,8 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) + +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) diff --git a/fs/select.c b/fs/select.c index f684c750e08a..015547330e88 100644 --- a/fs/select.c +++ b/fs/select.c @@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) * doesn't imply write barrier and the users expect write * barrier semantics on wakeup functions. The following * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() - * and is paired with set_mb() in poll_schedule_timeout. + * and is paired with smp_store_mb() in poll_schedule_timeout. */ smp_wmb(); pwq->triggered = 1; @@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, /* * Prepare for the next iteration. * - * The following set_mb() serves two purposes. First, it's + * The following smp_store_mb() serves two purposes. First, it's * the counterpart rmb of the wmb in pollwake() such that data * written before wake up is always visible after wake up. * Second, the full barrier guarantees that triggered clearing @@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, * this problem doesn't exist for the first iteration as * add_wait_queue() has full barrier semantics. 
*/ - set_mb(pwq->triggered, 0); + smp_store_mb(pwq->triggered, 0); return rc; } diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 3938716b44d7..e6a83d712ef6 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -66,8 +66,8 @@ #define smp_read_barrier_depends() do { } while (0) #endif -#ifndef set_mb -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#ifndef smp_store_mb +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #endif #ifndef smp_mb__before_atomic diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..18f197223ebd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -252,7 +252,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_task_state(tsk, state_value) \ do { \ (tsk)->task_state_change = _THIS_IP_; \ - set_mb((tsk)->state, (state_value)); \ + smp_store_mb((tsk)->state, (state_value)); \ } while (0) /* @@ -274,7 +274,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ - set_mb(current->state, (state_value)); \ + smp_store_mb(current->state, (state_value)); \ } while (0) #else @@ -282,7 +282,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) #define set_task_state(tsk, state_value) \ - set_mb((tsk)->state, (state_value)) + smp_store_mb((tsk)->state, (state_value)) /* * set_current_state() includes a barrier so that the write of current->state @@ -298,7 +298,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_current_state(state_value) \ do { current->state = (state_value); } while (0) #define set_current_state(state_value) \ - set_mb(current->state, (state_value)) + smp_store_mb(current->state, (state_value)) #endif diff --git a/kernel/futex.c b/kernel/futex.c index 2579e407ff67..55ca63ad9622 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2055,7 +2055,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, { /* * The task state is guaranteed to be set before another task can - * wake it. set_current_state() is implemented using set_mb() and + * wake it. set_current_state() is implemented using smp_store_mb() and * queue_me() calls spin_unlock() upon completion, both serializing * access to the hash list and forcing another memory barrier. */ diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 27ab96dca68c..04ab18151cc8 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -175,7 +175,7 @@ static void pv_wait_node(struct mcs_spinlock *node) * * Matches the xchg() from pv_kick_node(). */ - set_mb(pn->state, vcpu_halted); + smp_store_mb(pn->state, vcpu_halted); if (!READ_ONCE(node->locked)) pv_wait(&pn->state, vcpu_halted); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 852143a79f36..9bc82329eaad 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -341,7 +341,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout) * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss * an event. 
*/ - set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ + smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ return timeout; } @@ -354,7 +354,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) * doesn't imply write barrier and the users expects write * barrier semantics on wakeup functions. The following * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() - * and is paired with set_mb() in wait_woken(). + * and is paired with smp_store_mb() in wait_woken(). */ smp_wmb(); /* C */ wait->flags |= WQ_FLAG_WOKEN; From 54da691deb123c045259ebf4f5c67381244f58f1 Mon Sep 17 00:00:00 2001 From: Thomas Gummerer Date: Thu, 14 May 2015 09:16:39 +0200 Subject: [PATCH 175/481] drm/i915: fix screen flickering Commit c9f038a1a592 ("drm/i915: Don't assume primary & cursor are always on for wm calculation (v4)") fixes a null pointer dereference. Setting the primary and cursor planes to false in ilk_compute_wm_parameters does, however, give the following errors in the kernel log and causes the screen to flicker. [ 101.133716] [drm:intel_set_cpu_fifo_underrun_reporting [i915]] *ERROR* uncleared fifo underrun on pipe A [ 101.133725] [drm:intel_cpu_fifo_underrun_irq_handler [i915]] *ERROR* CPU pipe A FIFO underrun Always setting the planes to enabled fixes this error. Helped-by: Matt Roper Signed-off-by: Thomas Gummerer Reviewed-by: Matt Roper Tested-by: Mario Kleiner Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fa4ccb346389..555b896d2bda 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2045,22 +2045,20 @@ static void ilk_compute_wm_parameters(struct drm_crtc *crtc, p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal; p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc); - if (crtc->primary->state->fb) { - p->pri.enabled = true; + if (crtc->primary->state->fb) p->pri.bytes_per_pixel = crtc->primary->state->fb->bits_per_pixel / 8; - } else { - p->pri.enabled = false; - p->pri.bytes_per_pixel = 0; - } + else + p->pri.bytes_per_pixel = 4; + + p->cur.bytes_per_pixel = 4; + /* + * TODO: for now, assume primary and cursor planes are always enabled. + * Setting them to false makes the screen flicker. + */ + p->pri.enabled = true; + p->cur.enabled = true; - if (crtc->cursor->state->fb) { - p->cur.enabled = true; - p->cur.bytes_per_pixel = 4; - } else { - p->cur.enabled = false; - p->cur.bytes_per_pixel = 0; - } p->pri.horiz_pixels = intel_crtc->config->pipe_src_w; p->cur.horiz_pixels = intel_crtc->base.cursor->state->crtc_w; From 13a988396cab68e6c6afd33e461a9e52ce23ff63 Mon Sep 17 00:00:00 2001 From: Koro Chen Date: Wed, 13 May 2015 22:39:03 +0800 Subject: [PATCH 176/481] ALSA: pcm: Modify double acknowledged interrupts check condition Currently in snd_pcm_update_hw_ptr0 during interrupt, we consider there were double acknowledged interrupts when: 1. HW reported pointer is smaller than expected, and 2. Time from last update time (hdelta) is over half a buffer time. However, when HW reported pointer is only a few bytes smaller than expected, and when hdelta is just a little larger than half a buffer time (e.g. ping-pong buffer), it wrongly treats this IRQ as double acknowledged. The condition #2 uses jiffies, but jiffies is not high resolution since it is an integer. We should consider jiffies inaccuracy.
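As an illustration only (the helper below is hypothetical and not part of the patch), the extra jiffy of slack can be seen with made-up numbers: with hw_ptr_buffer_jiffies = 10, a delay of exactly half a buffer (5 jiffies) can be measured as 6 because jiffies is an integer counter.

	/* Hypothetical helper, only to illustrate the check. */
	static int is_double_ack(unsigned long hdelta,
				 unsigned long hw_ptr_buffer_jiffies)
	{
		/* old check: hdelta > 5, so a measured 6 is wrongly flagged   */
		/* new check: hdelta > 5 + 1, tolerating one jiffy of rounding */
		return hdelta > hw_ptr_buffer_jiffies / 2 + 1;
	}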
Signed-off-by: Koro Chen Signed-off-by: Takashi Iwai --- sound/core/pcm_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index ac6b33f3779c..7d45645f10ba 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -339,7 +339,7 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream, if (delta > new_hw_ptr) { /* check for double acknowledged interrupts */ hdelta = curr_jiffies - runtime->hw_ptr_jiffies; - if (hdelta > runtime->hw_ptr_buffer_jiffies/2) { + if (hdelta > runtime->hw_ptr_buffer_jiffies/2 + 1) { hw_base += runtime->buffer_size; if (hw_base >= runtime->boundary) { hw_base = 0; From 7cded342c09f633666e71ee1ce048f218a9c5836 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 13 May 2015 14:33:22 +0200 Subject: [PATCH 177/481] s390/mm: correct return value of pmd_pfn Git commit 152125b7a882df36a55a8eadbea6d0edf1461ee7 "s390/mm: implement dirty bits for large segment table entries" broke the pmd_pfn function, it changed the return value from 'unsigned long' to 'int'. This breaks all machine configurations with memory above the 8TB line. Cc: stable@vger.kernel.org # 3.17+ Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fc642399b489..ef24a212eeb7 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -494,7 +494,7 @@ static inline int pmd_large(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; } -static inline int pmd_pfn(pmd_t pmd) +static inline unsigned long pmd_pfn(pmd_t pmd) { unsigned long origin_mask; From 75e9143927a893aebad18b79e1bbeff4892c0d35 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Wed, 13 May 2015 17:06:23 -0700 Subject: [PATCH 178/481] pinctrl: cygnus: fixed incorrect GPIO-pin mapping This patch fixes an incorrect GPIO-to-pin mapping in the Cygnus GPIO driver Signed-off-by: Ray Jui Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c index 4ad5c1a996e3..e406e3d8c1c7 100644 --- a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c @@ -643,7 +643,9 @@ static const struct cygnus_gpio_pin_range cygnus_gpio_pintable[] = { CYGNUS_PINRANGE(87, 104, 12), CYGNUS_PINRANGE(99, 102, 2), CYGNUS_PINRANGE(101, 90, 4), - CYGNUS_PINRANGE(105, 116, 10), + CYGNUS_PINRANGE(105, 116, 6), + CYGNUS_PINRANGE(111, 100, 2), + CYGNUS_PINRANGE(113, 122, 4), CYGNUS_PINRANGE(123, 11, 1), CYGNUS_PINRANGE(124, 38, 4), CYGNUS_PINRANGE(128, 43, 1), From fa94b0d72511add5822dc4124f2a3eae66b8863f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 19 May 2015 10:46:49 +0200 Subject: [PATCH 179/481] ALSA: usb-audio: Add quirk for MS LifeCam Studio Microsoft LifeCam Studio (045e:0772) needs a similar quirk for suppressing the wrong sample rate inquiry. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=98481 Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 7c5a70139278..46facfc9aec1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1117,6 +1117,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) switch (chip->usb_id) { case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema */ case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */ + case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ return true; } From 984cffdeaeb7ea5a21f49a89f638d84d62d08992 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Sun, 17 May 2015 17:57:38 +0200 Subject: [PATCH 180/481] pinctrl: Fix gpio/pin mapping for Meson8b The num_pins field in the struct meson_domain_data must also include the missing pins in the Meson8b SoC, otherwise the GPIO <-> pin mapping is broken on this platform. Also avoid dynamic allocation for GPIOs. Signed-off-by: Carlo Caione Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- drivers/pinctrl/meson/pinctrl-meson8b.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index edcd140e0899..a70a5fe79d44 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -569,7 +569,7 @@ static int meson_gpiolib_register(struct meson_pinctrl *pc) domain->chip.direction_output = meson_gpio_direction_output; domain->chip.get = meson_gpio_get; domain->chip.set = meson_gpio_set; - domain->chip.base = -1; + domain->chip.base = domain->data->pin_base; domain->chip.ngpio = domain->data->num_pins; domain->chip.can_sleep = false; domain->chip.of_node = domain->of_node; diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c index 2f7ea6229880..9677807db364 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8b.c +++ b/drivers/pinctrl/meson/pinctrl-meson8b.c @@ -876,13 +876,13 @@ static struct meson_domain_data meson8b_domain_data[] = { .banks = meson8b_banks, .num_banks = ARRAY_SIZE(meson8b_banks), .pin_base = 0, - .num_pins = 83, + .num_pins = 130, }, { .name = "ao-bank", .banks = meson8b_ao_banks, .num_banks = ARRAY_SIZE(meson8b_ao_banks), - .pin_base = 83, + .pin_base = 130, .num_pins = 16, }, }; From cc6f67bcafcb6bbbb2d1be1603dcd95125a52800 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 19 May 2015 14:30:12 +0200 Subject: [PATCH 181/481] ovl: mount read-only if workdir can't be created OpenWRT folks reported that overlayfs fails to mount if the upper fs is full, because workdir can't be created. Workdir creation can fail for various other reasons too. There's no reason that the mount itself should fail; overlayfs can work fine without a workdir, as long as the overlay isn't modified. So mount it read-only and don't allow remounting read-write. Add a couple of WARN_ON()s for the impossible case of workdir being used despite being read-only.
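The remount rule this introduces can be condensed as follows (condensed from the patch below, shown only to summarize the behaviour): a read-write remount is refused whenever the upper layer or the workdir is missing.

	/* Condensed from ovl_remount() in this patch. */
	if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
		return -EROFS;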
Reported-by: Bastian Bittorf Signed-off-by: Miklos Szeredi Cc: # v3.18+ --- fs/overlayfs/copy_up.c | 3 +++ fs/overlayfs/dir.c | 9 +++++++++ fs/overlayfs/super.c | 10 +++++----- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 24f640441bd9..84d693d37428 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -299,6 +299,9 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct cred *override_cred; char *link = NULL; + if (WARN_ON(!workdir)) + return -EROFS; + ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 2578a0c0677d..692ceda3bc21 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -222,6 +222,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, struct kstat stat; int err; + if (WARN_ON(!workdir)) + return ERR_PTR(-EROFS); + err = ovl_lock_rename_workdir(workdir, upperdir); if (err) goto out; @@ -322,6 +325,9 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, struct dentry *newdentry; int err; + if (WARN_ON(!workdir)) + return -EROFS; + err = ovl_lock_rename_workdir(workdir, upperdir); if (err) goto out; @@ -506,6 +512,9 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) struct dentry *opaquedir = NULL; int err; + if (WARN_ON(!workdir)) + return -EROFS; + if (is_dir) { if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { opaquedir = ovl_check_empty_and_clear(dentry); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 5f0d1993e6e3..bf8537c7f455 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -529,7 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ufs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && !ufs->upper_mnt) + if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir)) return -EROFS; return 0; @@ -925,9 +925,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); err = PTR_ERR(ufs->workdir); if (IS_ERR(ufs->workdir)) { - pr_err("overlayfs: failed to create directory %s/%s\n", - ufs->config.workdir, OVL_WORKDIR_NAME); - goto out_put_upper_mnt; + pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", + ufs->config.workdir, OVL_WORKDIR_NAME, -err); + sb->s_flags |= MS_RDONLY; + ufs->workdir = NULL; } } @@ -997,7 +998,6 @@ out_put_lower_mnt: kfree(ufs->lower_mnt); out_put_workdir: dput(ufs->workdir); -out_put_upper_mnt: mntput(ufs->upper_mnt); out_put_lowerpath: for (i = 0; i < numlower; i++) From 22d3a3c829fa9ecdb493d1f1f2838d543f8d86a3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 May 2015 15:40:21 +0200 Subject: [PATCH 182/481] mac80211: don't use napi_gro_receive() outside NAPI context No matter how the driver manages its NAPI context, there's no way sending frames to it from a timer can be correct, since it would corrupt the internal GRO lists. To avoid that, always use the non-NAPI path when releasing frames from the timer. 
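The reason is that GRO keeps per-NAPI lists of in-flight skbs that are only safe to touch from the driver's NAPI poll run; a reorder-timer callback runs outside that context, so mixing the two can corrupt those lists. The resulting dispatch rule, condensed from the patch below:

	/* Condensed from ieee80211_deliver_skb() in this patch. */
	if (!(rx->flags & IEEE80211_RX_REORDER_TIMER) && rx->local->napi)
		napi_gro_receive(rx->local->napi, skb);	/* NAPI poll context */
	else
		netif_receive_skb(skb);			/* e.g. reorder timer */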
Cc: stable@vger.kernel.org Reported-by: Jean Trivelly Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/rx.c | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ab46ab4a7249..cdc374296245 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -205,6 +205,8 @@ enum ieee80211_packet_rx_flags { * @IEEE80211_RX_CMNTR: received on cooked monitor already * @IEEE80211_RX_BEACON_REPORTED: This frame was already reported * to cfg80211_report_obss_beacon(). + * @IEEE80211_RX_REORDER_TIMER: this frame is released by the + * reorder buffer timeout timer, not the normal RX path * * These flags are used across handling multiple interfaces * for a single frame. @@ -212,6 +214,7 @@ enum ieee80211_packet_rx_flags { enum ieee80211_rx_flags { IEEE80211_RX_CMNTR = BIT(0), IEEE80211_RX_BEACON_REPORTED = BIT(1), + IEEE80211_RX_REORDER_TIMER = BIT(2), }; struct ieee80211_rx_data { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 260eed45b6d2..5793f75c5ffd 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2121,7 +2121,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) /* deliver to local stack */ skb->protocol = eth_type_trans(skb, dev); memset(skb->cb, 0, sizeof(skb->cb)); - if (rx->local->napi) + if (!(rx->flags & IEEE80211_RX_REORDER_TIMER) && + rx->local->napi) napi_gro_receive(rx->local->napi, skb); else netif_receive_skb(skb); @@ -3231,7 +3232,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) /* This is OK -- must be QoS data frame */ .security_idx = tid, .seqno_idx = tid, - .flags = 0, + .flags = IEEE80211_RX_REORDER_TIMER, }; struct tid_ampdu_rx *tid_agg_rx; From d10ebb9f136669a1e9fa388fc450bf1822c93dd5 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Mon, 27 Apr 2015 23:10:13 +0200 Subject: [PATCH 183/481] drm/exynos: fb: use drm_format_num_planes to get buffer count The previous code had some special case handling for the buffer count in exynos_drm_format_num_buffers(). This code was incorrect though, since this special case doesn't exist for DRM. It stemmed from the existence of the special NV12M V4L2 format. NV12 is a bi-planar format (separate planes for luma and chroma) and V4L2 differentiates between a NV12 buffer where luma and chroma is contiguous in memory (so no data between luma/chroma), and a NV12 buffer where luma and chroma have two explicit memory locations (which is then called NV12M). This distinction doesn't exist for DRM. A bi-planar format always explicitly comes with the information about its two planes (even if these planes should be contiguous). Signed-off-by: Tobias Jakobi Acked-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 39 +------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 929cb03a8eab..142eb4e3f59e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -171,43 +171,6 @@ exynos_drm_framebuffer_init(struct drm_device *dev, return &exynos_fb->fb; } -static u32 exynos_drm_format_num_buffers(struct drm_mode_fb_cmd2 *mode_cmd) -{ - unsigned int cnt = 0; - - if (mode_cmd->pixel_format != DRM_FORMAT_NV12) - return drm_format_num_planes(mode_cmd->pixel_format); - - while (cnt != MAX_FB_BUFFER) { - if (!mode_cmd->handles[cnt]) - break; - cnt++; - } - - /* - * check if NV12 or NV12M. 
- * - * NV12 - * handles[0] = base1, offsets[0] = 0 - * handles[1] = base1, offsets[1] = Y_size - * - * NV12M - * handles[0] = base1, offsets[0] = 0 - * handles[1] = base2, offsets[1] = 0 - */ - if (cnt == 2) { - /* - * in case of NV12 format, offsets[1] is not 0 and - * handles[0] is same as handles[1]. - */ - if (mode_cmd->offsets[1] && - mode_cmd->handles[0] == mode_cmd->handles[1]) - cnt = 1; - } - - return cnt; -} - static struct drm_framebuffer * exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, struct drm_mode_fb_cmd2 *mode_cmd) @@ -230,7 +193,7 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, drm_helper_mode_fill_fb_struct(&exynos_fb->fb, mode_cmd); exynos_fb->exynos_gem_obj[0] = to_exynos_gem_obj(obj); - exynos_fb->buf_cnt = exynos_drm_format_num_buffers(mode_cmd); + exynos_fb->buf_cnt = drm_format_num_planes(mode_cmd->pixel_format); DRM_DEBUG_KMS("buf_cnt = %d\n", exynos_fb->buf_cnt); From 5d878bdb51bd7915ba3def8b531238c67624aa58 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Mon, 27 Apr 2015 23:10:14 +0200 Subject: [PATCH 184/481] drm/exynos: plane: honor buffer offset for dma_addr Previously we were ignoring the buffer offsets that are passed through the addfb2 ioctl. This didn't cause any major issues, since for uni-planar formats (like XRGB8888) userspace would most of the time just use offsets[0]=0. However with NV12 offsets[1] is very likely non-zero. So properly apply the offsets to our dma addresses. Signed-off-by: Tobias Jakobi Acked-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index 13ea3349363b..b1180fbe7546 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -76,7 +76,7 @@ int exynos_check_plane(struct drm_plane *plane, struct drm_framebuffer *fb) return -EFAULT; } - exynos_plane->dma_addr[i] = buffer->dma_addr; + exynos_plane->dma_addr[i] = buffer->dma_addr + fb->offsets[i]; DRM_DEBUG_KMS("buffer: %d, dma_addr = 0x%lx\n", i, (unsigned long)exynos_plane->dma_addr[i]); From fac8a5b25f5b7954ba510727caadbd9f7839a958 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Mon, 27 Apr 2015 23:10:15 +0200 Subject: [PATCH 185/481] drm/exynos: mixer: remove buffer count handling in vp_video_buffer() The video processor (VP) supports four formats: NV12, NV21 and its tiled variants. All these formats are bi-planar, so the buffer count in vp_video_buffer() is always 2. Also properly exit if we're called with an invalid (non-VP) pixelformat. 
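Since NV12/NV21 and their tiled variants are bi-planar, the luma and chroma planes always arrive as two separate dma addresses, which is what makes the single-buffer special case removable. Condensed from the patch below:

	/* Condensed from vp_video_buffer() after this patch. */
	luma_addr[0]   = plane->dma_addr[0];	/* Y plane          */
	chroma_addr[0] = plane->dma_addr[1];	/* interleaved CbCr */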
Signed-off-by: Tobias Jakobi Acked-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index fbec750574e6..1e8ce9ee039b 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -382,7 +382,6 @@ static void vp_video_buffer(struct mixer_context *ctx, int win) struct mixer_resources *res = &ctx->mixer_res; unsigned long flags; struct exynos_drm_plane *plane; - unsigned int buf_num = 1; dma_addr_t luma_addr[2], chroma_addr[2]; bool tiled_mode = false; bool crcb_mode = false; @@ -393,27 +392,15 @@ static void vp_video_buffer(struct mixer_context *ctx, int win) switch (plane->pixel_format) { case DRM_FORMAT_NV12: crcb_mode = false; - buf_num = 2; break; - /* TODO: single buffer format NV12, NV21 */ default: - /* ignore pixel format at disable time */ - if (!plane->dma_addr[0]) - break; - DRM_ERROR("pixel format for vp is wrong [%d].\n", plane->pixel_format); return; } - if (buf_num == 2) { - luma_addr[0] = plane->dma_addr[0]; - chroma_addr[0] = plane->dma_addr[1]; - } else { - luma_addr[0] = plane->dma_addr[0]; - chroma_addr[0] = plane->dma_addr[0] - + (plane->pitch * plane->fb_height); - } + luma_addr[0] = plane->dma_addr[0]; + chroma_addr[0] = plane->dma_addr[1]; if (plane->scan_flag & DRM_MODE_FLAG_INTERLACE) { ctx->interlace = true; From 8f2590f8e3a7aeeaf2ff5a875684fc6790ae58b5 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Mon, 27 Apr 2015 23:10:16 +0200 Subject: [PATCH 186/481] drm/exynos: mixer: also allow NV21 for the video processor All the necessary code is already there, just need to handle the format in the switch statement. Signed-off-by: Tobias Jakobi Acked-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 1e8ce9ee039b..adb5ec1b51ae 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -393,6 +393,9 @@ static void vp_video_buffer(struct mixer_context *ctx, int win) case DRM_FORMAT_NV12: crcb_mode = false; break; + case DRM_FORMAT_NV21: + crcb_mode = true; + break; default: DRM_ERROR("pixel format for vp is wrong [%d].\n", plane->pixel_format); From 7a57ca7c89ad7e61f8ca45fa1fa892f0d5102808 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Mon, 27 Apr 2015 23:11:59 +0200 Subject: [PATCH 187/481] drm/exynos: mixer: cleanup pixelformat handling Move the defines for the pixelformats that the mixer supports out of mixer_graph_buffer() to the top of the source. Then select the mixer pixelformat (pf) in mixer_graph_buffer() based on the plane's pf (and not bpp). Also add handling of RGB565 and XRGB1555 to the switch statement and exit early if the plane has an unsupported pf. Partially based on 'drm/exynos: enable/disable blend based on pixel format' by Gustavo Padovan . v2: Use the shorter MXR_FORMAT as prefix. v3: Re-add ARGB8888 because of compatibility reasons (suggested by Joonyoung Shim). 
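Selecting by bpp alone is ambiguous, which is the core of the problem: RGB565, XRGB1555 and XRGB4444 all report 16 bpp, so a bpp-based switch cannot tell them apart. A hypothetical sketch of the old, ambiguous mapping (not code from the driver; MXR_FORMAT_* values as defined in this patch):

	#define MXR_FORMAT_ARGB4444 6
	#define MXR_FORMAT_ARGB8888 7

	/* Hypothetical helper, only to illustrate why bpp is not enough. */
	static unsigned int mxr_fmt_from_bpp(unsigned int bpp)
	{
		/* all three 16-bpp formats collapse to ARGB4444 here,
		 * losing the distinction the hardware actually needs */
		return (bpp == 16) ? MXR_FORMAT_ARGB4444 : MXR_FORMAT_ARGB8888;
	}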
Reviewed-by: Gustavo Padovan Signed-off-by: Tobias Jakobi Acked-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 33 +++++++++++++++++++-------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index adb5ec1b51ae..1bcbcfd5f8c3 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -44,6 +44,12 @@ #define MIXER_WIN_NR 3 #define MIXER_DEFAULT_WIN 0 +/* The pixelformats that are natively supported by the mixer. */ +#define MXR_FORMAT_RGB565 4 +#define MXR_FORMAT_ARGB1555 5 +#define MXR_FORMAT_ARGB4444 6 +#define MXR_FORMAT_ARGB8888 7 + struct mixer_resources { int irq; void __iomem *mixer_regs; @@ -521,20 +527,27 @@ static void mixer_graph_buffer(struct mixer_context *ctx, int win) plane = &ctx->planes[win]; - #define RGB565 4 - #define ARGB1555 5 - #define ARGB4444 6 - #define ARGB8888 7 + switch (plane->pixel_format) { + case DRM_FORMAT_XRGB4444: + fmt = MXR_FORMAT_ARGB4444; + break; - switch (plane->bpp) { - case 16: - fmt = ARGB4444; + case DRM_FORMAT_XRGB1555: + fmt = MXR_FORMAT_ARGB1555; break; - case 32: - fmt = ARGB8888; + + case DRM_FORMAT_RGB565: + fmt = MXR_FORMAT_RGB565; break; + + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + fmt = MXR_FORMAT_ARGB8888; + break; + default: - fmt = ARGB8888; + DRM_DEBUG_KMS("pixelformat unsupported by mixer\n"); + return; } /* check if mixer supports requested scaling setup */ From d6b163026c5a6f2e49bc59261b69d44465ebddb4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 2 May 2015 00:56:36 +0900 Subject: [PATCH 188/481] drm/exynos: mixer: Constify platform_device_id The platform_device_id is not modified by the driver and core uses it as const. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 1bcbcfd5f8c3..1a07fdd3e39d 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1159,7 +1159,7 @@ static struct mixer_drv_data exynos4210_mxr_drv_data = { .has_sclk = 1, }; -static struct platform_device_id mixer_driver_types[] = { +static const struct platform_device_id mixer_driver_types[] = { { .name = "s5p-mixer", .driver_data = (unsigned long)&exynos4210_mxr_drv_data, From 48107d7b0db180155b19b2cf083517014289a079 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 May 2015 09:04:44 +0900 Subject: [PATCH 189/481] drm/exynos: Fix build breakage on !DRM_EXYNOS_FIMD Disabling the CONFIG_DRM_EXYNOS_FIMD (e.g. by enabling of CONFIG_FB_S3C) leads to build error: drivers/built-in.o: In function `exynos_dp_dpms': binder.c:(.text+0xd6a840): undefined reference to `fimd_dp_clock_enable' binder.c:(.text+0xd6ab54): undefined reference to `fimd_dp_clock_enable' Fix this by changing direct call to fimd_dp_clock_enable() into optional call to exynos_drm_crtc_ops->clock_enable(). Only the DRM_EXYNOS_FIMD implements this op. 
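With the optional op, exynos_dp no longer references a FIMD symbol directly, so it links even when FIMD support is compiled out; callers simply skip the hook when it is not provided. Condensed from the patch below:

	/* Condensed from exynos_dp_poweron()/poweroff() in this patch. */
	if (crtc->ops->clock_enable)
		crtc->ops->clock_enable(crtc, enable);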
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_dp_core.c | 11 +++++-- drivers/gpu/drm/exynos/exynos_drm_drv.h | 5 ++++ drivers/gpu/drm/exynos/exynos_drm_fimd.c | 37 ++++++++++++------------ drivers/gpu/drm/exynos/exynos_drm_fimd.h | 15 ---------- 4 files changed, 31 insertions(+), 37 deletions(-) delete mode 100644 drivers/gpu/drm/exynos/exynos_drm_fimd.h diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c index 1dbfba58f909..441ef06b8894 100644 --- a/drivers/gpu/drm/exynos/exynos_dp_core.c +++ b/drivers/gpu/drm/exynos/exynos_dp_core.c @@ -32,7 +32,6 @@ #include #include "exynos_dp_core.h" -#include "exynos_drm_fimd.h" #define ctx_from_connector(c) container_of(c, struct exynos_dp_device, \ connector) @@ -1066,6 +1065,8 @@ static void exynos_dp_phy_exit(struct exynos_dp_device *dp) static void exynos_dp_poweron(struct exynos_dp_device *dp) { + struct exynos_drm_crtc *crtc = dp_to_crtc(dp); + if (dp->dpms_mode == DRM_MODE_DPMS_ON) return; @@ -1076,7 +1077,8 @@ static void exynos_dp_poweron(struct exynos_dp_device *dp) } } - fimd_dp_clock_enable(dp_to_crtc(dp), true); + if (crtc->ops->clock_enable) + crtc->ops->clock_enable(dp_to_crtc(dp), true); clk_prepare_enable(dp->clock); exynos_dp_phy_init(dp); @@ -1087,6 +1089,8 @@ static void exynos_dp_poweron(struct exynos_dp_device *dp) static void exynos_dp_poweroff(struct exynos_dp_device *dp) { + struct exynos_drm_crtc *crtc = dp_to_crtc(dp); + if (dp->dpms_mode != DRM_MODE_DPMS_ON) return; @@ -1102,7 +1106,8 @@ static void exynos_dp_poweroff(struct exynos_dp_device *dp) exynos_dp_phy_exit(dp); clk_disable_unprepare(dp->clock); - fimd_dp_clock_enable(dp_to_crtc(dp), false); + if (crtc->ops->clock_enable) + crtc->ops->clock_enable(dp_to_crtc(dp), false); if (dp->panel) { if (drm_panel_unprepare(dp->panel)) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index e12ecb5d5d9a..44f128a03aea 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -181,6 +181,10 @@ struct exynos_drm_display { * @win_disable: disable hardware specific overlay. * @te_handler: trigger to transfer video image at the tearing effect * synchronization signal if there is a page flip request. + * @clock_enable: optional function enabling/disabling display domain clock, + * called from exynos-dp driver before powering up (with + * 'enable' argument as true) and after powering down (with + * 'enable' as false). 
*/ struct exynos_drm_crtc; struct exynos_drm_crtc_ops { @@ -195,6 +199,7 @@ struct exynos_drm_crtc_ops { void (*win_commit)(struct exynos_drm_crtc *crtc, unsigned int zpos); void (*win_disable)(struct exynos_drm_crtc *crtc, unsigned int zpos); void (*te_handler)(struct exynos_drm_crtc *crtc); + void (*clock_enable)(struct exynos_drm_crtc *crtc, bool enable); }; /* diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 9819fa6a9e2a..2fb95ccb5841 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -33,7 +33,6 @@ #include "exynos_drm_crtc.h" #include "exynos_drm_plane.h" #include "exynos_drm_iommu.h" -#include "exynos_drm_fimd.h" /* * FIMD stands for Fully Interactive Mobile Display and @@ -946,6 +945,23 @@ static void fimd_te_handler(struct exynos_drm_crtc *crtc) drm_handle_vblank(ctx->drm_dev, ctx->pipe); } +static void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable) +{ + struct fimd_context *ctx = crtc->ctx; + u32 val; + + /* + * Only Exynos 5250, 5260, 5410 and 542x requires enabling DP/MIE + * clock. On these SoCs the bootloader may enable it but any + * power domain off/on will reset it to disable state. + */ + if (ctx->driver_data != &exynos5_fimd_driver_data) + return; + + val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE; + writel(DP_MIE_CLK_DP_ENABLE, ctx->regs + DP_MIE_CLKCON); +} + static struct exynos_drm_crtc_ops fimd_crtc_ops = { .dpms = fimd_dpms, .mode_fixup = fimd_mode_fixup, @@ -956,6 +972,7 @@ static struct exynos_drm_crtc_ops fimd_crtc_ops = { .win_commit = fimd_win_commit, .win_disable = fimd_win_disable, .te_handler = fimd_te_handler, + .clock_enable = fimd_dp_clock_enable, }; static irqreturn_t fimd_irq_handler(int irq, void *dev_id) @@ -1192,24 +1209,6 @@ static int fimd_remove(struct platform_device *pdev) return 0; } -void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable) -{ - struct fimd_context *ctx = crtc->ctx; - u32 val; - - /* - * Only Exynos 5250, 5260, 5410 and 542x requires enabling DP/MIE - * clock. On these SoCs the bootloader may enable it but any - * power domain off/on will reset it to disable state. - */ - if (ctx->driver_data != &exynos5_fimd_driver_data) - return; - - val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE; - writel(DP_MIE_CLK_DP_ENABLE, ctx->regs + DP_MIE_CLKCON); -} -EXPORT_SYMBOL_GPL(fimd_dp_clock_enable); - struct platform_driver fimd_driver = { .probe = fimd_probe, .remove = fimd_remove, diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.h b/drivers/gpu/drm/exynos/exynos_drm_fimd.h deleted file mode 100644 index b4fcaa568456..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Copyright (c) 2015 Samsung Electronics Co., Ltd. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ - -#ifndef _EXYNOS_DRM_FIMD_H_ -#define _EXYNOS_DRM_FIMD_H_ - -extern void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable); - -#endif /* _EXYNOS_DRM_FIMD_H_ */ From f3aaf7624463721af27f13cc083916c54ffbee70 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 May 2015 09:04:45 +0900 Subject: [PATCH 190/481] drm/exynos: Constify exynos_drm_crtc_ops The Exynos DRM code does not modify the ops provided by CRTC driver in exynos_drm_crtc_create() call. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos7_drm_decon.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_crtc.c | 10 +++++----- drivers/gpu/drm/exynos/exynos_drm_crtc.h | 10 +++++----- drivers/gpu/drm/exynos/exynos_drm_drv.h | 2 +- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 2 +- drivers/gpu/drm/exynos/exynos_mixer.c | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 1f7e33f59de6..3d00df76220d 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -710,7 +710,7 @@ static void decon_dpms(struct exynos_drm_crtc *crtc, int mode) } } -static struct exynos_drm_crtc_ops decon_crtc_ops = { +static const struct exynos_drm_crtc_ops decon_crtc_ops = { .dpms = decon_dpms, .mode_fixup = decon_mode_fixup, .commit = decon_commit, diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index eb49195cec5c..9006b947e03c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -238,11 +238,11 @@ static struct drm_crtc_funcs exynos_crtc_funcs = { }; struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev, - struct drm_plane *plane, - int pipe, - enum exynos_drm_output_type type, - struct exynos_drm_crtc_ops *ops, - void *ctx) + struct drm_plane *plane, + int pipe, + enum exynos_drm_output_type type, + const struct exynos_drm_crtc_ops *ops, + void *ctx) { struct exynos_drm_crtc *exynos_crtc; struct exynos_drm_private *private = drm_dev->dev_private; diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.h b/drivers/gpu/drm/exynos/exynos_drm_crtc.h index 0ecd8fc45cff..0f3aa70818e3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.h +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.h @@ -18,11 +18,11 @@ #include "exynos_drm_drv.h" struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev, - struct drm_plane *plane, - int pipe, - enum exynos_drm_output_type type, - struct exynos_drm_crtc_ops *ops, - void *context); + struct drm_plane *plane, + int pipe, + enum exynos_drm_output_type type, + const struct exynos_drm_crtc_ops *ops, + void *context); int exynos_drm_crtc_enable_vblank(struct drm_device *dev, int pipe); void exynos_drm_crtc_disable_vblank(struct drm_device *dev, int pipe); void exynos_drm_crtc_finish_pageflip(struct drm_device *dev, int pipe); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index 44f128a03aea..0e951226ca06 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -226,7 +226,7 @@ struct exynos_drm_crtc { unsigned int dpms; wait_queue_head_t pending_flip_queue; struct drm_pending_vblank_event *event; - struct exynos_drm_crtc_ops *ops; + const struct exynos_drm_crtc_ops *ops; void *ctx; 
}; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 2fb95ccb5841..5c1a148525f8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -962,7 +962,7 @@ static void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable) writel(DP_MIE_CLK_DP_ENABLE, ctx->regs + DP_MIE_CLKCON); } -static struct exynos_drm_crtc_ops fimd_crtc_ops = { +static const struct exynos_drm_crtc_ops fimd_crtc_ops = { .dpms = fimd_dpms, .mode_fixup = fimd_mode_fixup, .commit = fimd_commit, diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 27e84ec21694..1b3479a8db5f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -217,7 +217,7 @@ static int vidi_ctx_initialize(struct vidi_context *ctx, return 0; } -static struct exynos_drm_crtc_ops vidi_crtc_ops = { +static const struct exynos_drm_crtc_ops vidi_crtc_ops = { .dpms = vidi_dpms, .enable_vblank = vidi_enable_vblank, .disable_vblank = vidi_disable_vblank, diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 1a07fdd3e39d..31c1793bc97b 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1129,7 +1129,7 @@ int mixer_check_mode(struct drm_display_mode *mode) return -EINVAL; } -static struct exynos_drm_crtc_ops mixer_crtc_ops = { +static const struct exynos_drm_crtc_ops mixer_crtc_ops = { .dpms = mixer_dpms, .enable_vblank = mixer_enable_vblank, .disable_vblank = mixer_disable_vblank, From 362edccc7a1395bcd05d1f2b30f47a3439ffef5c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 May 2015 09:04:46 +0900 Subject: [PATCH 191/481] drm/exynos: Consolidate return statements in fimd_bind() Simplify the code and remove superfluous return statement. Just return the result of fimd_iommu_attach_devices(). Signed-off-by: Krzysztof Kozlowski Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 5c1a148525f8..d704f2ba7179 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -1042,12 +1042,7 @@ static int fimd_bind(struct device *dev, struct device *master, void *data) if (ctx->display) exynos_drm_create_enc_conn(drm_dev, ctx->display); - ret = fimd_iommu_attach_devices(ctx, drm_dev); - if (ret) - return ret; - - return 0; - + return fimd_iommu_attach_devices(ctx, drm_dev); } static void fimd_unbind(struct device *dev, struct device *master, From c0734fbaf49512dc62d544875bd9a4192329ce01 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Wed, 6 May 2015 14:10:21 +0200 Subject: [PATCH 192/481] drm/exynos: mixer: don't dump registers under spinlock mixer_regs_dump() was called in mixer_run(), which was called under the register spinlock in mixer_graph_buffer() and vp_video_buffer(). This would trigger a sysmmu pagefault with drm.debug=0xff because of the large delay caused by the register dumping. To keep consistency also move register dumping out of mixer_stop(), which is the counterpart to mixer_run(). 
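The shape of the change, condensed from the patch below: the dump helpers are slow when drm.debug is enabled, so they must not run while the register spinlock is held with interrupts off.

	/* Condensed from vp_video_buffer()/mixer_graph_buffer() in this patch. */
	spin_lock_irqsave(&res->reg_slock, flags);
	/* ... program the window/VP registers ... */
	spin_unlock_irqrestore(&res->reg_slock, flags);

	mixer_regs_dump(ctx);	/* dumping now happens outside the lock */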
Kernel dump: [ 131.296529] [drm:mixer_win_commit] win: 2 [ 131.300693] [drm:mixer_regs_dump] MXR_STATUS = 00000081 [ 131.305888] [drm:mixer_regs_dump] MXR_CFG = 000007d5 [ 131.310835] [drm:mixer_regs_dump] MXR_INT_EN = 00000000 [ 131.316043] [drm:mixer_regs_dump] MXR_INT_STATUS = 00000900 [ 131.321598] [drm:mixer_regs_dump] MXR_LAYER_CFG = 00000321 [ 131.327066] [drm:mixer_regs_dump] MXR_VIDEO_CFG = 00000000 [ 131.332535] [drm:mixer_regs_dump] MXR_GRAPHIC0_CFG = 00310700 [ 131.338263] [drm:mixer_regs_dump] MXR_GRAPHIC0_BASE = 20c00000 [ 131.344079] [drm:mixer_regs_dump] MXR_GRAPHIC0_SPAN = 00000780 [ 131.349895] [drm:mixer_regs_dump] MXR_GRAPHIC0_WH = 07800438 [ 131.355537] [drm:mixer_regs_dump] MXR_GRAPHIC0_SXY = 00000000 [ 131.361265] [drm:mixer_regs_dump] MXR_GRAPHIC0_DXY = 00000000 [ 131.366994] [drm:mixer_regs_dump] MXR_GRAPHIC1_CFG = 00000000 [ 131.372723] [drm:mixer_regs_dump] MXR_GRAPHIC1_BASE = 00000000 [ 131.378539] [drm:mixer_regs_dump] MXR_GRAPHIC1_SPAN = 00000000 [ 131.384354] [drm:mixer_regs_dump] MXR_GRAPHIC1_WH = 00000000 [ 131.389996] [drm:mixer_regs_dump] MXR_GRAPHIC1_SXY = 00000000 [ 131.395725] [drm:mixer_regs_dump] MXR_GRAPHIC1_DXY = 00000000 [ 131.401486] PAGE FAULT occurred at 0x0 by 12e20000.sysmmu(Page table base: 0x6d990000) [ 131.409353] Lv1 entry: 0x6e0f2401 [ 131.412753] ------------[ cut here ]------------ [ 131.417339] kernel BUG at drivers/iommu/exynos-iommu.c:358! [ 131.422894] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM [ 131.428709] Modules linked in: ecb bridge stp llc bnep btrfs xor xor_neon zlib_inflate zlib_deflate raid6_pq btusb bluetooth usb_storage s5p_jpeg videobuf2_dma_contig videobuf2_memops v4l2_mem2mem videobuf2_core [ 131.447461] CPU: 0 PID: 2418 Comm: lt-modetest Tainted: G W 4.0.1-debug+ #3 [ 131.455530] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 131.461607] task: ee194100 ti: ec4fe000 task.ti: ec4fe000 [ 131.466995] PC is at exynos_sysmmu_irq+0x2a0/0x2a8 [ 131.471766] LR is at vprintk_emit+0x268/0x594 [ 131.476103] pc : [] lr : [] psr: a00001d3 [ 131.476103] sp : ec4ff9d8 ip : 00000000 fp : ec4ffa14 [ 131.487559] r10: ffffffda r9 : ee206e28 r8 : ee2d1a10 [ 131.492767] r7 : 00000000 r6 : 00000000 r5 : 00000000 r4 : ee206e10 [ 131.499277] r3 : c06fca20 r2 : 00000000 r1 : 00000000 r0 : ee28be00 [ 131.505788] Flags: NzCv IRQs off FIQs off Mode SVC_32 ISA ARM Segment user [ 131.513079] Control: 10c5387d Table: 6c72404a DAC: 00000015 [ 131.518808] Process lt-modetest (pid: 2418, stack limit = 0xec4fe218) [ 131.525231] Stack: (0xec4ff9d8 to 0xec500000) [ 131.529571] f9c0: ec4ff9e4 c03a0c40 [ 131.537732] f9e0: bbfa6e35 6d990000 6d161c3d ee20a900 ee04a7e0 00000028 ee007000 00000000 [ 131.545891] fa00: 00000000 c06fb1fc ec4ffa5c ec4ffa18 c0066a34 c0277f10 ee257664 0000000b [ 131.554050] fa20: ec4ffa5c c06fafbb ee04a780 c06fb1e8 00000000 ee04a780 ee04a7e0 ee20a900 [ 131.562209] fa40: ee007000 00000015 ec4ffb48 ee008000 ec4ffa7c ec4ffa60 c0066c90 c00669e0 [ 131.570369] fa60: 00020000 ee04a780 ee04a7e0 00001000 ec4ffa94 ec4ffa80 c0069c6c c0066c58 [ 131.578528] fa80: 00000028 ee004450 ec4ffaac ec4ffa98 c0066028 c0069bac 000000a0 c06e19b4 [ 131.586687] faa0: ec4ffad4 ec4ffab0 c0223678 c0066000 c02235dc 00000015 00000000 00000015 [ 131.594846] fac0: ec4ffc80 00000001 ec4ffaec ec4ffad8 c0066028 c02235e8 00000089 c06bfc54 [ 131.603005] fae0: ec4ffb1c ec4ffaf0 c006633c c0066000 ec4ffb48 f002000c 00000025 00000015 [ 131.611165] fb00: c06c680c ec4ffb48 f0020000 ee008000 ec4ffb44 ec4ffb20 c000867c c00662c4 [ 131.619324] fb20: c02046ac 
60000153 ffffffff ec4ffb7c 00000000 00000101 ec4ffbb4 ec4ffb48 [ 131.627483] fb40: c0013240 c0008650 00000001 ee257508 00000002 00000001 ee257504 ee257508 [ 131.635642] fb60: 00000000 c06bf27c 00000000 00000101 ee008000 ec4ffbb4 00000000 ec4ffb90 [ 131.643802] fb80: c002e124 c02046ac 60000153 ffffffff c002e09c 00000000 c06c6080 00000283 [ 131.651960] fba0: 00000001 c06fb1ac ec4ffc0c ec4ffbb8 c002d690 c002e0a8 ee78d080 ee008000 [ 131.660120] fbc0: 00400000 c04eb3b0 ffff7c44 c06c6100 c06fdac0 0000000a c06bf2f0 c06c6080 [ 131.668279] fbe0: c06bfc54 c06bfc54 00000000 00000025 00000000 00000001 ec4ffc80 ee008000 [ 131.676438] fc00: ec4ffc24 ec4ffc10 c002dbb8 c002d564 00000089 c06bfc54 ec4ffc54 ec4ffc28 [ 131.684597] fc20: c0066340 c002dafc ec4ffc80 f002000c 0000001c 0000000c c06c680c ec4ffc80 [ 131.692757] fc40: f0020000 00000080 ec4ffc7c ec4ffc58 c000867c c00662c4 c04e6624 60000053 [ 131.700916] fc60: ffffffff ec4ffcb4 c072df54 ee22d010 ec4ffcdc ec4ffc80 c0013240 c0008650 [ 131.709075] fc80: ee22d664 ee194100 00000000 ec4fe000 60000053 00000400 00000002 ee22d420 [ 131.717234] fca0: c072df54 ee22d010 00000080 ec4ffcdc ec4ffcc8 ec4ffcc8 c04e6620 c04e6624 [ 131.725393] fcc0: 60000053 ffffffff ec4fe000 c072df54 ec4ffd34 ec4ffce0 c02b64d0 c04e6618 [ 131.733552] fce0: ec4ffcf8 00000000 00000000 60000053 00010000 00010000 00000000 200cb000 [ 131.741712] fd00: 20080000 ee22d664 00000001 ee256000 ee261400 ee22d420 00000080 00000080 [ 131.749871] fd20: ee256000 00000280 ec4ffd74 ec4ffd38 c02a8844 c02b5fec 00000080 00000280 [ 131.758030] fd40: 000001e0 00000000 00000000 00000280 000001e0 ee22d220 01e00000 00000002 [ 131.766189] fd60: ee22d420 ee261400 ec4ffdbc ec4ffd78 c0293cbc c02a87a4 00000080 00000280 [ 131.774348] fd80: 000001e0 00000000 00000000 02800000 01e00000 ee261400 ee22d460 ee261400 [ 131.782508] fda0: ee22d420 00000000 01e00000 000001e0 ec4ffe24 ec4ffdc0 c0297800 c0293b24 [ 131.790667] fdc0: 00000080 00000280 000001e0 00000000 00000000 02800000 01e00000 ec4ffdf8 [ 131.798826] fde0: c028db00 00000080 00000080 ee256000 02800000 00000000 ec4ffe24 c06c6448 [ 131.806985] fe00: c072df54 000000b7 ee013800 ec4ffe54 edbf7300 ec4ffe54 ec4fff04 ec4ffe28 [ 131.815145] fe20: c028a848 c029768c 00000001 c06195d8 ec4ffe5c ec4ffe40 c0297680 c0521f6c [ 131.823304] fe40: 00000030 bed45d38 00000030 c03064b7 ec4ffe8c 00000011 00000015 00000022 [ 131.831463] fe60: 00000000 00000080 00000080 00000280 000001e0 00000000 00000000 01e00000 [ 131.839622] fe80: 02800000 00000000 00000000 0004b000 00000000 00000000 c00121e4 c0011080 [ 131.847781] fea0: c00110a4 00000000 00000000 00000000 ec4ffeec ec4ffec0 c00110f0 c00121cc [ 131.855940] fec0: 00000000 c00e7fec ec4ffeec ec4ffed8 c004af2c dc8ba201 edae4fc0 edbf7000 [ 131.864100] fee0: edbf7000 00000003 bed45d38 00000003 bed45d38 ee3f2040 ec4fff7c ec4fff08 [ 131.872259] ff00: c010b62c c028a684 edae4fc0 00000000 00000000 b6666000 ec40d108 edae4fc4 [ 131.880418] ff20: ec4fff6c ec4fff30 c00e7fec c02207b0 000001f9 00000000 edae5008 ec40d110 [ 131.888577] ff40: 00070800 edae5008 edae4fc0 00070800 b6666000 edbf7000 edbf7000 c03064b7 [ 131.896736] ff60: bed45d38 00000003 ec4fe000 00000000 ec4fffa4 ec4fff80 c010b84c c010b208 [ 131.904896] ff80: 00000022 00000000 bed45d38 c03064b7 00000036 c000ede4 00000000 ec4fffa8 [ 131.913055] ffa0: c000ec40 c010b81c 00000000 bed45d38 00000003 c03064b7 bed45d38 00000022 [ 131.921214] ffc0: 00000000 bed45d38 c03064b7 00000036 00000080 00000080 00000000 000001e0 [ 131.929373] ffe0: b6da4064 bed45d1c b6d98968 b6e8082c 60000050 00000003 00000000 
00000000 [ 131.937529] Backtrace: [ 131.939967] [] (exynos_sysmmu_irq) from [] (handle_irq_event_percpu+0x60/0x278) [ 131.948988] r10:c06fb1fc r9:00000000 r8:00000000 r7:ee007000 r6:00000028 r5:ee04a7e0 [ 131.956799] r4:ee20a900 [ 131.959320] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x44/0x64) [ 131.968170] r10:ee008000 r9:ec4ffb48 r8:00000015 r7:ee007000 r6:ee20a900 r5:ee04a7e0 [ 131.975982] r4:ee04a780 [ 131.978504] [] (handle_irq_event) from [] (handle_level_irq+0xcc/0x144) [ 131.986832] r6:00001000 r5:ee04a7e0 r4:ee04a780 r3:00020000 [ 131.992478] [] (handle_level_irq) from [] (generic_handle_irq+0x34/0x44) [ 132.000894] r5:ee004450 r4:00000028 [ 132.004459] [] (generic_handle_irq) from [] (combiner_handle_cascade_irq+0x9c/0x108) [ 132.013914] r4:c06e19b4 r3:000000a0 [ 132.017476] [] (combiner_handle_cascade_irq) from [] (generic_handle_irq+0x34/0x44) [ 132.026847] r8:00000001 r7:ec4ffc80 r6:00000015 r5:00000000 r4:00000015 r3:c02235dc [ 132.034576] [] (generic_handle_irq) from [] (__handle_domain_irq+0x84/0xf0) [ 132.043252] r4:c06bfc54 r3:00000089 [ 132.046815] [] (__handle_domain_irq) from [] (gic_handle_irq+0x38/0x70) [ 132.055144] r10:ee008000 r9:f0020000 r8:ec4ffb48 r7:c06c680c r6:00000015 r5:00000025 [ 132.062956] r4:f002000c r3:ec4ffb48 [ 132.066520] [] (gic_handle_irq) from [] (__irq_svc+0x40/0x74) [ 132.073980] Exception stack(0xec4ffb48 to 0xec4ffb90) [ 132.079016] fb40: 00000001 ee257508 00000002 00000001 ee257504 ee257508 [ 132.087176] fb60: 00000000 c06bf27c 00000000 00000101 ee008000 ec4ffbb4 00000000 ec4ffb90 [ 132.095333] fb80: c002e124 c02046ac 60000153 ffffffff [ 132.100367] r9:00000101 r8:00000000 r7:ec4ffb7c r6:ffffffff r5:60000153 r4:c02046ac [ 132.108098] [] (tasklet_hi_action) from [] (__do_softirq+0x138/0x38c) [ 132.116251] r8:c06fb1ac r7:00000001 r6:00000283 r5:c06c6080 r4:00000000 r3:c002e09c [ 132.123980] [] (__do_softirq) from [] (irq_exit+0xc8/0x104) [ 132.131268] r10:ee008000 r9:ec4ffc80 r8:00000001 r7:00000000 r6:00000025 r5:00000000 [ 132.139080] r4:c06bfc54 [ 132.141600] [] (irq_exit) from [] (__handle_domain_irq+0x88/0xf0) [ 132.149409] r4:c06bfc54 r3:00000089 [ 132.152971] [] (__handle_domain_irq) from [] (gic_handle_irq+0x38/0x70) [ 132.161300] r10:00000080 r9:f0020000 r8:ec4ffc80 r7:c06c680c r6:0000000c r5:0000001c [ 132.169112] r4:f002000c r3:ec4ffc80 [ 132.172675] [] (gic_handle_irq) from [] (__irq_svc+0x40/0x74) [ 132.180137] Exception stack(0xec4ffc80 to 0xec4ffcc8) [ 132.185173] fc80: ee22d664 ee194100 00000000 ec4fe000 60000053 00000400 00000002 ee22d420 [ 132.193332] fca0: c072df54 ee22d010 00000080 ec4ffcdc ec4ffcc8 ec4ffcc8 c04e6620 c04e6624 [ 132.201489] fcc0: 60000053 ffffffff [ 132.204961] r9:ee22d010 r8:c072df54 r7:ec4ffcb4 r6:ffffffff r5:60000053 r4:c04e6624 [ 132.212694] [] (_raw_spin_unlock_irqrestore) from [] (mixer_win_commit+0x4f0/0xcc8) [ 132.222060] r4:c072df54 r3:ec4fe000 [ 132.225625] [] (mixer_win_commit) from [] (exynos_update_plane+0xac/0xb8) [ 132.234126] r10:00000280 r9:ee256000 r8:00000080 r7:00000080 r6:ee22d420 r5:ee261400 [ 132.241937] r4:ee256000 [ 132.244461] [] (exynos_update_plane) from [] (__setplane_internal+0x1a4/0x2c0) [ 132.253395] r7:ee261400 r6:ee22d420 r5:00000002 r4:01e00000 [ 132.259041] [] (__setplane_internal) from [] (drm_mode_setplane+0x180/0x244) [ 132.267804] r9:000001e0 r8:01e00000 r7:00000000 r6:ee22d420 r5:ee261400 r4:ee22d460 [ 132.275535] [] (drm_mode_setplane) from [] (drm_ioctl+0x1d0/0x58c) [ 132.283428] r10:ec4ffe54 r9:edbf7300 r8:ec4ffe54 r7:ee013800 r6:000000b7 r5:c072df54 
[ 132.291240] r4:c06c6448 [ 132.293763] [] (drm_ioctl) from [] (do_vfs_ioctl+0x430/0x614) [ 132.301222] r10:ee3f2040 r9:bed45d38 r8:00000003 r7:bed45d38 r6:00000003 r5:edbf7000 [ 132.309034] r4:edbf7000 [ 132.311555] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x3c/0x64) [ 132.318842] r10:00000000 r9:ec4fe000 r8:00000003 r7:bed45d38 r6:c03064b7 r5:edbf7000 [ 132.326654] r4:edbf7000 [ 132.329176] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x34) [ 132.336723] r8:c000ede4 r7:00000036 r6:c03064b7 r5:bed45d38 r4:00000000 r3:00000022 [ 132.344451] Code: e3130002 0affffaf eb09a67d eaffffad (e7f001f2) [ 132.350528] ---[ end trace d428689b94df895c ]--- [ 132.355126] Kernel panic - not syncing: Fatal exception in interrupt [ 132.361465] CPU2: stopping [ 132.364155] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G D W 4.0.1-debug+ #3 [ 132.371791] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 132.377866] Backtrace: [ 132.380304] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 132.387849] r6:c06e158c r5:ffffffff r4:00000000 r3:dc8ba201 [ 132.393497] [] (show_stack) from [] (dump_stack+0x88/0xc8) [ 132.400698] [] (dump_stack) from [] (handle_IPI+0x1c8/0x2c4) [ 132.408073] r6:c06bfc54 r5:c06bfc54 r4:00000005 r3:ee0b0000 [ 132.413718] [] (handle_IPI) from [] (gic_handle_irq+0x6c/0x70) [ 132.421267] r9:f0028000 r8:ee0b1f48 r7:c06c680c r6:fffffff5 r5:00000005 r4:f002800c [ 132.428995] [] (gic_handle_irq) from [] (__irq_svc+0x40/0x74) [ 132.436457] Exception stack(0xee0b1f48 to 0xee0b1f90) [ 132.441493] 1f40: 00000001 00000000 00000000 c00206c0 c06c6518 c04eb3a4 [ 132.449653] 1f60: 00000000 00000000 c06c0dc0 00000001 c06fb774 ee0b1f9c ee0b1fa0 ee0b1f90 [ 132.457811] 1f80: c000f82c c000f830 600f0053 ffffffff [ 132.462844] r9:00000001 r8:c06c0dc0 r7:ee0b1f7c r6:ffffffff r5:600f0053 r4:c000f830 [ 132.470575] [] (arch_cpu_idle) from [] (cpu_startup_entry+0x318/0x4ec) [ 132.478818] [] (cpu_startup_entry) from [] (secondary_start_kernel+0xf4/0x100) [ 132.487755] r7:c06fd440 [ 132.490279] [] (secondary_start_kernel) from [<40008744>] (0x40008744) [ 132.497651] r4:6e09006a r3:c000872c [ 132.501210] CPU3: stopping [ 132.503904] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G D W 4.0.1-debug+ #3 [ 132.511539] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 132.517614] Backtrace: [ 132.520051] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 132.527597] r6:c06e158c r5:ffffffff r4:00000000 r3:dc8ba201 [ 132.533243] [] (show_stack) from [] (dump_stack+0x88/0xc8) [ 132.540446] [] (dump_stack) from [] (handle_IPI+0x1c8/0x2c4) [ 132.547820] r6:c06bfc54 r5:c06bfc54 r4:00000005 r3:ee0b2000 [ 132.553466] [] (handle_IPI) from [] (gic_handle_irq+0x6c/0x70) [ 132.561014] r9:f002c000 r8:ee0b3f48 r7:c06c680c r6:fffffff5 r5:00000005 r4:f002c00c [ 132.568743] [] (gic_handle_irq) from [] (__irq_svc+0x40/0x74) [ 132.576205] Exception stack(0xee0b3f48 to 0xee0b3f90) [ 132.581241] 3f40: 00000001 00000000 00000000 c00206c0 c06c6518 c04eb3a4 [ 132.589401] 3f60: 00000000 00000000 c06c0dc0 00000001 c06fb774 ee0b3f9c ee0b3fa0 ee0b3f90 [ 132.597558] 3f80: c000f82c c000f830 600f0053 ffffffff [ 132.602591] r9:00000001 r8:c06c0dc0 r7:ee0b3f7c r6:ffffffff r5:600f0053 r4:c000f830 [ 132.610321] [] (arch_cpu_idle) from [] (cpu_startup_entry+0x318/0x4ec) [ 132.618566] [] (cpu_startup_entry) from [] (secondary_start_kernel+0xf4/0x100) [ 132.627503] r7:c06fd440 [ 132.630023] [] (secondary_start_kernel) from [<40008744>] (0x40008744) [ 132.637399] r4:6e09006a r3:c000872c [ 132.640958] CPU1: stopping [ 132.643651] CPU: 1 PID: 0 Comm: swapper/1 
Tainted: G D W 4.0.1-debug+ #3 [ 132.651287] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 132.657362] Backtrace: [ 132.659799] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 132.667344] r6:c06e158c r5:ffffffff r4:00000000 r3:dc8ba201 [ 132.672991] [] (show_stack) from [] (dump_stack+0x88/0xc8) [ 132.680194] [] (dump_stack) from [] (handle_IPI+0x1c8/0x2c4) [ 132.687569] r6:c06bfc54 r5:c06bfc54 r4:00000005 r3:ee0ae000 [ 132.693214] [] (handle_IPI) from [] (gic_handle_irq+0x6c/0x70) [ 132.700762] r9:f0024000 r8:ee0aff48 r7:c06c680c r6:fffffff5 r5:00000005 r4:f002400c [ 132.708491] [] (gic_handle_irq) from [] (__irq_svc+0x40/0x74) [ 132.715953] Exception stack(0xee0aff48 to 0xee0aff90) [ 132.720989] ff40: 00000001 00000000 00000000 c00206c0 c06c6518 c04eb3a4 [ 132.729149] ff60: 00000000 00000000 c06c0dc0 00000001 c06fb774 ee0aff9c ee0affa0 ee0aff90 [ 132.737306] ff80: c000f82c c000f830 60070053 ffffffff [ 132.742339] r9:00000001 r8:c06c0dc0 r7:ee0aff7c r6:ffffffff r5:60070053 r4:c000f830 [ 132.750069] [] (arch_cpu_idle) from [] (cpu_startup_entry+0x318/0x4ec) [ 132.758314] [] (cpu_startup_entry) from [] (secondary_start_kernel+0xf4/0x100) [ 132.767251] r7:c06fd440 [ 132.769772] [] (secondary_start_kernel) from [<40008744>] (0x40008744) [ 132.777146] r4:6e09006a r3:c000872c [ 132.780709] ---[ end Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Tobias Jakobi Reviewed-by: Gustavo Padovan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 31c1793bc97b..285e1569e6ad 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -365,8 +365,6 @@ static void mixer_run(struct mixer_context *ctx) struct mixer_resources *res = &ctx->mixer_res; mixer_reg_writemask(res, MXR_STATUS, ~0, MXR_STATUS_REG_RUN); - - mixer_regs_dump(ctx); } static void mixer_stop(struct mixer_context *ctx) @@ -379,8 +377,6 @@ static void mixer_stop(struct mixer_context *ctx) while (!(mixer_reg_read(res, MXR_STATUS) & MXR_STATUS_REG_IDLE) && --timeout) usleep_range(10000, 12000); - - mixer_regs_dump(ctx); } static void vp_video_buffer(struct mixer_context *ctx, int win) @@ -480,6 +476,7 @@ static void vp_video_buffer(struct mixer_context *ctx, int win) mixer_vsync_set_update(ctx, true); spin_unlock_irqrestore(&res->reg_slock, flags); + mixer_regs_dump(ctx); vp_regs_dump(ctx); } @@ -620,6 +617,8 @@ static void mixer_graph_buffer(struct mixer_context *ctx, int win) mixer_vsync_set_update(ctx, true); spin_unlock_irqrestore(&res->reg_slock, flags); + + mixer_regs_dump(ctx); } static void vp_win_reset(struct mixer_context *ctx) @@ -1073,6 +1072,7 @@ static void mixer_poweroff(struct mixer_context *ctx) mutex_unlock(&ctx->mixer_mutex); mixer_stop(ctx); + mixer_regs_dump(ctx); mixer_window_suspend(ctx); ctx->int_en = mixer_reg_read(res, MXR_INT_EN); From 5b1d5bc690a9666b375496f5d680278f19687bc4 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Wed, 6 May 2015 14:10:22 +0200 Subject: [PATCH 193/481] drm/exynos: 'win' is always unsigned The index for the hardware layer is always >=0. Previous code that also used -1 as special index is now gone. Also apply this to 'ch_enabled' (decon/fimd), since the variable is on the same line (and is again always unsigned). 
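For illustration only (this sketch is not part of the patch and all names in it are made up): moving an index like 'win' from int to unsigned int is only safe once every negative sentinel such as -1 is gone, because comparisons against negative values stop behaving as intended. A minimal stand-alone C example of the pitfall:

#include <stdio.h>

#define MAX_WIN 5U

static void enable_layers(void)
{
	unsigned int win;

	/* Counting upwards behaves identically with an unsigned index. */
	for (win = 0; win < MAX_WIN; win++)
		printf("enable hardware layer %u\n", win);

	/*
	 * With 'win' unsigned, a sentinel test such as "win < 0" is
	 * always false, and a downward loop guarded by "win >= 0"
	 * never terminates.  That is why the old code paths using -1
	 * as a special index had to be gone before this type change.
	 */
}

int main(void)
{
	enable_layers();
	return 0;
}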
Signed-off-by: Tobias Jakobi Reviewed-by: Gustavo Padovan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos7_drm_decon.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 7 ++++--- drivers/gpu/drm/exynos/exynos_mixer.c | 7 ++++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 3d00df76220d..6714e5b193ea 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -91,7 +91,7 @@ static void decon_wait_for_vblank(struct exynos_drm_crtc *crtc) static void decon_clear_channel(struct decon_context *ctx) { - int win, ch_enabled = 0; + unsigned int win, ch_enabled = 0; DRM_DEBUG_KMS("%s\n", __FILE__); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index d704f2ba7179..a0edab833148 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -215,7 +215,7 @@ static void fimd_wait_for_vblank(struct exynos_drm_crtc *crtc) DRM_DEBUG_KMS("vblank wait timed out.\n"); } -static void fimd_enable_video_output(struct fimd_context *ctx, int win, +static void fimd_enable_video_output(struct fimd_context *ctx, unsigned int win, bool enable) { u32 val = readl(ctx->regs + WINCON(win)); @@ -228,7 +228,8 @@ static void fimd_enable_video_output(struct fimd_context *ctx, int win, writel(val, ctx->regs + WINCON(win)); } -static void fimd_enable_shadow_channel_path(struct fimd_context *ctx, int win, +static void fimd_enable_shadow_channel_path(struct fimd_context *ctx, + unsigned int win, bool enable) { u32 val = readl(ctx->regs + SHADOWCON); @@ -243,7 +244,7 @@ static void fimd_enable_shadow_channel_path(struct fimd_context *ctx, int win, static void fimd_clear_channel(struct fimd_context *ctx) { - int win, ch_enabled = 0; + unsigned int win, ch_enabled = 0; DRM_DEBUG_KMS("%s\n", __FILE__); diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 285e1569e6ad..8874c1fcb3ab 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -333,7 +333,8 @@ static void mixer_cfg_rgb_fmt(struct mixer_context *ctx, unsigned int height) mixer_reg_writemask(res, MXR_CFG, val, MXR_CFG_RGB_FMT_MASK); } -static void mixer_cfg_layer(struct mixer_context *ctx, int win, bool enable) +static void mixer_cfg_layer(struct mixer_context *ctx, unsigned int win, + bool enable) { struct mixer_resources *res = &ctx->mixer_res; u32 val = enable ? ~0 : 0; @@ -379,7 +380,7 @@ static void mixer_stop(struct mixer_context *ctx) usleep_range(10000, 12000); } -static void vp_video_buffer(struct mixer_context *ctx, int win) +static void vp_video_buffer(struct mixer_context *ctx, unsigned int win) { struct mixer_resources *res = &ctx->mixer_res; unsigned long flags; @@ -511,7 +512,7 @@ fail: return -ENOTSUPP; } -static void mixer_graph_buffer(struct mixer_context *ctx, int win) +static void mixer_graph_buffer(struct mixer_context *ctx, unsigned int win) { struct mixer_resources *res = &ctx->mixer_res; unsigned long flags; From be083a002f421bea1fccc2c8a14ffb2c640fe792 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Sat, 25 Apr 2015 20:06:54 +0200 Subject: [PATCH 194/481] drm/exynos: cleanup exynos_drm_plane Remove the unused fields of struct exynos_drm_plane. v2: Remove index_color as well, also unused (thanks Joonyoung). 
Signed-off-by: Tobias Jakobi Reviewed-by: Gustavo Padovan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index 0e951226ca06..29e3fb78c615 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -71,13 +71,6 @@ enum exynos_drm_output_type { * @dma_addr: array of bus(accessed by dma) address to the memory region * allocated for a overlay. * @zpos: order of overlay layer(z position). - * @index_color: if using color key feature then this value would be used - * as index color. - * @default_win: a window to be enabled. - * @color_key: color key on or off. - * @local_path: in case of lcd type, local path mode on or off. - * @transparency: transparency on or off. - * @activated: activated or not. * @enabled: enabled or not. * @resume: to resume or not. * @@ -108,13 +101,7 @@ struct exynos_drm_plane { uint32_t pixel_format; dma_addr_t dma_addr[MAX_FB_BUFFER]; unsigned int zpos; - unsigned int index_color; - bool default_win:1; - bool color_key:1; - bool local_path:1; - bool transparency:1; - bool activated:1; bool enabled:1; bool resume:1; }; From b0f155ada4c819f06aa32b4c906e7e76350c7ec1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 14 May 2015 09:03:06 +0900 Subject: [PATCH 195/481] drm/exynos: dp: Lower level of EDID read success message Don't pollute the dmesg with EDID read success message as an error. Printing as debug should be fine. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_dp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c index 441ef06b8894..30feb7d06624 100644 --- a/drivers/gpu/drm/exynos/exynos_dp_core.c +++ b/drivers/gpu/drm/exynos/exynos_dp_core.c @@ -195,7 +195,7 @@ static int exynos_dp_read_edid(struct exynos_dp_device *dp) } } - dev_err(dp->dev, "EDID Read success!\n"); + dev_dbg(dp->dev, "EDID Read success!\n"); return 0; } From e46b5a6470a5e2c8e1096f8f60887ac19949055b Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 19 May 2015 22:06:41 +0800 Subject: [PATCH 196/481] ARM: dts: fix imx27 dtb build rule The i.MX27 dtb build should be controlled by CONFIG_SOC_IMX27 rather than CONFIG_SOC_IMX31. Signed-off-by: Shawn Guo Fixes: cb612390e546 ("ARM: dts: Only build dtb if associated Arch and/or SoC is enabled") Cc: --- arch/arm/boot/dts/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 86217db2937a..992736b5229b 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \ imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \ imx25-karo-tx25.dtb \ imx25-pdk.dtb -dtb-$(CONFIG_SOC_IMX31) += \ +dtb-$(CONFIG_SOC_IMX27) += \ imx27-apf27.dtb \ imx27-apf27dev.dtb \ imx27-eukrea-mbimxsd27-baseboard.dtb \ From 1e70897d0e20f988abedcf73b33684ecd2be9511 Mon Sep 17 00:00:00 2001 From: Naidu Tellapati Date: Fri, 8 May 2015 18:47:31 -0300 Subject: [PATCH 197/481] pwm: img: Impose upper and lower timebase steps value The PWM hardware on Pistachio platform has a maximum timebase steps value to 255. To fix it, let's introduce a compatible-specific data structure to contain the SoC-specific details and use it to specify a maximum timebase. 
Also, let's limit the minimum timebase to 16 steps, to allow a sane range of duty cycle steps. Fixes: 277bb6a29e00 ("pwm: Imagination Technologies PWM DAC driver") Signed-off-by: Naidu Tellapati Signed-off-by: Ezequiel Garcia Signed-off-by: Thierry Reding --- drivers/pwm/pwm-img.c | 76 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 12 deletions(-) diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 476171a768d6..8a029f9bc18c 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,22 @@ #define PERIP_PWM_PDM_CONTROL_CH_MASK 0x1 #define PERIP_PWM_PDM_CONTROL_CH_SHIFT(ch) ((ch) * 4) -#define MAX_TMBASE_STEPS 65536 +/* + * PWM period is specified with a timebase register, + * in number of step periods. The PWM duty cycle is also + * specified in step periods, in the [0, $timebase] range. + * In other words, the timebase imposes the duty cycle + * resolution. Therefore, let's constraint the timebase to + * a minimum value to allow a sane range of duty cycle values. + * Imposing a minimum timebase, will impose a maximum PWM frequency. + * + * The value chosen is completely arbitrary. + */ +#define MIN_TMBASE_STEPS 16 + +struct img_pwm_soc_data { + u32 max_timebase; +}; struct img_pwm_chip { struct device *dev; @@ -47,6 +63,9 @@ struct img_pwm_chip { struct clk *sys_clk; void __iomem *base; struct regmap *periph_regs; + int max_period_ns; + int min_period_ns; + const struct img_pwm_soc_data *data; }; static inline struct img_pwm_chip *to_img_pwm_chip(struct pwm_chip *chip) @@ -72,24 +91,31 @@ static int img_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, u32 val, div, duty, timebase; unsigned long mul, output_clk_hz, input_clk_hz; struct img_pwm_chip *pwm_chip = to_img_pwm_chip(chip); + unsigned int max_timebase = pwm_chip->data->max_timebase; + + if (period_ns < pwm_chip->min_period_ns || + period_ns > pwm_chip->max_period_ns) { + dev_err(chip->dev, "configured period not in range\n"); + return -ERANGE; + } input_clk_hz = clk_get_rate(pwm_chip->pwm_clk); output_clk_hz = DIV_ROUND_UP(NSEC_PER_SEC, period_ns); mul = DIV_ROUND_UP(input_clk_hz, output_clk_hz); - if (mul <= MAX_TMBASE_STEPS) { + if (mul <= max_timebase) { div = PWM_CTRL_CFG_NO_SUB_DIV; timebase = DIV_ROUND_UP(mul, 1); - } else if (mul <= MAX_TMBASE_STEPS * 8) { + } else if (mul <= max_timebase * 8) { div = PWM_CTRL_CFG_SUB_DIV0; timebase = DIV_ROUND_UP(mul, 8); - } else if (mul <= MAX_TMBASE_STEPS * 64) { + } else if (mul <= max_timebase * 64) { div = PWM_CTRL_CFG_SUB_DIV1; timebase = DIV_ROUND_UP(mul, 64); - } else if (mul <= MAX_TMBASE_STEPS * 512) { + } else if (mul <= max_timebase * 512) { div = PWM_CTRL_CFG_SUB_DIV0_DIV1; timebase = DIV_ROUND_UP(mul, 512); - } else if (mul > MAX_TMBASE_STEPS * 512) { + } else if (mul > max_timebase * 512) { dev_err(chip->dev, "failed to configure timebase steps/divider value\n"); return -EINVAL; @@ -143,11 +169,27 @@ static const struct pwm_ops img_pwm_ops = { .owner = THIS_MODULE, }; +static const struct img_pwm_soc_data pistachio_pwm = { + .max_timebase = 255, +}; + +static const struct of_device_id img_pwm_of_match[] = { + { + .compatible = "img,pistachio-pwm", + .data = &pistachio_pwm, + }, + { } +}; +MODULE_DEVICE_TABLE(of, img_pwm_of_match); + static int img_pwm_probe(struct platform_device *pdev) { int ret; + u64 val; + unsigned long clk_rate; struct resource *res; struct img_pwm_chip *pwm; + const struct of_device_id *of_dev_id; pwm = 
devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL); if (!pwm) @@ -160,6 +202,11 @@ static int img_pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->base)) return PTR_ERR(pwm->base); + of_dev_id = of_match_device(img_pwm_of_match, &pdev->dev); + if (!of_dev_id) + return -ENODEV; + pwm->data = of_dev_id->data; + pwm->periph_regs = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "img,cr-periph"); if (IS_ERR(pwm->periph_regs)) @@ -189,6 +236,17 @@ static int img_pwm_probe(struct platform_device *pdev) goto disable_sysclk; } + clk_rate = clk_get_rate(pwm->pwm_clk); + + /* The maximum input clock divider is 512 */ + val = (u64)NSEC_PER_SEC * 512 * pwm->data->max_timebase; + do_div(val, clk_rate); + pwm->max_period_ns = val; + + val = (u64)NSEC_PER_SEC * MIN_TMBASE_STEPS; + do_div(val, clk_rate); + pwm->min_period_ns = val; + pwm->chip.dev = &pdev->dev; pwm->chip.ops = &img_pwm_ops; pwm->chip.base = -1; @@ -228,12 +286,6 @@ static int img_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&pwm_chip->chip); } -static const struct of_device_id img_pwm_of_match[] = { - { .compatible = "img,pistachio-pwm", }, - { } -}; -MODULE_DEVICE_TABLE(of, img_pwm_of_match); - static struct platform_driver img_pwm_driver = { .driver = { .name = "img-pwm", From b5d721d761fccf491f92eefc611e318561683d0a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 May 2015 17:06:14 -0700 Subject: [PATCH 198/481] inet: properly align icsk_ca_priv tcp_illinois and upcoming tcp_cdg require 64bit alignment of icsk_ca_priv x86 does not care, but other architectures might. Fixes: 05cbc0db03e82 ("ipv4: Create probe timer for tcp PMTU as per RFC4821") Signed-off-by: Eric Dumazet Cc: Fan Du Acked-by: Fan Du Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 48a815823587..497bc14cdb85 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -129,9 +129,10 @@ struct inet_connection_sock { u32 probe_timestamp; } icsk_mtup; - u32 icsk_ca_priv[16]; u32 icsk_user_timeout; -#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) + + u64 icsk_ca_priv[64 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (8 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ From 1173ff09b9c57be8248427b7be161f7599dccd6b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 19 May 2015 09:07:27 +0200 Subject: [PATCH 199/481] watchdog: fix double lock in watchdog_nmi_enable_all Commit ab992dc38f9a ("watchdog: Fix merge 'conflict'") has introduced an obvious deadlock because of a typo. watchdog_proc_mutex should be unlocked on exit. Thanks to Miroslav Benes who was staring at the code with me and noticed this. 
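As an aside, a minimal sketch of the invariant being restored (all identifiers below are placeholders, not the actual watchdog code): every path that leaves the function after taking the mutex must release it exactly once, and the exit label is where that release lives.

#include <linux/mutex.h>
#include <linux/types.h>

static DEFINE_MUTEX(example_proc_mutex);
static bool example_watchdog_enabled;

static void example_nmi_enable_all(void)
{
	mutex_lock(&example_proc_mutex);

	if (!example_watchdog_enabled)
		goto unlock;

	/* ... enable the hard lockup detector on each online CPU ... */

unlock:
	/*
	 * The exit label must call mutex_unlock(); the typo called
	 * mutex_lock() here instead, so the function deadlocked on a
	 * mutex it already held.
	 */
	mutex_unlock(&example_proc_mutex);
}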
Signed-off-by: Michal Hocko Duh-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 506edcc500c4..581a68a04c64 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -621,7 +621,7 @@ void watchdog_nmi_enable_all(void) put_online_cpus(); unlock: - mutex_lock(&watchdog_proc_mutex); + mutex_unlock(&watchdog_proc_mutex); } void watchdog_nmi_disable_all(void) From 77bb3dfdc0d554befad58fdefbc41be5bc3ed38a Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 19 May 2015 18:40:49 +0100 Subject: [PATCH 200/481] xen/events: don't bind non-percpu VIRQs with percpu chip A non-percpu VIRQ (e.g., VIRQ_CONSOLE) may be freed on a different VCPU than it is bound to. This can result in a race between handle_percpu_irq() and removing the action in __free_irq() because handle_percpu_irq() does not take desc->lock. The interrupt handler sees a NULL action and oopses. Only use the percpu chip/handler for per-CPU VIRQs (like VIRQ_TIMER). # cat /proc/interrupts | grep virq 40: 87246 0 xen-percpu-virq timer0 44: 0 0 xen-percpu-virq debug0 47: 0 20995 xen-percpu-virq timer1 51: 0 0 xen-percpu-virq debug1 69: 0 0 xen-dyn-virq xen-pcpu 74: 0 0 xen-dyn-virq mce 75: 29 0 xen-dyn-virq hvc_console Signed-off-by: David Vrabel Cc: --- drivers/tty/hvc/hvc_xen.c | 2 +- drivers/xen/events/events_base.c | 12 ++++++++---- include/xen/events.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 5bab1c684bb1..7a3d146a5f0e 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -289,7 +289,7 @@ static int xen_initial_domain_console_init(void) return -ENOMEM; } - info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0); + info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); info->vtermno = HVC_COOKIE; spin_lock(&xencons_lock); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 2b8553bd8715..38387950490e 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -957,7 +957,7 @@ unsigned xen_evtchn_nr_channels(void) } EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels); -int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) { struct evtchn_bind_virq bind_virq; int evtchn, irq, ret; @@ -971,8 +971,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) if (irq < 0) goto out; - irq_set_chip_and_handler_name(irq, &xen_percpu_chip, - handle_percpu_irq, "virq"); + if (percpu) + irq_set_chip_and_handler_name(irq, &xen_percpu_chip, + handle_percpu_irq, "virq"); + else + irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_edge_irq, "virq"); bind_virq.virq = virq; bind_virq.vcpu = cpu; @@ -1062,7 +1066,7 @@ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, { int irq, retval; - irq = bind_virq_to_irq(virq, cpu); + irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); diff --git a/include/xen/events.h b/include/xen/events.h index 5321cd9636e6..7d95fdf9cf3e 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -17,7 +17,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); -int bind_virq_to_irq(unsigned int virq, unsigned int cpu); +int bind_virq_to_irq(unsigned int virq, unsigned 
int cpu, bool percpu); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, From 33b4b015e1a1ca7a8fdce40af5e71642a8ea355c Mon Sep 17 00:00:00 2001 From: Henning Rogge Date: Mon, 18 May 2015 21:08:49 +0200 Subject: [PATCH 201/481] net/ipv6/udp: Fix ipv6 multicast socket filter regression Commit <5cf3d46192fc> ("udp: Simplify__udp*_lib_mcast_deliver") simplified the filter for incoming IPv6 multicast but removed the check of the local socket address and the UDP destination address. This patch restores the filter to prevent sockets bound to a IPv6 multicast IP to receive other UDP traffic link unicast. Signed-off-by: Henning Rogge Fixes: 5cf3d46192fc ("udp: Simplify__udp*_lib_mcast_deliver") Cc: "David S. Miller" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3477c919fcc8..c2ec41617a35 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -731,7 +731,9 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport && inet->inet_dport != rmt_port) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || - (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) || + (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && + !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))) return false; if (!inet6_mc_check(sk, loc_addr, rmt_addr)) return false; From da34ac7626b571d262f92b93f11eb32dd58d9c4e Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 18 May 2015 12:31:44 -0700 Subject: [PATCH 202/481] tcp: only undo on partial ACKs in CA_Loss Undo based on TCP timestamps should only happen on ACKs that advance SND_UNA, according to the Eifel algorithm in RFC 3522: Section 3.2: (4) If the value of the Timestamp Echo Reply field of the acceptable ACK's Timestamps option is smaller than the value of RetransmitTS, then proceed to step (5), Section Terminology: We use the term 'acceptable ACK' as defined in [RFC793]. That is an ACK that acknowledges previously unacknowledged data. This is because upon receiving an out-of-order packet, the receiver returns the last timestamp that advances RCV_NXT, not the current timestamp of the packet in the DUPACK. Without checking the flag, the DUPACK will cause tcp_packet_delayed() to return true and tcp_try_undo_loss() will revert cwnd reduction. Note that we check the condition in CA_Recovery already by only calling tcp_try_undo_partial() if FLAG_SND_UNA_ADVANCED is set or tcp_try_undo_recovery() if snd_una crosses high_seq. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bc790ea9960f..9faf775a8c4a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2698,11 +2698,16 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) struct tcp_sock *tp = tcp_sk(sk); bool recovered = !before(tp->snd_una, tp->high_seq); + if ((flag & FLAG_SND_UNA_ADVANCED) && + tcp_try_undo_loss(sk, false)) + return; + if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ /* Step 3.b. A timeout is spurious if not all data are * lost, i.e., never-retransmitted data are (s)acked. 
*/ - if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED)) + if ((flag & FLAG_ORIG_SACK_ACKED) && + tcp_try_undo_loss(sk, true)) return; if (after(tp->snd_nxt, tp->high_seq) && @@ -2732,8 +2737,6 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) else if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); } - if (tcp_try_undo_loss(sk, false)) - return; tcp_xmit_retransmit_queue(sk); } From b7b0ed910cd8450db6d98cd4361c644bb1c88412 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 18 May 2015 12:31:45 -0700 Subject: [PATCH 203/481] tcp: don't over-send F-RTO probes After sending the new data packets to probe (step 2), F-RTO may incorrectly send more probes if the next ACK advances SND_UNA and does not sack new packet. However F-RTO RFC 5682 probes at most once. This bug may cause sender to always send new data instead of repairing holes, inducing longer HoL blocking on the receiver for the application. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9faf775a8c4a..243d674b3ef5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2710,9 +2710,9 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) tcp_try_undo_loss(sk, true)) return; - if (after(tp->snd_nxt, tp->high_seq) && - (flag & FLAG_DATA_SACKED || is_dupack)) { - tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ + if (after(tp->snd_nxt, tp->high_seq)) { + if (flag & FLAG_DATA_SACKED || is_dupack) + tp->frto = 0; /* Step 3.a. loss was real */ } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { tp->high_seq = tp->snd_nxt; __tcp_push_pending_frames(sk, tcp_current_mss(sk), From ee7619f2eb21304dcc846b8dc8f8c3d6cbe11792 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 19 May 2015 15:10:44 -0700 Subject: [PATCH 204/481] target: Drop signal_pending checks after interruptible lock acquire Once upon a time, iscsit_get_tpg() was using an un-interruptible lock. The signal_pending() usage was a check to allow userspace to break out of the operation with SIGINT. AFAICT, there's no reason why this is necessary anymore, and as reported by Alexey can be potentially dangerous. Also, go ahead and drop the other two problematic cases within iscsit_access_np() and sbc_compare_and_write() as well. Found by Linux Driver Verification project (linuxtesting.org). Reported-by: Alexey Khoroshilov Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 2 +- drivers/target/iscsi/iscsi_target_tpg.c | 5 +---- drivers/target/target_core_sbc.c | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 34871a628b11..74e6114ff18f 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -230,7 +230,7 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) * Here we serialize access across the TIQN+TPG Tuple. 
*/ ret = down_interruptible(&tpg->np_login_sem); - if ((ret != 0) || signal_pending(current)) + if (ret != 0) return -1; spin_lock_bh(&tpg->tpg_state_lock); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index e8a240818353..5e3295fe404d 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -161,10 +161,7 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np( int iscsit_get_tpg( struct iscsi_portal_group *tpg) { - int ret; - - ret = mutex_lock_interruptible(&tpg->tpg_access_lock); - return ((ret != 0) || signal_pending(current)) ? -1 : 0; + return mutex_lock_interruptible(&tpg->tpg_access_lock); } void iscsit_put_tpg(struct iscsi_portal_group *tpg) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 8855781ac653..733824e3825f 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -568,7 +568,7 @@ sbc_compare_and_write(struct se_cmd *cmd) * comparision using SGLs at cmd->t_bidi_data_sg.. */ rc = down_interruptible(&dev->caw_sem); - if ((rc != 0) || signal_pending(current)) { + if (rc != 0) { cmd->transport_complete_callback = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } From 2c2ed5aa0154c0be67f98c970de6b5587dbc045a Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 19 May 2015 12:49:50 -0700 Subject: [PATCH 205/481] btrfs: clear 'ret' in btrfs_check_shared() loop btrfs_check_shared() is leaking a return value of '1' from find_parent_nodes(). As a result, callers (in this case, extent_fiemap()) are told extents are shared when they are not. This in turn broke fiemap on btrfs for kernels v3.18 and up. The fix is simple - we just have to clear 'ret' after we are done processing the results of find_parent_nodes(). It wasn't clear to me at first what was happening with return values in btrfs_check_shared() and find_parent_nodes() - thanks to Josef for the help on irc. I added documentation to both functions to make things more clear for the next hacker who might come across them. If we could queue this up for -stable too that would be great. Signed-off-by: Mark Fasheh Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 9de772ee0031..614aaa1969bd 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -880,6 +880,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, * indirect refs to their parent bytenr. * When roots are found, they're added to the roots list * + * NOTE: This can return values > 0 + * * FIXME some caching might speed things up */ static int find_parent_nodes(struct btrfs_trans_handle *trans, @@ -1198,6 +1200,19 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, return ret; } +/** + * btrfs_check_shared - tell us whether an extent is shared + * + * @trans: optional trans handle + * + * btrfs_check_shared uses the backref walking code but will short + * circuit as soon as it finds a root or inode that doesn't match the + * one passed in. This provides a significant performance benefit for + * callers (such as fiemap) which want to know whether the extent is + * shared but do not need a ref count. + * + * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. 
+ */ int btrfs_check_shared(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 root_objectid, u64 inum, u64 bytenr) @@ -1226,11 +1241,13 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans, ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp, roots, NULL, root_objectid, inum); if (ret == BACKREF_FOUND_SHARED) { + /* this is the only condition under which we return 1 */ ret = 1; break; } if (ret < 0 && ret != -ENOENT) break; + ret = 0; node = ulist_next(tmp, &uiter); if (!node) break; From a96295965b600f2dc6ad661c4803c86e87db3d7b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 18 May 2015 19:11:40 +0100 Subject: [PATCH 206/481] Btrfs: fix racy system chunk allocation when setting block group ro If while setting a block group read-only we end up allocating a system chunk, through check_system_chunk(), we were not doing it while holding the chunk mutex which is a problem if a concurrent chunk allocation is happening, through do_chunk_alloc(), as it means both block groups can end up using the same logical addresses and physical regions in the device(s). So make sure we hold the chunk mutex. Cc: stable@vger.kernel.org # 4.0+ Fixes: 2f0810880f08 ("btrfs: delete chunk allocation attemp when setting block group ro") Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/volumes.c | 1 + 2 files changed, 3 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7effed6f2fa6..45e3f086790b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8842,7 +8842,9 @@ again: out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { alloc_flags = update_block_group_flags(root, cache->flags); + lock_chunks(root->fs_info->chunk_root); check_system_chunk(trans, root, alloc_flags); + unlock_chunks(root->fs_info->chunk_root); } mutex_unlock(&root->fs_info->ro_block_group_mutex); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 96aebf3bcd5b..174f5e1e00ab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4625,6 +4625,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, { u64 chunk_offset; + ASSERT(mutex_is_locked(&extent_root->fs_info->chunk_mutex)); chunk_offset = find_next_chunk(extent_root->fs_info); return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type); } From 976fa9a3ac9223d134cb4e1e3acf0e57160ee924 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 May 2015 17:20:36 +0200 Subject: [PATCH 207/481] ALSA: sound/atmel/ac97c.c: remove unused variable The recently added DT support for the ac97 driver is causing a gcc warning: sound/atmel/ac97c.c: In function 'atmel_ac97c_probe_dt': sound/atmel/ac97c.c:919:29: warning: unused variable 'match' [-Wunused-variable] const struct of_device_id *match; The variable is clearly unused, so we can remove it. 
Signed-off-by: Arnd Bergmann Acked-by: Alexander Stein Signed-off-by: Takashi Iwai --- sound/atmel/ac97c.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c index cf4cedf2b420..6dad042630d8 100644 --- a/sound/atmel/ac97c.c +++ b/sound/atmel/ac97c.c @@ -916,7 +916,6 @@ static struct ac97c_platform_data *atmel_ac97c_probe_dt(struct device *dev) { struct ac97c_platform_data *pdata; struct device_node *node = dev->of_node; - const struct of_device_id *match; if (!node) { dev_err(dev, "Device does not have associated DT data\n"); From 48f4b3a2ece560e37232fe9d681cd2ff0f3ff10d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 20 May 2015 06:49:37 +0200 Subject: [PATCH 208/481] ALSA: hda - Reduce verbs by node power-saves The widget (node) power-saves restore the widget states at each transition from D3 to D0 on each node. This was added in the commit [d545a57c5f84:ALSA: hda - Sync node attributes at resume from widget power saving]. However, the test was rater false-positive; this wasn't needed for any codecs. Since the resync may take significant number of additional verbs to be executed, it's better to reduce it. Let's disable it for now again. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 788f969b1a68..1c8678775f40 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -844,8 +844,16 @@ static hda_nid_t path_power_update(struct hda_codec *codec, snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE, state); changed = nid; + /* all known codecs seem to be capable to handl + * widgets state even in D3, so far. + * if any new codecs need to restore the widget + * states after D0 transition, call the function + * below. + */ +#if 0 /* disabled */ if (state == AC_PWRST_D0) snd_hdac_regmap_sync_node(&codec->core, nid); +#endif } } return changed; From 219f47e4f96442a7fe8fb646c59c6730690a9e66 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 20 May 2015 06:56:23 +0200 Subject: [PATCH 209/481] ALSA: hda - Disable widget power-saving for ALC292 & co We've got reports that ALC3226 (a Dell variant of ALC292) gives click noises at transition from D3 to D0 when the widget power-saving is enabled. Further debugging session showed that avoiding it isn't trivial, unfortunately, since paths are basically activated dynamically while the pins have been already enabled. This patch disables the widget power-saving for such codecs. 
Reported-by: Jonathan McDowell Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2e246fe495f6..31f8f13be907 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5623,7 +5623,8 @@ static int patch_alc269(struct hda_codec *codec) spec = codec->spec; spec->gen.shared_mic_vref_pin = 0x18; - codec->power_save_node = 1; + if (codec->core.vendor_id != 0x10ec0292) + codec->power_save_node = 1; snd_hda_pick_fixup(codec, alc269_fixup_models, alc269_fixup_tbl, alc269_fixups); From 5fb73bc2c8301ad106df0e858e60875cd2ae95cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 19 May 2015 19:45:03 +0200 Subject: [PATCH 210/481] thinkpad_acpi: Revert unintentional device attribute renaming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conversion to DEVICE_ATTR_* macros failed to fixup a few cases where the old attribute names didn't match the show/store function names. Instead of renaming the functions, the attributes were renamed. This caused an unintentional API change. The hwmon required 'name' attribute were among the renamed attribute, causing libsensors to fail to detect the hwmon device at all. Fix by using the DEVICE_ATTR macro for these attributes, allowing the show/store functions to keep their system specific prefixes. Fixes: b4dd04ac6ef8 ("thinkpad_acpi: use DEVICE_ATTR_* macros") Cc: Bastien Nocera Signed-off-by: Bjørn Mork Acked-by: Henrique de Moraes Holschuh Signed-off-by: Darren Hart --- drivers/platform/x86/thinkpad_acpi.c | 37 ++++++++++++++-------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 9bb9ad6d4a1b..28f328136f0d 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2897,7 +2897,7 @@ static ssize_t hotkey_wakeup_reason_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_wakeup_reason); } -static DEVICE_ATTR_RO(hotkey_wakeup_reason); +static DEVICE_ATTR(wakeup_reason, S_IRUGO, hotkey_wakeup_reason_show, NULL); static void hotkey_wakeup_reason_notify_change(void) { @@ -2913,7 +2913,8 @@ static ssize_t hotkey_wakeup_hotunplug_complete_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_autosleep_ack); } -static DEVICE_ATTR_RO(hotkey_wakeup_hotunplug_complete); +static DEVICE_ATTR(wakeup_hotunplug_complete, S_IRUGO, + hotkey_wakeup_hotunplug_complete_show, NULL); static void hotkey_wakeup_hotunplug_complete_notify_change(void) { @@ -2978,8 +2979,8 @@ static struct attribute *hotkey_attributes[] __initdata = { &dev_attr_hotkey_enable.attr, &dev_attr_hotkey_bios_enabled.attr, &dev_attr_hotkey_bios_mask.attr, - &dev_attr_hotkey_wakeup_reason.attr, - &dev_attr_hotkey_wakeup_hotunplug_complete.attr, + &dev_attr_wakeup_reason.attr, + &dev_attr_wakeup_hotunplug_complete.attr, &dev_attr_hotkey_mask.attr, &dev_attr_hotkey_all_mask.attr, &dev_attr_hotkey_recommended_mask.attr, @@ -4393,12 +4394,13 @@ static ssize_t wan_enable_store(struct device *dev, attr, buf, count); } -static DEVICE_ATTR_RW(wan_enable); +static DEVICE_ATTR(wwan_enable, S_IWUSR | S_IRUGO, + wan_enable_show, wan_enable_store); /* --------------------------------------------------------------------- */ static struct attribute *wan_attributes[] = { - &dev_attr_wan_enable.attr, + 
&dev_attr_wwan_enable.attr, NULL }; @@ -8138,7 +8140,8 @@ static ssize_t fan_pwm1_enable_store(struct device *dev, return count; } -static DEVICE_ATTR_RW(fan_pwm1_enable); +static DEVICE_ATTR(pwm1_enable, S_IWUSR | S_IRUGO, + fan_pwm1_enable_show, fan_pwm1_enable_store); /* sysfs fan pwm1 ------------------------------------------------------ */ static ssize_t fan_pwm1_show(struct device *dev, @@ -8198,7 +8201,7 @@ static ssize_t fan_pwm1_store(struct device *dev, return (rc) ? rc : count; } -static DEVICE_ATTR_RW(fan_pwm1); +static DEVICE_ATTR(pwm1, S_IWUSR | S_IRUGO, fan_pwm1_show, fan_pwm1_store); /* sysfs fan fan1_input ------------------------------------------------ */ static ssize_t fan_fan1_input_show(struct device *dev, @@ -8215,7 +8218,7 @@ static ssize_t fan_fan1_input_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%u\n", speed); } -static DEVICE_ATTR_RO(fan_fan1_input); +static DEVICE_ATTR(fan1_input, S_IRUGO, fan_fan1_input_show, NULL); /* sysfs fan fan2_input ------------------------------------------------ */ static ssize_t fan_fan2_input_show(struct device *dev, @@ -8232,7 +8235,7 @@ static ssize_t fan_fan2_input_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%u\n", speed); } -static DEVICE_ATTR_RO(fan_fan2_input); +static DEVICE_ATTR(fan2_input, S_IRUGO, fan_fan2_input_show, NULL); /* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */ static ssize_t fan_fan_watchdog_show(struct device_driver *drv, @@ -8265,8 +8268,8 @@ static DRIVER_ATTR(fan_watchdog, S_IWUSR | S_IRUGO, /* --------------------------------------------------------------------- */ static struct attribute *fan_attributes[] = { - &dev_attr_fan_pwm1_enable.attr, &dev_attr_fan_pwm1.attr, - &dev_attr_fan_fan1_input.attr, + &dev_attr_pwm1_enable.attr, &dev_attr_pwm1.attr, + &dev_attr_fan1_input.attr, NULL, /* for fan2_input */ NULL }; @@ -8400,7 +8403,7 @@ static int __init fan_init(struct ibm_init_struct *iibm) if (tp_features.second_fan) { /* attach second fan tachometer */ fan_attributes[ARRAY_SIZE(fan_attributes)-2] = - &dev_attr_fan_fan2_input.attr; + &dev_attr_fan2_input.attr; } rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj, &fan_attr_group); @@ -8848,7 +8851,7 @@ static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%s\n", TPACPI_NAME); } -static DEVICE_ATTR_RO(thinkpad_acpi_pdev_name); +static DEVICE_ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL); /* --------------------------------------------------------------------- */ @@ -9390,8 +9393,7 @@ static void thinkpad_acpi_module_exit(void) hwmon_device_unregister(tpacpi_hwmon); if (tp_features.sensors_pdev_attrs_registered) - device_remove_file(&tpacpi_sensors_pdev->dev, - &dev_attr_thinkpad_acpi_pdev_name); + device_remove_file(&tpacpi_sensors_pdev->dev, &dev_attr_name); if (tpacpi_sensors_pdev) platform_device_unregister(tpacpi_sensors_pdev); if (tpacpi_pdev) @@ -9512,8 +9514,7 @@ static int __init thinkpad_acpi_module_init(void) thinkpad_acpi_module_exit(); return ret; } - ret = device_create_file(&tpacpi_sensors_pdev->dev, - &dev_attr_thinkpad_acpi_pdev_name); + ret = device_create_file(&tpacpi_sensors_pdev->dev, &dev_attr_name); if (ret) { pr_err("unable to create sysfs hwmon device attributes\n"); thinkpad_acpi_module_exit(); From e88221c50cadade0eb4f7f149f4967d760212695 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 May 2015 11:45:30 +0200 Subject: [PATCH 211/481] x86/fpu: Disable XSAVES* support for now The kernel's handling of 'compacted' 
xsave state layout is buggy: http://marc.info/?l=linux-kernel&m=142967852317199 I don't have such a system, and the description there is vague, but from extrapolation I guess that there were two kinds of bugs observed: - boot crashes, due to size calculations being wrong and the dynamic allocation allocating a too small xstate area. (This is now fixed in the new FPU code - but still present in stable kernels.) - FPU state corruption and ABI breakage: if signal handlers try to change the FPU state in standard format, which then the kernel tries to restore in the compacted format. These breakages are scary, but they only occur on a small number of systems that have XSAVES* CPU support. Yet we have had XSAVES support in the upstream kernel for a large number of stable kernel releases, and the fixes are involved and unproven. So do the safe resolution first: disable XSAVES* support and only use the standard xstate format. This makes the code work and is easy to backport. On top of this we can work on enabling (and testing!) proper compacted format support, without backporting pressure, on top of the new, cleaned up FPU code. Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 009183276bb7..6185d3141219 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -173,6 +173,21 @@ static void init_thread_xstate(void) xstate_size = sizeof(struct i387_fxsave_struct); else xstate_size = sizeof(struct i387_fsave_struct); + + /* + * Quirk: we don't yet handle the XSAVES* instructions + * correctly, as we don't correctly convert between + * standard and compacted format when interfacing + * with user-space - so disable it for now. + * + * The difference is small: with recent CPUs the + * compacted format is only marginally smaller than + * the standard FPU state format. + * + * ( This is easy to backport while we are fixing + * XSAVES* support. ) + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); } /* From 26ba61f871b4aa9958bcebffcbeb558094d75928 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 19 May 2015 17:42:02 -0700 Subject: [PATCH 212/481] HID: wacom: fix an Oops caused by wacom_wac_finger_count_touches We assumed all touch interfaces report touch data. But, Bamboo and Intuos non-touch devices report express keys on touch interface. We need to check touch_max before counting touches. 
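A hedged, stand-alone sketch of the added guard (hypothetical helper name and signature; the real routine walks the driver's slot state, which is omitted here):

/*
 * Non-touch Bamboo/Intuos devices expose an interface that carries
 * only express keys and advertise touch_max == 0, so the counting
 * loop must not run at all for them.
 */
static int example_count_touches(unsigned int touch_max)
{
	int count = 0;

	if (!touch_max)
		return 0;

	/* ... otherwise inspect up to touch_max input slots ... */
	return count;
}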
Reported-by: Tasos Sahanidis Signed-off-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index fa54d3290659..adf959dcfa5d 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1072,6 +1072,9 @@ static int wacom_wac_finger_count_touches(struct wacom_wac *wacom) int count = 0; int i; + if (!touch_max) + return 0; + /* non-HID_GENERIC single touch input doesn't call this routine */ if ((touch_max == 1) && (wacom->features.type == HID_GENERIC)) return wacom->hid_data.tipswitch && From e8fd5e9e9984675f45b9a5485909c143fbde248f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 8 May 2015 14:32:56 +0200 Subject: [PATCH 213/481] kvm: fix crash in kvm_vcpu_reload_apic_access_page memslot->userfault_addr is set by the kernel with a mmap executed from the kernel but the userland can still munmap it and lead to the below oops after memslot->userfault_addr points to a host virtual address that has no vma or mapping. [ 327.538306] BUG: unable to handle kernel paging request at fffffffffffffffe [ 327.538407] IP: [] put_page+0x5/0x50 [ 327.538474] PGD 1a01067 PUD 1a03067 PMD 0 [ 327.538529] Oops: 0000 [#1] SMP [ 327.538574] Modules linked in: macvtap macvlan xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT iptable_filter ip_tables tun bridge stp llc rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache xprtrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ipmi_devintf iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp dcdbas intel_rapl kvm_intel kvm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr sb_edac edac_core ipmi_si ipmi_msghandler acpi_pad wmi acpi_power_meter lpc_ich mfd_core mei_me [ 327.539488] mei shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc mlx4_ib ib_sa ib_mad ib_core mlx4_en vxlan ib_addr ip_tunnel xfs libcrc32c sd_mod crc_t10dif crct10dif_common crc32c_intel mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit drm_kms_helper ttm drm ahci i2c_core libahci mlx4_core libata tg3 ptp pps_core megaraid_sas ntb dm_mirror dm_region_hash dm_log dm_mod [ 327.539956] CPU: 3 PID: 3161 Comm: qemu-kvm Not tainted 3.10.0-240.el7.userfault19.4ca4011.x86_64.debug #1 [ 327.540045] Hardware name: Dell Inc. 
PowerEdge R420/0CN7CM, BIOS 2.1.2 01/20/2014 [ 327.540115] task: ffff8803280ccf00 ti: ffff880317c58000 task.ti: ffff880317c58000 [ 327.540184] RIP: 0010:[] [] put_page+0x5/0x50 [ 327.540261] RSP: 0018:ffff880317c5bcf8 EFLAGS: 00010246 [ 327.540313] RAX: 00057ffffffff000 RBX: ffff880616a20000 RCX: 0000000000000000 [ 327.540379] RDX: 0000000000002014 RSI: 00057ffffffff000 RDI: fffffffffffffffe [ 327.540445] RBP: ffff880317c5bd10 R08: 0000000000000103 R09: 0000000000000000 [ 327.540511] R10: 0000000000000000 R11: 0000000000000000 R12: fffffffffffffffe [ 327.540576] R13: 0000000000000000 R14: ffff880317c5bd70 R15: ffff880317c5bd50 [ 327.540643] FS: 00007fd230b7f700(0000) GS:ffff880630800000(0000) knlGS:0000000000000000 [ 327.540717] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 327.540771] CR2: fffffffffffffffe CR3: 000000062a2c3000 CR4: 00000000000427e0 [ 327.540837] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 327.540904] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 327.540974] Stack: [ 327.541008] ffffffffa05d6d0c ffff880616a20000 0000000000000000 ffff880317c5bdc0 [ 327.541093] ffffffffa05ddaa2 0000000000000000 00000000002191bf 00000042f3feab2d [ 327.541177] 00000042f3feab2d 0000000000000002 0000000000000001 0321000000000000 [ 327.541261] Call Trace: [ 327.541321] [] ? kvm_vcpu_reload_apic_access_page+0x6c/0x80 [kvm] [ 327.543615] [] vcpu_enter_guest+0x3f2/0x10f0 [kvm] [ 327.545918] [] kvm_arch_vcpu_ioctl_run+0x2b0/0x5a0 [kvm] [ 327.548211] [] ? kvm_arch_vcpu_ioctl_run+0xa2/0x5a0 [kvm] [ 327.550500] [] kvm_vcpu_ioctl+0x2b5/0x680 [kvm] [ 327.552768] [] ? creds_are_invalid.part.1+0x12/0x50 [ 327.555069] [] ? creds_are_invalid+0x21/0x30 [ 327.557373] [] ? inode_has_perm.isra.49.constprop.65+0x26/0x80 [ 327.559663] [] do_vfs_ioctl+0x305/0x530 [ 327.561917] [] SyS_ioctl+0xa1/0xc0 [ 327.564185] [] system_call_fastpath+0x16/0x1b [ 327.566480] Code: 0b 31 f6 4c 89 e7 e8 4b 7f ff ff 0f 0b e8 24 fd ff ff e9 a9 fd ff ff 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 <48> f7 07 00 c0 00 00 55 48 89 e5 75 2a 8b 47 1c 85 c0 74 1e f0 Signed-off-by: Andrea Arcangeli Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 986b3f5d0523..5f3818846465 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6195,6 +6195,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) return; page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (is_error_page(page)) + return; kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); /* From 0fdd74f7784b5cdff7075736992bbb149b1ae49c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 May 2015 11:33:43 +0200 Subject: [PATCH 214/481] Revert "KVM: x86: drop fpu_activate hook" This reverts commit 4473b570a7ebb502f63f292ccfba7df622e5fdd3. We'll use the hook again. 
Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 1 + arch/x86/kvm/vmx.c | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e61c3a4ee131..5a1faf3f043e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -744,6 +744,7 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + void (*fpu_activate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce741b8650f6..9afa233b5482 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, + .fpu_activate = svm_fpu_activate, .fpu_deactivate = svm_fpu_deactivate, .tlb_flush = svm_flush_tlb, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7b61687bd79..2d73807f0d31 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10185,6 +10185,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, + .fpu_activate = vmx_fpu_activate, .fpu_deactivate = vmx_fpu_deactivate, .tlb_flush = vmx_flush_tlb, From c447e76b4cabb49ddae8e49c5758f031f35d55fb Mon Sep 17 00:00:00 2001 From: Liang Li Date: Thu, 21 May 2015 04:41:25 +0800 Subject: [PATCH 215/481] kvm/fpu: Enable eager restore kvm FPU for MPX The MPX feature requires eager KVM FPU restore support. We have verified that MPX cannot work correctly with the current lazy KVM FPU restore mechanism. Eager KVM FPU restore should be enabled if the MPX feature is exposed to VM. Signed-off-by: Yang Zhang Signed-off-by: Liang Li [Also activate the FPU on AMD processors. - Paolo] Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/cpuid.c | 4 ++++ arch/x86/kvm/cpuid.h | 8 ++++++++ arch/x86/kvm/x86.c | 16 ++++++++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5a1faf3f043e..f4a555beef19 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -401,6 +401,7 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_header_cache; struct fpu guest_fpu; + bool eager_fpu; u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59b69f6a2844..1d08ad3582d0 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -16,6 +16,8 @@ #include #include #include +#include /* For use_eager_fpu. Ugh! */ +#include /* For use_eager_fpu. Ugh! */ #include #include #include "cpuid.h" @@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); + vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu); + /* * The existing code assumes virtual address is 48-bit in the canonical * address checks; exit if it is ever changed. 
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c3b1ad9fca81..496b3695d3d3 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry(vcpu, 7, 0); return best && (best->ebx & bit(X86_FEATURE_RTM)); } + +static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_MPX)); +} #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f3818846465..ea306adbbc13 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7060,7 +7060,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) fpu_save_init(&vcpu->arch.guest_fpu); __kernel_fpu_end(); ++vcpu->stat.fpu_reload; - kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); + if (!vcpu->arch.eager_fpu) + kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); + trace_kvm_fpu(0); } @@ -7076,11 +7078,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { + struct kvm_vcpu *vcpu; + if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) printk_once(KERN_WARNING "kvm: SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); - return kvm_x86_ops->vcpu_create(kvm, id); + + vcpu = kvm_x86_ops->vcpu_create(kvm, id); + + /* + * Activate fpu unconditionally in case the guest needs eager FPU. It will be + * deactivated soon if it doesn't. + */ + kvm_x86_ops->fpu_activate(vcpu); + return vcpu; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) From 3bfe049807c240344b407e3cfb74544927359817 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Sun, 17 May 2015 14:30:31 -0700 Subject: [PATCH 216/481] netfilter: nfnetlink_{log,queue}: Register pernet in first place nfnetlink_{log,queue}_init() register the netlink callback nf*_rcv_nl_event before registering the pernet_subsys, but the callback relies on data structures allocated by pernet init functions. When nfnetlink_{log,queue} is loaded, if a netlink message is received after the netlink callback is registered but before the pernet_subsys is registered, the kernel will panic in the sequence nfulnl_rcv_nl_event nfnl_log_pernet net_generic BUG_ON(id == 0) where id is nfnl_log_net_id. The panic can be easily reproduced in 4.0.3 by: while true ;do modprobe nfnetlink_log ; rmmod nfnetlink_log ; done & while true ;do ip netns add dummy ; ip netns del dummy ; done & This patch moves register_pernet_subsys to earlier in nfnetlink_log_init. Notice that the BUG_ON hit in 4.0.3 was recently removed in 2591ffd308 ["netns: remove BUG_ONs from net_generic()"]. 
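The fix is thus purely an ordering one: make the per-netns state exist before any entry point that can dereference it, and unwind in reverse order on failure. A trimmed-down sketch of the resulting nfnetlink_log init path (logger registration elided for brevity; nfnetlink_queue follows the same pattern):

static int __init nfnetlink_log_init(void)
{
        int status;

        /* Per-netns data first, so nfnl_log_pernet() never runs before it exists. */
        status = register_pernet_subsys(&nfnl_log_net_ops);
        if (status < 0)
                return status;

        /* Only now expose the callbacks that rely on that data. */
        netlink_register_notifier(&nfulnl_rtnl_notifier);
        status = nfnetlink_subsys_register(&nfulnl_subsys);
        if (status < 0)
                goto cleanup_netlink_notifier;

        return 0;

cleanup_netlink_notifier:
        netlink_unregister_notifier(&nfulnl_rtnl_notifier);
        unregister_pernet_subsys(&nfnl_log_net_ops);
        return status;
}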
Signed-off-by: Francesco Ruggeri Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 19 ++++++++++--------- net/netfilter/nfnetlink_queue_core.c | 18 +++++++++--------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 3ad91266c821..4ef1fae8445e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1073,7 +1073,13 @@ static struct pernet_operations nfnl_log_net_ops = { static int __init nfnetlink_log_init(void) { - int status = -ENOMEM; + int status; + + status = register_pernet_subsys(&nfnl_log_net_ops); + if (status < 0) { + pr_err("failed to register pernet ops\n"); + goto out; + } netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); @@ -1088,28 +1094,23 @@ static int __init nfnetlink_log_init(void) goto cleanup_subsys; } - status = register_pernet_subsys(&nfnl_log_net_ops); - if (status < 0) { - pr_err("failed to register pernet ops\n"); - goto cleanup_logger; - } return status; -cleanup_logger: - nf_log_unregister(&nfulnl_logger); cleanup_subsys: nfnetlink_subsys_unregister(&nfulnl_subsys); cleanup_netlink_notifier: netlink_unregister_notifier(&nfulnl_rtnl_notifier); + unregister_pernet_subsys(&nfnl_log_net_ops); +out: return status; } static void __exit nfnetlink_log_fini(void) { - unregister_pernet_subsys(&nfnl_log_net_ops); nf_log_unregister(&nfulnl_logger); nfnetlink_subsys_unregister(&nfulnl_subsys); netlink_unregister_notifier(&nfulnl_rtnl_notifier); + unregister_pernet_subsys(&nfnl_log_net_ops); } MODULE_DESCRIPTION("netfilter userspace logging"); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 0b98c7420239..11c7682fa0ea 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -1317,7 +1317,13 @@ static struct pernet_operations nfnl_queue_net_ops = { static int __init nfnetlink_queue_init(void) { - int status = -ENOMEM; + int status; + + status = register_pernet_subsys(&nfnl_queue_net_ops); + if (status < 0) { + pr_err("nf_queue: failed to register pernet ops\n"); + goto out; + } netlink_register_notifier(&nfqnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfqnl_subsys); @@ -1326,19 +1332,13 @@ static int __init nfnetlink_queue_init(void) goto cleanup_netlink_notifier; } - status = register_pernet_subsys(&nfnl_queue_net_ops); - if (status < 0) { - pr_err("nf_queue: failed to register pernet ops\n"); - goto cleanup_subsys; - } register_netdevice_notifier(&nfqnl_dev_notifier); nf_register_queue_handler(&nfqh); return status; -cleanup_subsys: - nfnetlink_subsys_unregister(&nfqnl_subsys); cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); +out: return status; } @@ -1346,9 +1346,9 @@ static void __exit nfnetlink_queue_fini(void) { nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); - unregister_pernet_subsys(&nfnl_queue_net_ops); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); + unregister_pernet_subsys(&nfnl_queue_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ } From 1086bbe97a074844188c6c988fa0b1a98c3ccbb9 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 19 May 2015 20:55:17 -0400 Subject: [PATCH 217/481] netfilter: ensure number of counters is >0 in do_replace() After improving setsockopt() coverage in trinity, I started triggering vmalloc failures pretty reliably from this code path: 
warn_alloc_failed+0xe9/0x140 __vmalloc_node_range+0x1be/0x270 vzalloc+0x4b/0x50 __do_replace+0x52/0x260 [ip_tables] do_ipt_set_ctl+0x15d/0x1d0 [ip_tables] nf_setsockopt+0x65/0x90 ip_setsockopt+0x61/0xa0 raw_setsockopt+0x16/0x60 sock_common_setsockopt+0x14/0x20 SyS_setsockopt+0x71/0xd0 It turns out we don't validate that the num_counters field in the struct we pass in from userspace is initialized. The same problem also exists in ebtables, arptables, ipv6, and the compat variants. Signed-off-by: Dave Jones Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 4 ++++ net/ipv4/netfilter/arp_tables.c | 6 ++++++ net/ipv4/netfilter/ip_tables.c | 6 ++++++ net/ipv6/netfilter/ip6_tables.c | 6 ++++++ 4 files changed, 22 insertions(+) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 91180a7fc943..24c7c96bf5f8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1117,6 +1117,8 @@ static int do_replace(struct net *net, const void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; tmp.name[sizeof(tmp.name) - 1] = 0; @@ -2159,6 +2161,8 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 13bfe84bf3ca..a61200754f4b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1075,6 +1075,9 @@ static int do_replace(struct net *net, const void __user *user, /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); @@ -1499,6 +1502,9 @@ static int compat_do_replace(struct net *net, void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c69db7fa25ee..2d0e265fef6e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1262,6 +1262,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); @@ -1809,6 +1812,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1a732a1d3c8e..62f5b0d0bc9b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1275,6 +1275,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + 
return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); @@ -1822,6 +1825,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); From faecbb45ebefb20260ad4a631e011e93c896cb73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 May 2015 13:42:25 +0200 Subject: [PATCH 218/481] Revert "netfilter: bridge: query conntrack about skb dnat" This reverts commit c055d5b03bb4cb69d349d787c9787c0383abd8b2. There are two issues: 'dnat_took_place' made me think that this is related to -j DNAT/MASQUERADE. But thats only one part of the story. This is also relevant for SNAT when we undo snat translation in reverse/reply direction. Furthermore, I originally wanted to do this mainly to avoid storing ipv6 addresses once we make DNAT/REDIRECT work for ipv6 on bridges. However, I forgot about SNPT/DNPT which is stateless. So we can't escape storing address for ipv6 anyway. Might as well do it for ipv4 too. Reported-and-tested-by: Bernhard Thaler Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 1 + net/bridge/br_netfilter.c | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66e374d62f64..f15154a879c7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -176,6 +176,7 @@ struct nf_bridge_info { struct net_device *physindev; struct net_device *physoutdev; char neigh_header[8]; + __be32 ipv4_daddr; }; #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ab55e2472beb..60ddfbeb47f5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -37,10 +37,6 @@ #include #include -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -#include -#endif - #include #include "br_private.h" #ifdef CONFIG_SYSCTL @@ -350,24 +346,15 @@ free_skb: return 0; } -static bool dnat_took_place(const struct sk_buff *skb) +static bool daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) { -#if IS_ENABLED(CONFIG_NF_CONNTRACK) - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - if (!ct || nf_ct_is_untracked(ct)) - return false; - - return test_bit(IPS_DST_NAT_BIT, &ct->status); -#else - return false; -#endif + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; } /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. + * This is also true when SNAT takes place (for the reply direction). * * There are two cases to consider: * 1. 
The packet was DNAT'ed to a device in the same bridge @@ -421,7 +408,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - if (dnat_took_place(skb)) { + if (daddr_was_changed(skb, nf_bridge)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -632,6 +619,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { + struct nf_bridge_info *nf_bridge; struct net_bridge_port *p; struct net_bridge *br; __u32 len = nf_bridge_encap_header_len(skb); @@ -669,6 +657,9 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, if (!setup_pre_routing(skb)) return NF_DROP; + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; + skb->protocol = htons(ETH_P_IP); NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, From 252ec2b3aa6f6a9ac29c6539027db600c11bf45e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 May 2015 13:36:38 +0200 Subject: [PATCH 219/481] mac80211: don't split remain-on-channel for coalescing Due to remain-on-channel scheduling delays, when we split an ROC while coalescing, we'll usually get a picture like this: existing ROC: |------------------| current time: ^ new ROC: |------| |-------| If the expected response frames are then transmitted by the peer in the hole between the two fragments of the new ROC, we miss them and the process (e.g. ANQP query) fails. mac80211 expects that the window to miss something is small: existing ROC: |------------------| new ROC: |------||-------| but that's normally not the case. To avoid this problem, coalesce only if the new ROC's duration is <= the remaining time on the existing one: existing ROC: |------------------| new ROC: |-----| and never split a new one but schedule it afterwards instead: existing ROC: |------------------| new ROC: |-------------| type=bugfix bug=not-tracked fixes=unknown Reported-by: Matti Gottlieb Reviewed-by: EliadX Peller Reviewed-by: Matti Gottlieb Tested-by: Matti Gottlieb Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 59 +++++++------------------------------- net/mac80211/ieee80211_i.h | 6 ---- 2 files changed, 11 insertions(+), 54 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 265e42721a66..ff347a0eebd4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2495,51 +2495,22 @@ static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local, struct ieee80211_roc_work *new_roc, struct ieee80211_roc_work *cur_roc) { - unsigned long j = jiffies; - unsigned long cur_roc_end = cur_roc->hw_start_time + - msecs_to_jiffies(cur_roc->duration); - struct ieee80211_roc_work *next_roc; - int new_dur; + unsigned long now = jiffies; + unsigned long remaining = cur_roc->hw_start_time + + msecs_to_jiffies(cur_roc->duration) - + now; if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun)) return false; - if (time_after(j + IEEE80211_ROC_MIN_LEFT, cur_roc_end)) + /* if it doesn't fit entirely, schedule a new one */ + if (new_roc->duration > jiffies_to_msecs(remaining)) return false; ieee80211_handle_roc_started(new_roc); - new_dur = new_roc->duration - jiffies_to_msecs(cur_roc_end - j); - - /* cur_roc is long enough - add new_roc to the dependents list. 
*/ - if (new_dur <= 0) { - list_add_tail(&new_roc->list, &cur_roc->dependents); - return true; - } - - new_roc->duration = new_dur; - - /* - * if cur_roc was already coalesced before, we might - * want to extend the next roc instead of adding - * a new one. - */ - next_roc = list_entry(cur_roc->list.next, - struct ieee80211_roc_work, list); - if (&next_roc->list != &local->roc_list && - next_roc->chan == new_roc->chan && - next_roc->sdata == new_roc->sdata && - !WARN_ON(next_roc->started)) { - list_add_tail(&new_roc->list, &next_roc->dependents); - next_roc->duration = max(next_roc->duration, - new_roc->duration); - next_roc->type = max(next_roc->type, new_roc->type); - return true; - } - - /* add right after cur_roc */ - list_add(&new_roc->list, &cur_roc->list); - + /* add to dependents so we send the expired event properly */ + list_add_tail(&new_roc->list, &cur_roc->dependents); return true; } @@ -2652,17 +2623,9 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, * In the offloaded ROC case, if it hasn't begun, add * this new one to the dependent list to be handled * when the master one begins. If it has begun, - * check that there's still a minimum time left and - * if so, start this one, transmitting the frame, but - * add it to the list directly after this one with - * a reduced time so we'll ask the driver to execute - * it right after finishing the previous one, in the - * hope that it'll also be executed right afterwards, - * effectively extending the old one. - * If there's no minimum time left, just add it to the - * normal list. - * TODO: the ROC type is ignored here, assuming that it - * is better to immediately use the current ROC. + * check if it fits entirely within the existing one, + * in which case it will just be dependent as well. + * Otherwise, schedule it by itself. */ if (!tmp->hw_begun) { list_add_tail(&roc->list, &tmp->dependents); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cdc374296245..c0a9187bc3a9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -328,12 +328,6 @@ struct mesh_preq_queue { u8 flags; }; -#if HZ/100 == 0 -#define IEEE80211_ROC_MIN_LEFT 1 -#else -#define IEEE80211_ROC_MIN_LEFT (HZ/100) -#endif - struct ieee80211_roc_work { struct list_head list; struct list_head dependents; From f9dca80b98caac8b4bfb43a2edf1e9f877ccf322 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Wed, 13 May 2015 09:16:48 +0000 Subject: [PATCH 220/481] mac80211: fix AP_VLAN crypto tailroom calculation Some splats I was seeing: (a) WARNING: CPU: 1 PID: 0 at /devel/src/linux/net/mac80211/wep.c:102 ieee80211_wep_add_iv (b) WARNING: CPU: 1 PID: 0 at /devel/src/linux/net/mac80211/wpa.c:73 ieee80211_tx_h_michael_mic_add (c) WARNING: CPU: 3 PID: 0 at /devel/src/linux/net/mac80211/wpa.c:433 ieee80211_crypto_ccmp_encrypt I've seen (a) and (b) with ath9k hw crypto and (c) with ath9k sw crypto. All of them were related to insufficient skb tailroom and I was able to trigger these with ping6 program. AP_VLANs may inherit crypto keys from parent AP. This wasn't considered and yielded problems in some setups resulting in inability to transmit data because mac80211 wouldn't resize skbs when necessary and subsequently drop some packets due to insufficient tailroom. For efficiency purposes don't inspect both AP_VLAN and AP sdata looking for tailroom counter. Instead update AP_VLAN tailroom counters whenever their master AP tailroom counter changes. 
Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 6 ++++ net/mac80211/key.c | 82 +++++++++++++++++++++++++++++++++++++++----- net/mac80211/key.h | 1 + net/mac80211/util.c | 3 ++ 4 files changed, 83 insertions(+), 9 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index bab5c63c0bad..84cef600c573 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -522,6 +522,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) memcpy(sdata->vif.hw_queue, master->vif.hw_queue, sizeof(sdata->vif.hw_queue)); sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef; + + mutex_lock(&local->key_mtx); + sdata->crypto_tx_tailroom_needed_cnt += + master->crypto_tx_tailroom_needed_cnt; + mutex_unlock(&local->key_mtx); + break; } case NL80211_IFTYPE_AP: diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 2291cd730091..a907f2d5c12d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -58,6 +58,22 @@ static void assert_key_lock(struct ieee80211_local *local) lockdep_assert_held(&local->key_mtx); } +static void +update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) +{ + struct ieee80211_sub_if_data *vlan; + + if (sdata->vif.type != NL80211_IFTYPE_AP) + return; + + mutex_lock(&sdata->local->mtx); + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + vlan->crypto_tx_tailroom_needed_cnt += delta; + + mutex_unlock(&sdata->local->mtx); +} + static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) { /* @@ -79,6 +95,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net */ + update_vlan_tailroom_need_count(sdata, 1); + if (!sdata->crypto_tx_tailroom_needed_cnt++) { /* * Flush all XMIT packets currently using HW encryption or no @@ -88,6 +106,15 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) } } +static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, + int delta) +{ + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta); + + update_vlan_tailroom_need_count(sdata, -delta); + sdata->crypto_tx_tailroom_needed_cnt -= delta; +} + static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { struct ieee80211_sub_if_data *sdata; @@ -144,7 +171,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) - sdata->crypto_tx_tailroom_needed_cnt--; + decrease_tailroom_need_count(sdata, 1); WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)); @@ -541,7 +568,7 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key, schedule_delayed_work(&sdata->dec_tailroom_needed_wk, HZ/2); } else { - sdata->crypto_tx_tailroom_needed_cnt--; + decrease_tailroom_need_count(sdata, 1); } } @@ -631,6 +658,7 @@ void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; + struct ieee80211_sub_if_data *vlan; ASSERT_RTNL(); @@ -639,7 +667,14 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) mutex_lock(&sdata->local->key_mtx); - sdata->crypto_tx_tailroom_needed_cnt = 0; + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || + sdata->crypto_tx_tailroom_pending_dec); + + if (sdata->vif.type == NL80211_IFTYPE_AP) { + 
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || + vlan->crypto_tx_tailroom_pending_dec); + } list_for_each_entry(key, &sdata->key_list, list) { increment_tailroom_need_count(sdata); @@ -649,6 +684,22 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->local->key_mtx); } +void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_sub_if_data *vlan; + + mutex_lock(&sdata->local->key_mtx); + + sdata->crypto_tx_tailroom_needed_cnt = 0; + + if (sdata->vif.type == NL80211_IFTYPE_AP) { + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + vlan->crypto_tx_tailroom_needed_cnt = 0; + } + + mutex_unlock(&sdata->local->key_mtx); +} + void ieee80211_iter_keys(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void (*iter)(struct ieee80211_hw *hw, @@ -688,8 +739,8 @@ static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, { struct ieee80211_key *key, *tmp; - sdata->crypto_tx_tailroom_needed_cnt -= - sdata->crypto_tx_tailroom_pending_dec; + decrease_tailroom_need_count(sdata, + sdata->crypto_tx_tailroom_pending_dec); sdata->crypto_tx_tailroom_pending_dec = 0; ieee80211_debugfs_key_remove_mgmt_default(sdata); @@ -709,6 +760,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *vlan; + struct ieee80211_sub_if_data *master; struct ieee80211_key *key, *tmp; LIST_HEAD(keys); @@ -728,8 +780,20 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, list_for_each_entry_safe(key, tmp, &keys, list) __ieee80211_key_destroy(key, false); - WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || - sdata->crypto_tx_tailroom_pending_dec); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + if (sdata->bss) { + master = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt != + master->crypto_tx_tailroom_needed_cnt); + } + } else { + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || + sdata->crypto_tx_tailroom_pending_dec); + } + if (sdata->vif.type == NL80211_IFTYPE_AP) { list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || @@ -793,8 +857,8 @@ void ieee80211_delayed_tailroom_dec(struct work_struct *wk) */ mutex_lock(&sdata->local->key_mtx); - sdata->crypto_tx_tailroom_needed_cnt -= - sdata->crypto_tx_tailroom_pending_dec; + decrease_tailroom_need_count(sdata, + sdata->crypto_tx_tailroom_pending_dec); sdata->crypto_tx_tailroom_pending_dec = 0; mutex_unlock(&sdata->local->key_mtx); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index c5a31835be0e..96557dd1e77d 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -161,6 +161,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata); #define key_mtx_dereference(local, ref) \ rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 79412f16b61d..b864ebc6ab8f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2022,6 +2022,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); /* add back keys */ + list_for_each_entry(sdata, 
&local->interfaces, list) + ieee80211_reset_crypto_tx_tailroom(sdata); + list_for_each_entry(sdata, &local->interfaces, list) if (ieee80211_sdata_running(sdata)) ieee80211_enable_keys(sdata); From c411ead995b46f361b92116fd042ae83866044a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 13 May 2015 07:36:38 +0200 Subject: [PATCH 221/481] ssb: extend fix for PCI related silent reboots to all chipsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent fix for BCM4704 reboots has to be extended as the same problem affects Linksys WRT350N v1 (BCM4705). Signed-off-by: Rafał Miłecki Reported-by: Daniel Gimpelevich Signed-off-by: Kalle Valo --- drivers/ssb/driver_pcicore.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c index 15a7ee3859dd..5fe1c22e289b 100644 --- a/drivers/ssb/driver_pcicore.c +++ b/drivers/ssb/driver_pcicore.c @@ -359,12 +359,13 @@ static void ssb_pcicore_init_hostmode(struct ssb_pcicore *pc) /* * Accessing PCI config without a proper delay after devices reset (not - * GPIO reset) was causing reboots on WRT300N v1.0. + * GPIO reset) was causing reboots on WRT300N v1.0 (BCM4704). * Tested delay 850 us lowered reboot chance to 50-80%, 1000 us fixed it * completely. Flushing all writes was also tested but with no luck. + * The same problem was reported for WRT350N v1 (BCM4705), so we just + * sleep here unconditionally. */ - if (pc->dev->bus->chip_id == 0x4704) - usleep_range(1000, 2000); + usleep_range(1000, 2000); /* Enable PCI bridge BAR0 prefetch and burst */ val = PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY; From 1dc92c450a53f120b67296cb4b29c1dfdc665ac1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 20 May 2015 09:32:21 -0500 Subject: [PATCH 222/481] [cifs] fix null pointer check Dan Carpenter pointed out an inconsistent null pointer check in smb2_hdr_assemble that was pointed out by static checker. Signed-off-by: Steve French Reviewed-by: Sachin Prabhu CC: Dan Carpenter w --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 65cd7a84c8bc..54cbe19d9c08 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -110,7 +110,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ - if ((tcon->ses) && + if ((tcon->ses) && (tcon->ses->server) && (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) hdr->CreditCharge = cpu_to_le16(1); /* else CreditCharge MBZ */ From 35f1b4e96b9258a3668872b1139c51e5a23eb876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Mon, 18 May 2015 20:53:55 +0200 Subject: [PATCH 223/481] ipv6: do not delete previously existing ECMP routes if add fails If adding a nexthop of an IPv6 multipath route fails, comment in ip6_route_multipath() says we are going to delete all nexthops already added. However, current implementation deletes even the routes it hasn't even tried to add yet. For example, running ip route add 1234:5678::/64 \ nexthop via fe80::aa dev dummy1 \ nexthop via fe80::bb dev dummy1 \ nexthop via fe80::cc dev dummy1 twice results in removing all routes first command added. Limit the second (delete) run to nexthops that succeeded in the first (add) run. 
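Concretely, the delete run must be bounded by how much of the nexthop list the add run actually consumed, instead of restarting with the full fc_mp_len. A stripped-down sketch of that control flow (r_cfg is the per-nexthop config built from rtnh; error handling of the delete pass is omitted):

        remaining = cfg->fc_mp_len;
beginning:
        rtnh = (struct rtnexthop *)cfg->fc_mp;
        while (rtnh_ok(rtnh, remaining)) {
                /* ... fill r_cfg from rtnh ... */
                err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
                if (err) {
                        /* Re-run as a delete pass, but only over the part of
                         * the list the add pass has already walked. */
                        add = 0;
                        remaining = cfg->fc_mp_len - remaining;
                        goto beginning;
                }
                rtnh = rtnh_next(rtnh, &remaining);
        }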
Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)") Signed-off-by: Michal Kubecek Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d3588885f097..3821a3517478 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2504,9 +2504,9 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add) int attrlen; int err = 0, last_err = 0; + remaining = cfg->fc_mp_len; beginning: rtnh = (struct rtnexthop *)cfg->fc_mp; - remaining = cfg->fc_mp_len; /* Parse a Multipath Entry */ while (rtnh_ok(rtnh, remaining)) { @@ -2536,6 +2536,7 @@ beginning: * next hops that have been already added. */ add = 0; + remaining = cfg->fc_mp_len - remaining; goto beginning; } } From 27596472473a02cfef2908a6bcda7e55264ba6b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Mon, 18 May 2015 20:54:00 +0200 Subject: [PATCH 224/481] ipv6: fix ECMP route replacement When replacing an IPv6 multipath route with "ip route replace", i.e. NLM_F_CREATE | NLM_F_REPLACE, fib6_add_rt2node() replaces only first matching route without fixing its siblings, resulting in corrupted siblings linked list; removing one of the siblings can then end in an infinite loop. IPv6 ECMP implementation is a bit different from IPv4 so that route replacement cannot work in exactly the same way. This should be a reasonable approximation: 1. If the new route is ECMP-able and there is a matching ECMP-able one already, replace it and all its siblings (if any). 2. If the new route is ECMP-able and no matching ECMP-able route exists, replace first matching non-ECMP-able (if any) or just add the new one. 3. If the new route is not ECMP-able, replace first matching non-ECMP-able route (if any) or add the new route. We also need to remove the NLM_F_REPLACE flag after replacing old route(s) by first nexthop of an ECMP route so that each subsequent nexthop does not replace previous one. Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)") Signed-off-by: Michal Kubecek Acked-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 39 +++++++++++++++++++++++++++++++++++++-- net/ipv6/route.c | 11 +++++++---- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 96dbffff5a24..bde57b113009 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -693,6 +693,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, { struct rt6_info *iter = NULL; struct rt6_info **ins; + struct rt6_info **fallback_ins = NULL; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); int add = (!info->nlh || @@ -716,8 +717,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, (info->nlh->nlmsg_flags & NLM_F_EXCL)) return -EEXIST; if (replace) { - found++; - break; + if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { + found++; + break; + } + if (rt_can_ecmp) + fallback_ins = fallback_ins ?: ins; + goto next_iter; } if (iter->dst.dev == rt->dst.dev && @@ -753,9 +759,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (iter->rt6i_metric > rt->rt6i_metric) break; +next_iter: ins = &iter->dst.rt6_next; } + if (fallback_ins && !found) { + /* No ECMP-able route found, replace first non-ECMP one */ + ins = fallback_ins; + iter = *ins; + found++; + } + /* Reset round-robin state, if necessary */ if (ins == &fn->leaf) fn->rr_ptr = NULL; @@ -815,6 +829,8 @@ add: } } else { + int nsiblings; + if (!found) { if (add) goto add; @@ -835,8 +851,27 @@ add: info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } + nsiblings = iter->rt6i_nsiblings; fib6_purge_rt(iter, fn, info->nl_net); rt6_release(iter); + + if (nsiblings) { + /* Replacing an ECMP route, remove all siblings */ + ins = &rt->dst.rt6_next; + iter = *ins; + while (iter) { + if (rt6_qualify_for_ecmp(iter)) { + *ins = iter->dst.rt6_next; + fib6_purge_rt(iter, fn, info->nl_net); + rt6_release(iter); + nsiblings--; + } else { + ins = &iter->dst.rt6_next; + } + iter = *ins; + } + WARN_ON(nsiblings != 0); + } } return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3821a3517478..c73ae5039e46 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2541,11 +2541,14 @@ beginning: } } /* Because each route is added like a single route we remove - * this flag after the first nexthop (if there is a collision, - * we have already fail to add the first nexthop: - * fib6_add_rt2node() has reject it). + * these flags after the first nexthop: if there is a collision, + * we have already failed to add the first nexthop: + * fib6_add_rt2node() has rejected it; when replacing, old + * nexthops have been replaced by first new, the rest should + * be added to it. */ - cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL; + cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | + NLM_F_REPLACE); rtnh = rtnh_next(rtnh, &remaining); } From c15e10e71ce3b4ee78d85d80102a9621cde1edbd Mon Sep 17 00:00:00 2001 From: Tim Beale Date: Mon, 18 May 2015 15:38:38 +1200 Subject: [PATCH 225/481] net: phy: Make sure phy_start() always re-enables the phy interrupts This is an alternative way of fixing: commit db9683fb412d ("net: phy: Make sure PHY_RESUMING state change is always processed") When the PHY state transitions from PHY_HALTED to PHY_RESUMING, there are two things we need to do: 1). Re-enable interrupts (and power up the physical link, if powered down) 2). Update the PHY state and net-device based on the link status. There's no strict reason why #1 has to be done from within the main phy_state_machine() function. 
There is a risk that other changes to the PHY (e.g. setting speed/duplex, which calls phy_start_aneg()) could cause a subsequent state transition before phy_state_machine() has processed the PHY_RESUMING state change. This would leave the PHY with interrupts disabled and/or still in the BMCR_PDOWN/low-power mode. Moving enabling the interrupts and phy_resume() into phy_start() will guarantee this work always gets done. As the PHY is already in the HALTED state and interrupts are disabled, it shouldn't conflict with any work being done in phy_state_machine(). The downside of this change is that if the PHY_RESUMING state is ever entered from anywhere else, it'll also have to repeat this work. Signed-off-by: Tim Beale Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 710696d1af97..47cd578052fc 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -465,7 +465,7 @@ int phy_start_aneg(struct phy_device *phydev) if (err < 0) goto out_unlock; - if (phydev->state != PHY_HALTED && phydev->state != PHY_RESUMING) { + if (phydev->state != PHY_HALTED) { if (AUTONEG_ENABLE == phydev->autoneg) { phydev->state = PHY_AN; phydev->link_timeout = PHY_AN_TIMEOUT; @@ -742,6 +742,9 @@ EXPORT_SYMBOL(phy_stop); */ void phy_start(struct phy_device *phydev) { + bool do_resume = false; + int err = 0; + mutex_lock(&phydev->lock); switch (phydev->state) { @@ -752,11 +755,22 @@ void phy_start(struct phy_device *phydev) phydev->state = PHY_UP; break; case PHY_HALTED: + /* make sure interrupts are re-enabled for the PHY */ + err = phy_enable_interrupts(phydev); + if (err < 0) + break; + phydev->state = PHY_RESUMING; + do_resume = true; + break; default: break; } mutex_unlock(&phydev->lock); + + /* if phy was suspended, bring the physical link up again */ + if (do_resume) + phy_resume(phydev); } EXPORT_SYMBOL(phy_start); @@ -769,7 +783,7 @@ void phy_state_machine(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct phy_device *phydev = container_of(dwork, struct phy_device, state_queue); - bool needs_aneg = false, do_suspend = false, do_resume = false; + bool needs_aneg = false, do_suspend = false; int err = 0; mutex_lock(&phydev->lock); @@ -888,14 +902,6 @@ void phy_state_machine(struct work_struct *work) } break; case PHY_RESUMING: - err = phy_clear_interrupt(phydev); - if (err) - break; - - err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); - if (err) - break; - if (AUTONEG_ENABLE == phydev->autoneg) { err = phy_aneg_done(phydev); if (err < 0) @@ -933,7 +939,6 @@ void phy_state_machine(struct work_struct *work) } phydev->adjust_link(phydev->attached_dev); } - do_resume = true; break; } @@ -943,8 +948,6 @@ void phy_state_machine(struct work_struct *work) err = phy_start_aneg(phydev); else if (do_suspend) phy_suspend(phydev); - else if (do_resume) - phy_resume(phydev); if (err < 0) phy_error(phydev); From 65c3b205ebe06a389a29544e71a8071da0ce4155 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Apr 2015 17:30:24 +0300 Subject: [PATCH 226/481] CIFS: remove an unneeded NULL check Smatch complains because we dereference "ses->server" without checking some lines earlier inside the call to get_next_mid(ses->server). 
fs/cifs/cifssmb.c:4921 CIFSGetDFSRefer() warn: variable dereferenced before check 'ses->server' (see line 4899) There is only one caller for this function get_dfs_path() and it always passes a non-null "ses->server" pointer so this NULL check can be removed. Signed-off-by: Dan Carpenter Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1091affba425..f26ffbfc64d8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4918,7 +4918,7 @@ getDFSRetry: strncpy(pSMB->RequestFileName, search_name, name_len); } - if (ses->server && ses->server->sign) + if (ses->server->sign) pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; pSMB->hdr.Uid = ses->Suid; From c29ed5a4566fc7e0c5d06324d62974c6163d1e06 Mon Sep 17 00:00:00 2001 From: Ted Kim Date: Thu, 14 May 2015 12:49:01 -0700 Subject: [PATCH 227/481] ib/cm: Change reject message type when destroying cm_id Problem reported by: Ted Kim : We have a case where a Linux system and a non-Linux system are trying to interoperate. The Linux host is the active side and starts the connection establishment, but later decides to not go through with the connection setup and does rdma_destroy_id(). The rdma_destroy_id() eventually works its way down to cm_destroy_id() in core/cm.c, where a REJ is sent. The non-Linux system has some trouble recognizing the REJ because of: A. CM states which can't receive the REJ B. Some issues about REJ formatting (missing comm ID) ISSUE A: That part of the spec says, a Consumer Reject REJ can be sent for a connection abort, but it goes further and says: can send a REJ message with a "Consumer Reject" Reason code if they are in a CM state (i.e. REP Rcvd, MRA(REP) Sent, REQ Rcvd, MRA Sent) that allows a REJ to be sent (lines 35-38). Of the states listed there in that sentence, it would seem to limit the active side to using the Consumer Reject (for the abort case) in just the REP-Rcvd and MRA-REP-Sent states. That is basically only after the active side sees a REP (or alternatively goes down the state transitions to timeout in which case a Timeout REJ is sent). As a fix, in cm-destroy-id() move the IB-CM-MRA-REQ-RCVD case to the same as REQ-SENT. Essentially, make a REJ sent after getting an MRA on active side a timeout rather than Consumer- Reject, which is arguably more correct with the CM state diagrams previous to getting a REP. 
Signed-off-by: Ted Kim Signed-off-by: Sean Hefty --- drivers/infiniband/core/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0c1419105ff0..0271608a51c4 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -861,6 +861,7 @@ retest: cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); break; case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, @@ -879,7 +880,6 @@ retest: NULL, 0, NULL, 0); } break; - case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); From 3d76be5b933e2a66d85a2f7444e68e99e8a48ad4 Mon Sep 17 00:00:00 2001 From: Robert Nelson Date: Wed, 20 May 2015 10:00:10 -0700 Subject: [PATCH 228/481] ARM: dts: am335x-boneblack: disable RTC-only sleep Fixes: http://bugs.elinux.org/issues/143 Entering RTC-only sleep is only properly supported on early prototypes series (pre-A6) of the BeagleBone Black. Since rev (A6A), which include all production versions, it is not support at due to. (rev A6) enable of the 3v3b regulator moved from LDO2 to LDO4 (3v3a) side-effect: 3v3b rail remains on in sleep-mode (also in off-mode when battery-powered) (rev A6A) am335x vdds supply moved from LDO3 to LDO1 side-effect: vdds remains supplied in sleep-mode Reported-by: Matthijs van Duin Tested-by: Matthijs van Duin Signed-off-by: Robert Nelson Cc: Tony Lindgren Cc: Felipe Balbi Cc: Johan Hovold Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-boneblack.dts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts index 5c42d259fa68..901739fcb85a 100644 --- a/arch/arm/boot/dts/am335x-boneblack.dts +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -80,7 +80,3 @@ status = "okay"; }; }; - -&rtc { - system-power-controller; -}; From ed38c6573b2ef34b3629989f7b6ac5894a010403 Mon Sep 17 00:00:00 2001 From: Anthoine Bourgeois Date: Wed, 20 May 2015 10:00:10 -0700 Subject: [PATCH 229/481] ARM: dts: omap3-devkit8000: Fix NAND DT node Add nand-ecc-opt and device-width properties to enable nand support on Devkit8000. Signed-off-by: Anthoine Bourgeois Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-devkit8000.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index 134d3f27a8ec..921de6605f07 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -110,6 +110,8 @@ nand@0,0 { reg = <0 0 4>; /* CS0, offset 0, IO size 4 */ nand-bus-width = <16>; + gpmc,device-width = <2>; + ti,nand-ecc-opt = "sw"; gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; From f25bf74c8862efdc30453d16d60cf22958a4873e Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Wed, 20 May 2015 10:00:10 -0700 Subject: [PATCH 230/481] ARM: dts: Fix WLAN interrupt line for AM335x EVM-SK While Sitara AM335x SoCs are very close to OMAP SoCs, the 32-line GPIO controllers are numbered from 0 on AM335x and from 1 on OMAP. But when the configuration for the TI WLAN controllers was converted from platform data to device tree, this detail was overlooked, as 10 boards were using OMAP with the WL12xx and WL18xx controllers, and only one was based on AM335x. 
This invalid configuration prevents the WL1271 module on the AM335x EVM-SK from notifying interrupts to the SoC, and breaks the wlan driver. The DTS must be corrected to use the correct GPIO controller. Signed-off-by: Romain Izard Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-evmsk.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 87fc7a35e802..156d05efcb70 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -654,7 +654,7 @@ wlcore: wlcore@2 { compatible = "ti,wl1271"; reg = <2>; - interrupt-parent = <&gpio1>; + interrupt-parent = <&gpio0>; interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */ ref-clock-frequency = <38400000>; }; From b0494532214bdfbf241e94fabab5dd46f7b82631 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 11 May 2015 17:53:10 +0300 Subject: [PATCH 231/481] libceph: request a new osdmap if lingering request maps to no osd This commit does two things. First, if there are any homeless lingering requests, we now request a new osdmap even if the osdmap that is being processed brought no changes, i.e. if a given lingering request turned homeless in one of the previous epochs and remained homeless in the current epoch. Not doing so leaves us with a stale osdmap and as a result we may miss our window for reestablishing the watch and lose notifies. MON=1 OSD=1: # cat linger-needmap.sh #!/bin/bash rbd create --size 1 test DEV=$(rbd map test) ceph osd out 0 rbd map dne/dne # obtain a new osdmap as a side effect (!) sleep 1 ceph osd in 0 rbd resize --size 2 test # rbd info test | grep size -> 2M # blockdev --getsize $DEV -> 1M N.B.: Not obtaining a new osdmap in between "osd out" and "osd in" above is enough to make it miss that resize notify, but that is a bug^Wlimitation of ceph watch/notify v1. Second, homeless lingering requests are now kicked just like those lingering requests whose mapping has changed. This is mainly to recognize that a homeless lingering request makes no sense and to preserve the invariant that a registered lingering request is not sitting on any of r_req_lru_item lists. This spares us a WARN_ON, which commit ba9d114ec557 ("libceph: clear r_req_lru_item in __unregister_linger_request()") tried to fix the _wrong_ way. Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 41a4abc7e98e..31d4b1ebff01 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2017,20 +2017,29 @@ static void kick_requests(struct ceph_osd_client *osdc, bool force_resend, err = __map_request(osdc, req, force_resend || force_resend_writes); dout("__map_request returned %d\n", err); - if (err == 0) - continue; /* no change and no osd was specified */ if (err < 0) continue; /* hrm! */ - if (req->r_osd == NULL) { - dout("tid %llu maps to no valid osd\n", req->r_tid); - needmap++; /* request a newer map */ - continue; - } + if (req->r_osd == NULL || err > 0) { + if (req->r_osd == NULL) { + dout("lingering %p tid %llu maps to no osd\n", + req, req->r_tid); + /* + * A homeless lingering request makes + * no sense, as it's job is to keep + * a particular OSD connection open. + * Request a newer map and kick the + * request, knowing that it won't be + * resent until we actually get a map + * that can tell us where to send it. 
+ */ + needmap++; + } - dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, - req->r_osd ? req->r_osd->o_osd : -1); - __register_request(osdc, req); - __unregister_linger_request(osdc, req); + dout("kicking lingering %p tid %llu osd%d\n", req, + req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); + __register_request(osdc, req); + __unregister_linger_request(osdc, req); + } } reset_changed_osds(osdc); mutex_unlock(&osdc->request_mutex); From 521a04d06a729e5971cdee7f84080387ed320527 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 11 May 2015 17:53:34 +0300 Subject: [PATCH 232/481] Revert "libceph: clear r_req_lru_item in __unregister_linger_request()" This reverts commit ba9d114ec5578e6e99a4dfa37ff8ae688040fd64. .. which introduced a regression that prevented all lingering requests requeued in kick_requests() from ever being sent to the OSDs, resulting in a lot of missed notifies. In retrospect it's pretty obvious that r_req_lru_item item in the case of lingering requests can be used not only for notarget, but also for unsent linkage due to how tightly actual map and enqueue operations are coupled in __map_request(). The assertion that was being silenced is taken care of in the previous ("libceph: request a new osdmap if lingering request maps to no osd") commit: by always kicking homeless lingering requests we ensure that none of them ends up on the notarget list outside of the critical section guarded by request_mutex. Cc: stable@vger.kernel.org # 3.18+, needs b0494532214b "libceph: request a new osdmap if lingering request maps to no osd" Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 31d4b1ebff01..c4ec9239249a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1306,8 +1306,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, if (list_empty(&req->r_osd_item)) req->r_osd = NULL; } - - list_del_init(&req->r_req_lru_item); /* can be on notarget */ ceph_osdc_put_request(req); } From 153c35b6cccc0c72de9fae06c8e2c8b2c47d79d4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 May 2015 18:54:41 -0700 Subject: [PATCH 233/481] Btrfs: fix regression in raid level conversion Commit 2f0810880f082fa8ba66ab2c33b02e4ff9770a5e changed btrfs_set_block_group_ro to avoid trying to allocate new chunks with the new raid profile during conversion. This fixed failures when there was no space on the drive to allocate a new chunk, but the metadata reserves were sufficient to continue the conversion. But this ended up causing a regression when the drive had plenty of space to allocate new chunks, mostly because reduce_alloc_profile isn't using the new raid profile. Fixing btrfs_reduce_alloc_profile is a bigger patch. For now, do a partial revert of 2f0810880, and don't error out if we hit ENOSPC. Signed-off-by: Chris Mason Tested-by: Dave Sterba Reported-by: Holger Hoffstaette --- fs/btrfs/extent-tree.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 45e3f086790b..0ec3acd14cbf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8829,6 +8829,24 @@ again: goto again; } + /* + * if we are changing raid levels, try to allocate a corresponding + * block group with the new raid level. 
+ */ + alloc_flags = update_block_group_flags(root, cache->flags); + if (alloc_flags != cache->flags) { + ret = do_chunk_alloc(trans, root, alloc_flags, + CHUNK_ALLOC_FORCE); + /* + * ENOSPC is allowed here, we may have enough space + * already allocated at the new raid level to + * carry on + */ + if (ret == -ENOSPC) + ret = 0; + if (ret < 0) + goto out; + } ret = set_block_group_ro(cache, 0); if (!ret) From 7196ac113a4f38b7ca1a3282fd9edf328bd22287 Mon Sep 17 00:00:00 2001 From: Nakajima Akira Date: Wed, 22 Apr 2015 15:24:44 +0900 Subject: [PATCH 234/481] Fix to check Unique id and FileType when client refer file directly. When you refer file directly on cifs client, (e.g. ls -li , cd , stat ) the function return old inode number and filetype from old inode cache, though server has different inode number or filetype. When server is Windows, cifs client has same problem. When Server is Windows , This patch fixes bug in different filetype, but does not fix bug in different inode number. Because QUERY_PATH_INFO response by Windows does not include inode number(Index Number) . BUG INFO https://bugzilla.kernel.org/show_bug.cgi?id=90021 https://bugzilla.kernel.org/show_bug.cgi?id=90031 Reported-by: Nakajima Akira Signed-off-by: Nakajima Akira Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/inode.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ef71aa1515f6..f621b44cb800 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -401,9 +401,25 @@ int cifs_get_inode_info_unix(struct inode **pinode, rc = -ENOMEM; } else { /* we already have inode, update it */ + + /* if uniqueid is different, return error */ + if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM && + CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) { + rc = -ESTALE; + goto cgiiu_exit; + } + + /* if filetype is different, return error */ + if (unlikely(((*pinode)->i_mode & S_IFMT) != + (fattr.cf_mode & S_IFMT))) { + rc = -ESTALE; + goto cgiiu_exit; + } + cifs_fattr_to_inode(*pinode, &fattr); } +cgiiu_exit: return rc; } @@ -838,6 +854,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, if (!*inode) rc = -ENOMEM; } else { + /* we already have inode, update it */ + + /* if filetype is different, return error */ + if (unlikely(((*inode)->i_mode & S_IFMT) != + (fattr.cf_mode & S_IFMT))) { + rc = -ESTALE; + goto cgii_exit; + } + cifs_fattr_to_inode(*inode, &fattr); } From 00b8c95b680791a72b4bb14dc371ff1f1daae39c Mon Sep 17 00:00:00 2001 From: Chengyu Song Date: Tue, 24 Mar 2015 20:18:49 -0400 Subject: [PATCH 235/481] cifs: potential missing check for posix_lock_file_wait posix_lock_file_wait may fail under certain circumstances, and its result is usually checked/returned. But given the complexity of cifs, I'm not sure if the result is intentially left unchecked and always expected to succeed. 
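posix_lock_file_wait() can come back with e.g. -ENOMEM or -EDEADLK from the local lock bookkeeping, so the conservative sketch of the call site is to record the lock only when the server-side operation succeeded, and to let a local failure reach the caller:

        if (flock->fl_flags & FL_POSIX && !rc)
                rc = posix_lock_file_wait(file, flock);
        return rc;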
Signed-off-by: Chengyu Song Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5cfa7129d876..3f50cee79df9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1552,8 +1552,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, rc = server->ops->mand_unlock_range(cfile, flock, xid); out: - if (flock->fl_flags & FL_POSIX) - posix_lock_file_wait(file, flock); + if (flock->fl_flags & FL_POSIX && !rc) + rc = posix_lock_file_wait(file, flock); return rc; } From b29103076bec8316e155e71309dc0fba499022c6 Mon Sep 17 00:00:00 2001 From: Nakajima Akira Date: Thu, 9 Apr 2015 17:27:39 +0900 Subject: [PATCH 236/481] Fix to convert SURROGATE PAIR Garbled characters happen by using surrogate pair for filename. (replace each 1 character to ??) [Steps to Reproduce for bug] client# touch $(echo -e '\xf0\x9d\x9f\xa3') client# touch $(echo -e '\xf0\x9d\x9f\xa4') client# ls -li You see same inode number, same filename(=?? and ??) . Fix the bug about these functions do not consider about surrogate pair (and IVS). cifs_utf16_bytes() cifs_mapchar() cifs_from_utf16() cifsConvertToUTF16() Reported-by: Nakajima Akira Signed-off-by: Nakajima Akira Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 182 ++++++++++++++++++++++++++++++----------- 1 file changed, 136 insertions(+), 46 deletions(-) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 0303c6793d90..5a53ac6b1e02 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -27,41 +27,6 @@ #include "cifsglob.h" #include "cifs_debug.h" -/* - * cifs_utf16_bytes - how long will a string be after conversion? - * @utf16 - pointer to input string - * @maxbytes - don't go past this many bytes of input string - * @codepage - destination codepage - * - * Walk a utf16le string and return the number of bytes that the string will - * be after being converted to the given charset, not including any null - * termination required. Don't walk past maxbytes in the source buffer. - */ -int -cifs_utf16_bytes(const __le16 *from, int maxbytes, - const struct nls_table *codepage) -{ - int i; - int charlen, outlen = 0; - int maxwords = maxbytes / 2; - char tmp[NLS_MAX_CHARSET_SIZE]; - __u16 ftmp; - - for (i = 0; i < maxwords; i++) { - ftmp = get_unaligned_le16(&from[i]); - if (ftmp == 0) - break; - - charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); - if (charlen > 0) - outlen += charlen; - else - outlen++; - } - - return outlen; -} - int cifs_remap(struct cifs_sb_info *cifs_sb) { int map_type; @@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target) * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). 
*/ static int -cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, +cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, int maptype) { int len = 1; + __u16 src_char; + + src_char = *from; if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) return len; @@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, /* if character not one of seven in special remap set */ len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); - if (len <= 0) { - *target = '?'; - len = 1; - } + if (len <= 0) + goto surrogate_pair; + + return len; + +surrogate_pair: + /* convert SURROGATE_PAIR and IVS */ + if (strcmp(cp->charset, "utf8")) + goto unknown; + len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); + if (len <= 0) + goto unknown; + return len; + +unknown: + *target = '?'; + len = 1; return len; } @@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, int nullsize = nls_nullsize(codepage); int fromwords = fromlen / 2; char tmp[NLS_MAX_CHARSET_SIZE]; - __u16 ftmp; + __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ /* * because the chars can be of varying widths, we need to take care @@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); for (i = 0; i < fromwords; i++) { - ftmp = get_unaligned_le16(&from[i]); - if (ftmp == 0) + ftmp[0] = get_unaligned_le16(&from[i]); + if (ftmp[0] == 0) break; + if (i + 1 < fromwords) + ftmp[1] = get_unaligned_le16(&from[i + 1]); + else + ftmp[1] = 0; + if (i + 2 < fromwords) + ftmp[2] = get_unaligned_le16(&from[i + 2]); + else + ftmp[2] = 0; /* * check to see if converting this character might make the @@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, /* put converted char into 'to' buffer */ charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); outlen += charlen; + + /* charlen (=bytes of UTF-8 for 1 character) + * 4bytes UTF-8(surrogate pair) is charlen=4 + * (4bytes UTF-16 code) + * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 + * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ + if (charlen == 4) + i++; + else if (charlen >= 5) + /* 5-6bytes UTF-8 */ + i += 2; } /* properly null-terminate string */ @@ -295,6 +295,46 @@ success: return i; } +/* + * cifs_utf16_bytes - how long will a string be after conversion? + * @utf16 - pointer to input string + * @maxbytes - don't go past this many bytes of input string + * @codepage - destination codepage + * + * Walk a utf16le string and return the number of bytes that the string will + * be after being converted to the given charset, not including any null + * termination required. Don't walk past maxbytes in the source buffer. 
+ */ +int +cifs_utf16_bytes(const __le16 *from, int maxbytes, + const struct nls_table *codepage) +{ + int i; + int charlen, outlen = 0; + int maxwords = maxbytes / 2; + char tmp[NLS_MAX_CHARSET_SIZE]; + __u16 ftmp[3]; + + for (i = 0; i < maxwords; i++) { + ftmp[0] = get_unaligned_le16(&from[i]); + if (ftmp[0] == 0) + break; + if (i + 1 < maxwords) + ftmp[1] = get_unaligned_le16(&from[i + 1]); + else + ftmp[1] = 0; + if (i + 2 < maxwords) + ftmp[2] = get_unaligned_le16(&from[i + 2]); + else + ftmp[2] = 0; + + charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); + outlen += charlen; + } + + return outlen; +} + /* * cifs_strndup_from_utf16 - copy a string from wire format to the local * codepage @@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, char src_char; __le16 dst_char; wchar_t tmp; + wchar_t *wchar_to; /* UTF-16 */ + int ret; + unicode_t u; if (map_chars == NO_MAP_UNI_RSVD) return cifs_strtoUTF16(target, source, PATH_MAX, cp); + wchar_to = kzalloc(6, GFP_KERNEL); + for (i = 0; i < srclen; j++) { src_char = source[i]; charlen = 1; @@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, * if no match, use question mark, which at least in * some cases serves as wild card */ - if (charlen < 1) { - dst_char = cpu_to_le16(0x003f); - charlen = 1; + if (charlen > 0) + goto ctoUTF16; + + /* convert SURROGATE_PAIR */ + if (strcmp(cp->charset, "utf8") || !wchar_to) + goto unknown; + if (*(source + i) & 0x80) { + charlen = utf8_to_utf32(source + i, 6, &u); + if (charlen < 0) + goto unknown; + } else + goto unknown; + ret = utf8s_to_utf16s(source + i, charlen, + UTF16_LITTLE_ENDIAN, + wchar_to, 6); + if (ret < 0) + goto unknown; + + i += charlen; + dst_char = cpu_to_le16(*wchar_to); + if (charlen <= 3) + /* 1-3bytes UTF-8 to 2bytes UTF-16 */ + put_unaligned(dst_char, &target[j]); + else if (charlen == 4) { + /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 + * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 + * (charlen=3+4 or 4+4) */ + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 1)); + j++; + put_unaligned(dst_char, &target[j]); + } else if (charlen >= 5) { + /* 5-6bytes UTF-8 to 6bytes UTF-16 */ + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 1)); + j++; + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 2)); + j++; + put_unaligned(dst_char, &target[j]); } + continue; + +unknown: + dst_char = cpu_to_le16(0x003f); + charlen = 1; } + +ctoUTF16: /* * character may take more than one byte in the source string, * but will take exactly two bytes in the target string @@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, ctoUTF16_out: put_unaligned(0, &target[j]); /* Null terminate target unicode string */ + kfree(wchar_to); return j; } From 4afe260bab50290a05e5732570329a530ed023f3 Mon Sep 17 00:00:00 2001 From: Federico Sauter Date: Tue, 17 Mar 2015 17:45:28 +0100 Subject: [PATCH 237/481] CIFS: Fix race condition on RFC1002_NEGATIVE_SESSION_RESPONSE This patch fixes a race condition that occurs when connecting to a NT 3.51 host without specifying a NetBIOS name. In that case a RFC1002_NEGATIVE_SESSION_RESPONSE is received and the SMB negotiation is reattempted, but under some conditions it leads SendReceive() to hang forever while waiting for srv_mutex. This, in turn, sets the calling process to an uninterruptible sleep state and makes it unkillable. 
The solution is to unlock the srv_mutex acquired in the demux thread *before* going to sleep (after the reconnect error) and before reattempting the connection. --- fs/cifs/connect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f3bfe08e177b..8383d5ea4202 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -386,6 +386,7 @@ cifs_reconnect(struct TCP_Server_Info *server) rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); + mutex_unlock(&server->srv_mutex); msleep(3000); } else { atomic_inc(&tcpSesReconnectCount); @@ -393,8 +394,8 @@ cifs_reconnect(struct TCP_Server_Info *server) if (server->tcpStatus != CifsExiting) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); + mutex_unlock(&server->srv_mutex); } - mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); return rc; From 3ad2a5f57656a14d964b673a5a0e4ab0e583c870 Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Wed, 20 May 2015 10:13:15 -0500 Subject: [PATCH 238/481] irqchip/gicv3-its: ITS table size should not be smaller than PSZ When allocating a device table, if the requested allocation is smaller than the default granule size of the ITS then, we need to round up to the default size. Signed-off-by: Minghuan Lian [ stuart: Added comments and massaged changelog ] Signed-off-by: Stuart Yoder Reviewed-by: Marc Zygnier Cc: Cc: Link: http://lkml.kernel.org/r/1432134795-661-1-git-send-email-stuart.yoder@freescale.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic-v3-its.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9687f8afebff..1b7e155869f6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -828,7 +828,14 @@ static int its_alloc_tables(struct its_node *its) u64 typer = readq_relaxed(its->base + GITS_TYPER); u32 ids = GITS_TYPER_DEVBITS(typer); - order = get_order((1UL << ids) * entry_size); + /* + * 'order' was initialized earlier to the default page + * granule of the the ITS. We can't have an allocation + * smaller than that. If the requested allocation + * is smaller, round up to the default page granule. + */ + order = max(get_order((1UL << ids) * entry_size), + order); if (order >= MAX_ORDER) { order = MAX_ORDER - 1; pr_warn("%s: Device Table too large, reduce its page order to %u\n", From c07678bb01374c510b0f6d4a3832c28ba33e9613 Mon Sep 17 00:00:00 2001 From: Matthew Finlay Date: Tue, 19 May 2015 00:11:48 -0700 Subject: [PATCH 239/481] IB/cma: Fix broken AF_IB UD support Support for using UD and AF_IB is currently broken. The IB_CM_SIDR_REQ_RECEIVED message is not handled properly in cma_save_net_info() and we end up falling into code that will try and process the request as ipv4/ipv6, which will end up failing. The resolution is to add a check for the SIDR_REQ and call cma_save_ib_info() with a NULL path record. Change cma_save_ib_info() to copy the src sib info from the listen_id when the path record is NULL. 
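A condensed, illustrative sketch of the fallback described above (the full diff below also guards the destination-address block the same way): when a SIDR request arrives there is no path record, so the source addressing is inherited from the listening id instead.

	if (path) {                     /* IB_CM_REQ_RECEIVED: real path record available */
		ib->sib_pkey     = path->pkey;
		ib->sib_flowinfo = path->flow_label;
		memcpy(&ib->sib_addr, &path->sgid, 16);
	} else {                        /* IB_CM_SIDR_REQ_RECEIVED: fall back to listener */
		ib->sib_pkey     = listen_ib->sib_pkey;
		ib->sib_flowinfo = listen_ib->sib_flowinfo;
		ib->sib_addr     = listen_ib->sib_addr;
	}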
Reported-by: Hari Shankar Signed-off-by: Matt Finlay Acked-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 06441a43c3aa..38ffe0981503 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -845,18 +845,26 @@ static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; ib = (struct sockaddr_ib *) &id->route.addr.src_addr; ib->sib_family = listen_ib->sib_family; - ib->sib_pkey = path->pkey; - ib->sib_flowinfo = path->flow_label; - memcpy(&ib->sib_addr, &path->sgid, 16); + if (path) { + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->sgid, 16); + } else { + ib->sib_pkey = listen_ib->sib_pkey; + ib->sib_flowinfo = listen_ib->sib_flowinfo; + ib->sib_addr = listen_ib->sib_addr; + } ib->sib_sid = listen_ib->sib_sid; ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); ib->sib_scope_id = listen_ib->sib_scope_id; - ib = (struct sockaddr_ib *) &id->route.addr.dst_addr; - ib->sib_family = listen_ib->sib_family; - ib->sib_pkey = path->pkey; - ib->sib_flowinfo = path->flow_label; - memcpy(&ib->sib_addr, &path->dgid, 16); + if (path) { + ib = (struct sockaddr_ib *) &id->route.addr.dst_addr; + ib->sib_family = listen_ib->sib_family; + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->dgid, 16); + } } static __be16 ss_get_port(const struct sockaddr_storage *ss) @@ -905,9 +913,11 @@ static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id { struct cma_hdr *hdr; - if ((listen_id->route.addr.src_addr.ss_family == AF_IB) && - (ib_event->event == IB_CM_REQ_RECEIVED)) { - cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path); + if (listen_id->route.addr.src_addr.ss_family == AF_IB) { + if (ib_event->event == IB_CM_REQ_RECEIVED) + cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path); + else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) + cma_save_ib_info(id, listen_id, NULL); return 0; } From 72eceab743cf61082357068e0686ffac66fe47e5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 20 May 2015 14:37:41 -0700 Subject: [PATCH 240/481] Input: alps - fix finger jumps on lifting 2 fingers on v7 touchpad On v7 touchpads sometimes when 2 fingers are moved down on the touchpad until they "fall of" the touchpad, the second touch will report 0 for y (max y really since the y axis is inverted) and max x as coordinates, rather then reporting 0, 0 as is expected for a non touching finger. This commit detects this and treats these touches as non touching. 
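The detection added below boils down to rejecting the one coordinate pair a real finger cannot produce; roughly:

	/* V7_PACKET_ID_TWO: a second contact reporting max x and max y
	 * (y axis is inverted) is a firmware artifact, not a finger -
	 * zero it so it is treated as non-touching.
	 */
	if (mt[1].y == 0x7ff && mt[1].x == 0xff0)
		mt[1].x = 0;    /* y is forced to 0 later in the same function */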
See the evemu-recording here: https://bugzilla.redhat.com/attachment.cgi?id=1025058 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1221200 Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index e6708f6efb4d..7752bd59d4b7 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -941,6 +941,11 @@ static void alps_get_finger_coordinate_v7(struct input_mt_pos *mt, case V7_PACKET_ID_TWO: mt[1].x &= ~0x000F; mt[1].y |= 0x000F; + /* Detect false-postive touches where x & y report max value */ + if (mt[1].y == 0x7ff && mt[1].x == 0xff0) { + mt[1].x = 0; + /* y gets set to 0 at the end of this function */ + } break; case V7_PACKET_ID_MULTI: From 412dbad2c7e273b48e8477247c74b2ad65c452d2 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 20 May 2015 14:46:14 -0700 Subject: [PATCH 241/481] Input: vmmouse - do not reference non-existing version of X driver The vmmouse Kconfig help text was referring to an incorrect user-space driver version. Fix this. Signed-off-by: Thomas Hellstrom Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 7462d2fc8cfe..d7820d1152d2 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -156,7 +156,7 @@ config MOUSE_PS2_VMMOUSE Say Y here if you are running under control of VMware hypervisor (ESXi, Workstation or Fusion). Also make sure that when you enable this option, you remove the xf86-input-vmmouse user-space driver - or upgrade it to at least xf86-input-vmmouse 13.0.1, which doesn't + or upgrade it to at least xf86-input-vmmouse 13.1.0, which doesn't load in the presence of an in-kernel vmmouse driver. If unsure, say N. From 487696957e3bd64ccffe62c0ac4ff7bf662785ab Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 13 May 2015 09:30:08 -0700 Subject: [PATCH 242/481] raid5: fix broken async operation chain ops_run_reconstruct6() doesn't correctly chain asyn operations. The tx returned by async_gen_syndrome should be added as the dependent tx of next stripe. The issue is introduced by commit 59fc630b8b5f9f21c8ce3ba153341c107dce1b0c RAID5: batch adjacent full stripe write Reported-and-tested-by: Maxime Ripard Signed-off-by: Shaohua Li Signed-off-by: NeilBrown --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1ba97fdc6df1..b9f2b9cc6060 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1822,7 +1822,7 @@ again: } else init_async_submit(&submit, 0, tx, NULL, NULL, to_addr_conv(sh, percpu, j)); - async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); + tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); if (!last_stripe) { j++; sh = list_first_entry(&sh->batch_list, struct stripe_head, From a81157768a00e8cf8a7b43b5ea5cac931262374f Mon Sep 17 00:00:00 2001 From: Eric Work Date: Mon, 18 May 2015 23:26:23 -0700 Subject: [PATCH 243/481] md/raid0: fix restore to sector variable in raid0_make_request The variable "sector" in "raid0_make_request()" was improperly updated by a call to "sector_div()" which modifies its first argument in place. Commit 47d68979cc968535cb87f3e5f2e6a3533ea48fbd restored this variable after the call for later re-use. 
Unfortunetly the restore was done after the referenced variable "bio" was advanced. This lead to the original value and the restored value being different. Here we move this line to the proper place. One observed side effect of this bug was discarding a file though unlinking would cause an unrelated file's contents to be discarded. Signed-off-by: NeilBrown Fixes: 47d68979cc96 ("md/raid0: fix bug with chunksize not a power of 2.") Cc: stable@vger.kernel.org (any that received above backport) URL: https://bugzilla.kernel.org/show_bug.cgi?id=98501 --- drivers/md/raid0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 6a68ef5246d4..efb654eb5399 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -524,6 +524,9 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) ? (sector & (chunk_sects-1)) : sector_div(sector, chunk_sects)); + /* Restore due to sector_div */ + sector = bio->bi_iter.bi_sector; + if (sectors < bio_sectors(bio)) { split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set); bio_chain(split, bio); @@ -531,7 +534,6 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) split = bio; } - sector = bio->bi_iter.bi_sector; zone = find_zone(mddev->private, §or); tmp_dev = map_sector(mddev, zone, sector, §or); split->bi_bdev = tmp_dev->bdev; From 8532e3439087de69bb1b71fd6be2baa6fc196a55 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 20 May 2015 15:05:09 +1000 Subject: [PATCH 244/481] md/bitmap: remove rcu annotation from pointer arithmetic. Evaluating "&mddev->disks" is simple pointer arithmetic, so it does not need 'rcu' annotations - no dereferencing is happening. Also enhance the comment to explain that 'rdev' in that case is not actually a pointer to an rdev. Reported-by: Patrick Marlier Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 2bc56e2a3526..135a0907e9de 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -177,11 +177,16 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde * nr_pending is 0 and In_sync is clear, the entries we return will * still be in the same position on the list when we re-enter * list_for_each_entry_continue_rcu. + * + * Note that if entered with 'rdev == NULL' to start at the + * beginning, we temporarily assign 'rdev' to an address which + * isn't really an rdev, but which can be used by + * list_for_each_entry_continue_rcu() to find the first entry. */ rcu_read_lock(); if (rdev == NULL) /* start at the beginning */ - rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set); + rdev = list_entry(&mddev->disks, struct md_rdev, same_set); else { /* release the previous rdev and start from there. */ rdev_dec_pending(rdev, mddev); From 755c814a7d826257d5488cfaa801ec19377c472a Mon Sep 17 00:00:00 2001 From: Stephane Viau Date: Wed, 20 May 2015 10:57:27 -0400 Subject: [PATCH 245/481] drm/msm/mdp5: fix incorrect parameter for msm_framebuffer_iova() The index of ->planes[] array (3rd parameter) cannot be equal to MAX_PLANE. This looks like a typo that is now fixed. 
Signed-off-by: Stephane Viau Acked-by: Rob Clark Signed-off-by: Dave Airlie --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 18a3d203b174..57b8f56ae9d0 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -273,7 +273,7 @@ static void set_scanout_locked(struct drm_plane *plane, mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC2_ADDR(pipe), msm_framebuffer_iova(fb, mdp5_kms->id, 2)); mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC3_ADDR(pipe), - msm_framebuffer_iova(fb, mdp5_kms->id, 4)); + msm_framebuffer_iova(fb, mdp5_kms->id, 3)); plane->fb = fb; } From 407d34ef294727bdc200934c38d9a8241f4a5547 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 May 2015 00:38:12 +0800 Subject: [PATCH 246/481] xfrm: Always zero high-order sequence number bits As we're now always including the high bits of the sequence number in the IV generation process we need to ensure that they don't contain crap. This patch ensures that the high sequence bits are always zeroed so that we don't leak random data into the IV. Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index dab57daae408..4fd725a0c500 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -99,6 +99,7 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; + XFRM_SKB_CB(skb)->seq.output.hi = 0; if (unlikely(x->replay.oseq == 0)) { x->replay.oseq--; xfrm_audit_state_replay_overflow(x, skb); @@ -177,6 +178,7 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + XFRM_SKB_CB(skb)->seq.output.hi = 0; if (unlikely(replay_esn->oseq == 0)) { replay_esn->oseq--; xfrm_audit_state_replay_overflow(x, skb); From 0f28d1281b6c54cc98746ae61e44e7f540758ed4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 18 May 2015 10:38:25 -0400 Subject: [PATCH 247/481] drm/radeon: retry dcpd fetch Retry the dpcd fetch several times. Some eDP panels fail several times before the fetch is successful. 
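The fix is a plain bounded retry loop around the aux read; a stripped-down sketch of the shape of the change (the real function in the diff below also caches the DPCD and probes the OUI on success):

	for (i = 0; i < 7; i++) {
		ret = drm_dp_dpcd_read(&radeon_connector->ddc_bus->aux,
				       DP_DPCD_REV, msg, DP_DPCD_SIZE);
		if (ret == DP_DPCD_SIZE)
			return true;    /* panel finally answered with a full DPCD block */
	}
	return false;                   /* give up after 7 attempts */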
bug: https://bugs.freedesktop.org/show_bug.cgi?id=73530 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_dp.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 3e3290c203c6..b435c859dcbc 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -421,19 +421,21 @@ bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector) { struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; u8 msg[DP_DPCD_SIZE]; - int ret; + int ret, i; - ret = drm_dp_dpcd_read(&radeon_connector->ddc_bus->aux, DP_DPCD_REV, msg, - DP_DPCD_SIZE); - if (ret > 0) { - memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE); + for (i = 0; i < 7; i++) { + ret = drm_dp_dpcd_read(&radeon_connector->ddc_bus->aux, DP_DPCD_REV, msg, + DP_DPCD_SIZE); + if (ret == DP_DPCD_SIZE) { + memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE); - DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd), - dig_connector->dpcd); + DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd), + dig_connector->dpcd); - radeon_dp_probe_oui(radeon_connector); + radeon_dp_probe_oui(radeon_connector); - return true; + return true; + } } dig_connector->dpcd[0] = 0; return false; From 6ca121351bf65f9f3375d3b0d54c2aed3b3f47ad Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 May 2015 17:58:49 -0400 Subject: [PATCH 248/481] drm/radeon: fix error flag checking in native aux path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That atom table does not check these bits. Fixes aux regressions on some boards. Reported-by: Malte Schröder Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_dp_auxch.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c index bf1fecc6cceb..fcbd60bb0349 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c +++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c @@ -30,8 +30,6 @@ AUX_SW_RX_HPD_DISCON | \ AUX_SW_RX_PARTIAL_BYTE | \ AUX_SW_NON_AUX_MODE | \ - AUX_SW_RX_MIN_COUNT_VIOL | \ - AUX_SW_RX_INVALID_STOP | \ AUX_SW_RX_SYNC_INVALID_L | \ AUX_SW_RX_SYNC_INVALID_H | \ AUX_SW_RX_INVALID_START | \ From 2fc863a5143d64b8f06576cc3d7fd5622c5cf3b5 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Wed, 20 May 2015 08:10:43 +0300 Subject: [PATCH 249/481] iwlwifi: mvm: Free fw_status after use to avoid memory leak fw_status is the only pointer pointing to a block of memory allocated above and should be freed after use. Note: this come from Klockwork static analyzer. 
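The leak fix is simply to extract the needed value from the wakeup status block and free it in the same branch; schematically (mirroring the diff below):

	fw_status = iwl_mvm_get_wakeup_status(mvm, vif);
	if (!IS_ERR_OR_NULL(fw_status)) {
		reasons = le32_to_cpu(fw_status->wakeup_reasons);
		kfree(fw_status);       /* nothing below needs the block anymore */
	}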
Cc: stable@vger.kernel.org [3.19+] Fixes: 2021a89d7b8a ("iwlwifi: mvm: treat netdetect wake up separately") Signed-off-by: Haim Dreyfuss Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/d3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 1b1b2bf26819..42dadaf5e24d 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -1750,8 +1750,10 @@ static void iwl_mvm_query_netdetect_reasons(struct iwl_mvm *mvm, int i, j, n_matches, ret; fw_status = iwl_mvm_get_wakeup_status(mvm, vif); - if (!IS_ERR_OR_NULL(fw_status)) + if (!IS_ERR_OR_NULL(fw_status)) { reasons = le32_to_cpu(fw_status->wakeup_reasons); + kfree(fw_status); + } if (reasons & IWL_WOWLAN_WAKEUP_BY_RFKILL_DEASSERTED) wakeup.rfkill_release = true; From 18f84673fb0fb3b4727ecf53a7455874172899d4 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Wed, 20 May 2015 15:50:07 +0300 Subject: [PATCH 250/481] iwlwifi: nvm: force mac from otp in case nvm mac is reserved Take the MAC address from the OTP even if one is present in the NVM, if that MAC address happens to be a reserved one. Signed-off-by: Liad Kaufman Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/iwl-nvm-parse.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index cf86f3cdbb8e..75e96db6626b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -533,6 +533,10 @@ static void iwl_set_hw_address_family_8000(struct device *dev, const u8 *hw_addr; if (mac_override) { + static const u8 reserved_mac[] = { + 0x02, 0xcc, 0xaa, 0xff, 0xee, 0x00 + }; + hw_addr = (const u8 *)(mac_override + MAC_ADDRESS_OVERRIDE_FAMILY_8000); @@ -544,7 +548,12 @@ static void iwl_set_hw_address_family_8000(struct device *dev, data->hw_addr[4] = hw_addr[5]; data->hw_addr[5] = hw_addr[4]; - if (is_valid_ether_addr(data->hw_addr)) + /* + * Force the use of the OTP MAC address in case of reserved MAC + * address in the NVM, or if address is given but invalid. + */ + if (is_valid_ether_addr(data->hw_addr) && + memcmp(reserved_mac, hw_addr, ETH_ALEN) != 0) return; IWL_ERR_DEV(dev, From 165b3c4f78ca72bd83bf6abd9ecc472c09f444d3 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 4 May 2015 11:20:52 +0300 Subject: [PATCH 251/481] iwlwifi: mvm: BT Coex - duplicate the command if sent ASYNC There are buses that can't handle ASYNC command without copying them. Duplicate the host command instead. 
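For context: with CMD_ASYNC the caller's buffer may be gone by the time the bus actually transfers the command, so the transport must be told to duplicate the payload rather than reference it. A sketch of the command setup, mirroring the one-line change in the diff below:

	struct iwl_host_cmd cmd = {
		.id        = BT_CONFIG,
		.len       = { sizeof(*bt_cmd), },
		.dataflags = { IWL_HCMD_DFL_DUP, },  /* copy, don't reference, the payload */
		.flags     = CMD_ASYNC,              /* caller does not wait for completion */
	};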
Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/coex_legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c index d954591e0be5..6ac6de2af977 100644 --- a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c +++ b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c @@ -776,7 +776,7 @@ static int iwl_mvm_bt_coex_reduced_txp(struct iwl_mvm *mvm, u8 sta_id, struct iwl_host_cmd cmd = { .id = BT_CONFIG, .len = { sizeof(*bt_cmd), }, - .dataflags = { IWL_HCMD_DFL_NOCOPY, }, + .dataflags = { IWL_HCMD_DFL_DUP, }, .flags = CMD_ASYNC, }; struct iwl_mvm_sta *mvmsta; From dcfc7fb134afc3b8e3d1070ef6a6d6faa9d383ef Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 28 Apr 2015 08:41:55 +0300 Subject: [PATCH 252/481] iwlwifi: mvm: take the UCODE_DOWN reference when resuming The __iwl_mvm_resume() function always returns 1, which causes mac80211 to do a reconfig with IEEE80211_RECONFIG_TYPE_RESTART. This type of reconfig calls iwl_mvm_restart_complete(), where we unref the IWL_MVM_REF_UCODE_DOWN, so we should always take the reference in this case. This prevents this kind of warning from happening: [40026.103025] WARNING: at /root/iwlwifi/iwlwifi-stack-dev/drivers/net/wireless/iwlwifi/mvm/mac80211.c:236 iwl_mvm_unref+0xc9/0xd0 [iwlmvm]() [40026.105145] Modules linked in: iwlmvm(O) iwlwifi(O) mac80211(O) cfg80211(O) compat(O) ctr ccm arc4 autofs4 snd_hda_codec_hdmi snd_hda_codec_idt joydev coretemp kvm_intel kvm aesni_intel ablk_helper cryptd lrw aes_i586 snd_hda_intel xts snd_hda_codec gf128mul snd_hwdep snd_pcm snd_seq_midi dell_wmi snd_rawmidi sparse_keymap snd_seq_midi_event snd_seq uvcvideo dell_laptop videobuf2_core dcdbas microcode videodev psmouse snd_timer videobuf2_vmalloc videobuf2_memops serio_raw snd_seq_device btusb i915 snd bluetooth lpc_ich drm_kms_helper soundcore snd_page_alloc drm i2c_algo_bit wmi parport_pc ppdev video binfmt_misc rpcsec_gss_krb5 nfsd mac_hid nfs_acl nfsv4 auth_rpcgss nfs fscache lockd sunrpc msdos lp parport sdhci_pci sdhci ahci libahci e1000e mmc_core ptp pps_core [last unloaded: compat] [40026.117640] CPU: 2 PID: 3827 Comm: bash Tainted: G W O 3.10.29-dev #1 [40026.120216] Hardware name: Dell Inc. Latitude E6430/0CPWYR, BIOS A09 12/13/2012 [40026.122815] f8effd18 f8effd18 e740fd18 c168aa62 e740fd40 c103a824 c1871238 f8effd18 [40026.125527] 000000ec f8ec79c9 f8ec79c9 d5d29ba4 d5d2a20c 00000000 e740fd50 c103a862 [40026.128209] 00000009 00000000 e740fd7c f8ec79c9 f1c591c4 00000400 00000000 f8efb490 [40026.130886] Call Trace: [40026.133506] [] dump_stack+0x16/0x18 [40026.136115] [] warn_slowpath_common+0x64/0x80 [40026.138727] [] ? iwl_mvm_unref+0xc9/0xd0 [iwlmvm] [40026.141319] [] ? iwl_mvm_unref+0xc9/0xd0 [iwlmvm] [40026.143881] [] warn_slowpath_null+0x22/0x30 [40026.146453] [] iwl_mvm_unref+0xc9/0xd0 [iwlmvm] [40026.149030] [] iwl_mvm_mac_reconfig_complete+0x7d/0x210 [iwlmvm] [40026.151645] [] ? ftrace_raw_event_drv_reconfig_complete+0xc0/0xe0 [mac80211] [40026.154291] [] ieee80211_reconfig+0x28e/0x2620 [mac80211] [40026.156920] [] ? ring_buffer_unlock_commit+0xba/0x100 [40026.159585] [] ieee80211_resume+0x6d/0x80 [mac80211] [40026.162206] [] wiphy_resume+0x72/0x260 [cfg80211] [40026.164799] [] ? device_resume+0x57/0x150 [40026.167425] [] ? 
wiphy_suspend+0x710/0x710 [cfg80211] [40026.170075] [] dpm_run_callback+0x2e/0x50 [40026.172695] [] device_resume+0x91/0x150 [40026.175334] [] dpm_resume+0xf6/0x200 [40026.177922] [] dpm_resume_end+0x10/0x20 [40026.180489] [] suspend_devices_and_enter+0x177/0x480 [40026.183037] [] ? printk+0x4d/0x4f [40026.185559] [] pm_suspend+0x176/0x210 [40026.188065] [] state_store+0x5d/0xb0 [40026.190581] [] ? wakeup_count_show+0x50/0x50 [40026.193052] [] kobj_attr_store+0x1b/0x30 [40026.195608] [] sysfs_write_file+0xab/0x100 [40026.198055] [] ? sysfs_poll+0xa0/0xa0 [40026.200469] [] vfs_write+0xa5/0x1c0 [40026.202893] [] SyS_write+0x57/0xa0 [40026.205245] [] sysenter_do_call+0x12/0x32 [40026.207619] ---[ end trace db1d5a72a0381b0a ]--- Signed-off-by: Luciano Coelho Reviewed-by: EliadX Peller Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/d3.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 42dadaf5e24d..91ca8df007f7 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -1917,6 +1917,14 @@ out: /* return 1 to reconfigure the device */ set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status); set_bit(IWL_MVM_STATUS_D3_RECONFIG, &mvm->status); + + /* We always return 1, which causes mac80211 to do a reconfig + * with IEEE80211_RECONFIG_TYPE_RESTART. This type of + * reconfig calls iwl_mvm_restart_complete(), where we unref + * the IWL_MVM_REF_UCODE_DOWN, so we need to take the + * reference here. + */ + iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN); return 1; } @@ -2023,7 +2031,6 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) __iwl_mvm_resume(mvm, true); rtnl_unlock(); iwl_abort_notification_waits(&mvm->notif_wait); - iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN); ieee80211_restart_hw(mvm->hw); /* wait for restart and disconnect all interfaces */ From a500e469ead055f35c7b2d0a1104e1bd58e34e70 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Mon, 4 May 2015 17:03:17 +0300 Subject: [PATCH 253/481] iwlwifi: mvm: clean net-detect info if device was reset during suspend If the device is reset during suspend with net-detect enabled, we leave the net-detect information dangling and this causes the next suspend to fail with a warning: [21795.351010] WARNING: at /root/iwlwifi/iwlwifi-stack-dev/drivers/net/wireless/iwlwifi/mvm/d3.c:989 __iwl_mvm_suspend.isra.6+0x2be/0x460 [iwlmvm]() [21795.353253] Modules linked in: iwlmvm(O) iwlwifi(O) mac80211(O) cfg80211(O) compat(O) [...] [21795.366168] CPU: 1 PID: 3645 Comm: bash Tainted: G O 3.10.29-dev #1 [21795.368785] Hardware name: Dell Inc. Latitude E6430/0CPWYR, BIOS A09 12/13/2012 [21795.371441] f8ec6748 f8ec6748 e51f3ce8 c168aa62 e51f3d10 c103a824 c1871238 f8ec6748 [21795.374228] 000003dd f8eb982e f8eb982e 00000000 c3408ed4 c41edbbc e51f3d20 c103a862 [21795.377006] 00000009 00000000 e51f3da8 f8eb982e c41ee3dc 00000004 e7970000 e51f3d74 [21795.379792] Call Trace: [21795.382461] [] dump_stack+0x16/0x18 [21795.385133] [] warn_slowpath_common+0x64/0x80 [21795.387803] [] ? __iwl_mvm_suspend.isra.6+0x2be/0x460 [iwlmvm] [21795.390485] [] ? __iwl_mvm_suspend.isra.6+0x2be/0x460 [iwlmvm] [21795.393124] [] warn_slowpath_null+0x22/0x30 [21795.395787] [] __iwl_mvm_suspend.isra.6+0x2be/0x460 [iwlmvm] [21795.398464] [] iwl_mvm_suspend+0xec/0x140 [iwlmvm] [21795.401127] [] ? 
del_timer_sync+0xa1/0xc0 [21795.403800] [] __ieee80211_suspend+0x1de/0xff0 [mac80211] [21795.406459] [] ? mutex_lock_nested+0x25d/0x350 [21795.409084] [] ? rtnl_lock+0x14/0x20 [21795.411685] [] ieee80211_suspend+0x16/0x20 [mac80211] [21795.414318] [] wiphy_suspend+0x74/0x710 [cfg80211] [21795.416916] [] __device_suspend+0x1e2/0x220 [21795.419521] [] ? addresses_show+0xa0/0xa0 [cfg80211] [21795.422097] [] dpm_suspend+0x67/0x210 [21795.424661] [] dpm_suspend_start+0x4f/0x60 [21795.427219] [] suspend_devices_and_enter+0x60/0x480 [21795.429768] [] ? printk+0x4d/0x4f [21795.432295] [] pm_suspend+0x176/0x210 [21795.434830] [] state_store+0x5d/0xb0 [21795.437410] [] ? wakeup_count_show+0x50/0x50 [21795.439961] [] kobj_attr_store+0x1b/0x30 [21795.442514] [] sysfs_write_file+0xab/0x100 [21795.445088] [] ? sysfs_poll+0xa0/0xa0 [21795.447659] [] vfs_write+0xa5/0x1c0 [21795.450212] [] SyS_write+0x57/0xa0 [21795.452699] [] sysenter_do_call+0x12/0x32 [21795.455146] ---[ end trace faf5321baba2bfdb ]--- To fix this, call the iwl_mvm_free_nd() function in case of any error during resume. Additionally, rename the "out_unlock" label to err to make it clearer that it's only called in error conditions. Cc: stable@vger.kernel.org [3.19+] Signed-off-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/d3.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 91ca8df007f7..4310cf102d78 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -1870,15 +1870,15 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) /* get the BSS vif pointer again */ vif = iwl_mvm_get_bss_vif(mvm); if (IS_ERR_OR_NULL(vif)) - goto out_unlock; + goto err; ret = iwl_trans_d3_resume(mvm->trans, &d3_status, test); if (ret) - goto out_unlock; + goto err; if (d3_status != IWL_D3_STATUS_ALIVE) { IWL_INFO(mvm, "Device was reset during suspend\n"); - goto out_unlock; + goto err; } /* query SRAM first in case we want event logging */ @@ -1904,7 +1904,8 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) goto out_iterate; } - out_unlock: +err: + iwl_mvm_free_nd(mvm); mutex_unlock(&mvm->mutex); out_iterate: From 292208914d8ca5a41cf68c2f1d2810a2ea2044e9 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Tue, 14 Apr 2015 11:36:23 +0300 Subject: [PATCH 254/481] iwlwifi: mvm: avoid use-after-free on iwl_mvm_d0i3_enable_tx() qos_seq points (to a struct) inside the command response data. Make sure to free the response only after qos_seq is not needed anymore. 
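The rule being enforced is the usual one for pointers into a response buffer: release the buffer only after its last user. In outline (matching the diff below):

	/* qos_seq points into get_status_cmd.resp_pkt, so keep the
	 * response alive until iwl_mvm_d0i3_enable_tx() has consumed it.
	 */
	iwl_mvm_d0i3_enable_tx(mvm, qos_seq);

	if (get_status_cmd.resp_pkt)
		iwl_free_resp(&get_status_cmd);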
Reported-by: Heng Luo Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/ops.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index 1c66297d82c0..2ea01238754e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -1263,11 +1263,13 @@ static void iwl_mvm_d0i3_exit_work(struct work_struct *wk) ieee80211_iterate_active_interfaces( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_d0i3_disconnect_iter, mvm); - - iwl_free_resp(&get_status_cmd); out: iwl_mvm_d0i3_enable_tx(mvm, qos_seq); + /* qos_seq might point inside resp_pkt, so free it only now */ + if (get_status_cmd.resp_pkt) + iwl_free_resp(&get_status_cmd); + /* the FW might have updated the regdomain */ iwl_mvm_update_changed_regdom(mvm); From c78e1746d3ad7d548bdf3fe491898cc453911a49 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 May 2015 17:13:33 +0200 Subject: [PATCH 255/481] net: sched: fix call_rcu() race on classifier module unloads Vijay reported that a loop as simple as ... while true; do tc qdisc add dev foo root handle 1: prio tc filter add dev foo parent 1: u32 match u32 0 0 flowid 1 tc qdisc del dev foo root rmmod cls_u32 done ... will panic the kernel. Moreover, he bisected the change apparently introducing it to 78fd1d0ab072 ("netlink: Re-add locking to netlink_lookup() and seq walker"). The removal of synchronize_net() from the netlink socket triggering the qdisc to be removed, seems to have uncovered an RCU resp. module reference count race from the tc API. Given that RCU conversion was done after e341694e3eb5 ("netlink: Convert netlink_lookup() to use RCU protected hash table") which added the synchronize_net() originally, occasion of hitting the bug was less likely (not impossible though): When qdiscs that i) support attaching classifiers and, ii) have at least one of them attached, get deleted, they invoke tcf_destroy_chain(), and thus call into ->destroy() handler from a classifier module. After RCU conversion, all classifier that have an internal prio list, unlink them and initiate freeing via call_rcu() deferral. Meanhile, tcf_destroy() releases already reference to the tp->ops->owner module before the queued RCU callback handler has been invoked. Subsequent rmmod on the classifier module is then not prevented since all module references are already dropped. By the time, the kernel invokes the RCU callback handler from the module, that function address is then invalid. One way to fix it would be to add an rcu_barrier() to unregister_tcf_proto_ops() to wait for all pending call_rcu()s to complete. synchronize_rcu() is not appropriate as under heavy RCU callback load, registered call_rcu()s could be deferred longer than a grace period. In case we don't have any pending call_rcu()s, the barrier is allowed to return immediately. Since we came here via unregister_tcf_proto_ops(), there are no users of a given classifier anymore. Further nested call_rcu()s pointing into the module space are not being done anywhere. Only cls_bpf_delete_prog() may schedule a work item, to unlock pages eventually, but that is not in the range/context of cls_bpf anymore. 
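The barrier added below is the standard idiom for any module that queues call_rcu() callbacks pointing at its own code or data: stop producing callbacks, then wait for the queued ones before the last module reference can be dropped. A generic sketch, not tc-specific, with a hypothetical unregister helper:

	static void __exit my_module_exit(void)
	{
		/* stop producing new call_rcu() callbacks first ... */
		unregister_everything();

		/* ... then wait until every already-queued callback has run,
		 * so none can fire after the module text is unloaded.
		 */
		rcu_barrier();
	}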
Fixes: 25d8c0d55f24 ("net: rcu-ify tcf_proto") Fixes: 9888faefe132 ("net: sched: cls_basic use RCU") Reported-by: Vijay Subramanian Signed-off-by: Daniel Borkmann Cc: John Fastabend Cc: Eric Dumazet Cc: Thomas Graf Cc: Jamal Hadi Salim Cc: Alexei Starovoitov Tested-by: Vijay Subramanian Acked-by: Alexei Starovoitov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/cls_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b6ef9a04de06..a75864d93142 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -81,6 +81,11 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) struct tcf_proto_ops *t; int rc = -ENOENT; + /* Wait for outstanding call_rcu()s, if any, from a + * tcf_proto_ops's destroy() handler. + */ + rcu_barrier(); + write_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { if (t == ops) { From 15397f153cfd69c2164c1fa593e26707ed1a3e72 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 20 May 2015 14:50:30 -0700 Subject: [PATCH 256/481] Input: joydev - don't classify the vmmouse as a joystick Joydev is currently thinking some absolute mice are joystick, and that messes up games in VMware guests, as the cursor typically gets stuck in the top left corner. Try to detect the event signature of a VMmouse input device and back off for such devices. We're still incorrectly detecting, for example, the VMware absolute USB mouse as a joystick, but adding an event signature matching also that device would be considerably more risky, so defer that to a later merge window. Signed-off-by: Thomas Hellstrom Signed-off-by: Dmitry Torokhov --- drivers/input/joydev.c | 61 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index f362883c94e3..1d247bcf2ae2 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -747,6 +747,63 @@ static void joydev_cleanup(struct joydev *joydev) input_close_device(handle); } +static bool joydev_dev_is_absolute_mouse(struct input_dev *dev) +{ + DECLARE_BITMAP(jd_scratch, KEY_CNT); + + BUILD_BUG_ON(ABS_CNT > KEY_CNT || EV_CNT > KEY_CNT); + + /* + * Virtualization (VMware, etc) and remote management (HP + * ILO2) solutions use absolute coordinates for their virtual + * pointing devices so that there is one-to-one relationship + * between pointer position on the host screen and virtual + * guest screen, and so their mice use ABS_X, ABS_Y and 3 + * primary button events. This clashes with what joydev + * considers to be joysticks (a device with at minimum ABS_X + * axis). + * + * Here we are trying to separate absolute mice from + * joysticks. A device is, for joystick detection purposes, + * considered to be an absolute mouse if the following is + * true: + * + * 1) Event types are exactly EV_ABS, EV_KEY and EV_SYN. + * 2) Absolute events are exactly ABS_X and ABS_Y. + * 3) Keys are exactly BTN_LEFT, BTN_RIGHT and BTN_MIDDLE. + * 4) Device is not on "Amiga" bus. 
+ */ + + bitmap_zero(jd_scratch, EV_CNT); + __set_bit(EV_ABS, jd_scratch); + __set_bit(EV_KEY, jd_scratch); + __set_bit(EV_SYN, jd_scratch); + if (!bitmap_equal(jd_scratch, dev->evbit, EV_CNT)) + return false; + + bitmap_zero(jd_scratch, ABS_CNT); + __set_bit(ABS_X, jd_scratch); + __set_bit(ABS_Y, jd_scratch); + if (!bitmap_equal(dev->absbit, jd_scratch, ABS_CNT)) + return false; + + bitmap_zero(jd_scratch, KEY_CNT); + __set_bit(BTN_LEFT, jd_scratch); + __set_bit(BTN_RIGHT, jd_scratch); + __set_bit(BTN_MIDDLE, jd_scratch); + + if (!bitmap_equal(dev->keybit, jd_scratch, KEY_CNT)) + return false; + + /* + * Amiga joystick (amijoy) historically uses left/middle/right + * button events. + */ + if (dev->id.bustype == BUS_AMIGA) + return false; + + return true; +} static bool joydev_match(struct input_handler *handler, struct input_dev *dev) { @@ -758,6 +815,10 @@ static bool joydev_match(struct input_handler *handler, struct input_dev *dev) if (test_bit(EV_KEY, dev->evbit) && test_bit(BTN_DIGI, dev->keybit)) return false; + /* Avoid absolute mice */ + if (joydev_dev_is_absolute_mouse(dev)) + return false; + return true; } From e686e9e156109cd2475196689a3144d91cf354b3 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 7 May 2015 22:24:58 -0700 Subject: [PATCH 257/481] Input: smtpe-ts - use msecs_to_jiffies() instead of HZ Use msecs_to_jiffies(20) instead of plain (HZ / 50), as the former is much more explicit about it's behavior. We want to schedule the task 20 mS from now, so make it explicit in the code. Signed-off-by: Marek Vasut Reviewed-by: Viresh Kumar Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/stmpe-ts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c index 2d5ff86b343f..702ad200d916 100644 --- a/drivers/input/touchscreen/stmpe-ts.c +++ b/drivers/input/touchscreen/stmpe-ts.c @@ -164,7 +164,7 @@ static irqreturn_t stmpe_ts_handler(int irq, void *data) STMPE_TSC_CTRL_TSC_EN, STMPE_TSC_CTRL_TSC_EN); /* start polling for touch_det to detect release */ - schedule_delayed_work(&ts->work, HZ / 50); + schedule_delayed_work(&ts->work, msecs_to_jiffies(20)); return IRQ_HANDLED; } From 77b071e7931dd762563ac74e3e448b2aef23ad2f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 7 May 2015 22:25:49 -0700 Subject: [PATCH 258/481] Input: smtpe-ts - wait 50mS until polling for pen-up Wait a little bit longer, 50mS instead of 20mS, until the driver starts polling for pen-up. The problematic behavior before this patch is applied is as follows. The behavior was observed on the STMPE610QTR controller. Upon a physical pen-down event, the touchscreen reports one set of x-y-p coordinates and a pen-down event. After that, the pen-up polling is triggered and since the controller is not ready yet, the polling mistakenly detects a pen-up event while the physical state is still such that the pen is down on the touch surface. The pen-up handling flushes the controller FIFO, so after that, all the samples in the controller are discarded. The controller becomes ready shortly after this bogus pen-up handling and does generate again a pen-down interrupt. This time, the controller contains x-y-p samples which all read as zero. Since pressure value is zero, this set of samples is effectively ignored by userland. In the end, the driver just bounces between pen-down and bogus pen-up handling, generating no useful results. Fix this by giving the controller a bit more time before polling it for pen-up. 
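Both stmpe-ts patches touch the same statement; after this second one the release polling is scheduled with an explicit 50 ms delay (20 ms in the previous patch), roughly:

	/* start polling for touch_det to detect release; 50 ms gives the
	 * STMPE610 enough time to become ready again after pen-down.
	 */
	schedule_delayed_work(&ts->work, msecs_to_jiffies(50));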
Signed-off-by: Marek Vasut Reviewed-by: Viresh Kumar Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/stmpe-ts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c index 702ad200d916..e4c31256a74d 100644 --- a/drivers/input/touchscreen/stmpe-ts.c +++ b/drivers/input/touchscreen/stmpe-ts.c @@ -164,7 +164,7 @@ static irqreturn_t stmpe_ts_handler(int irq, void *data) STMPE_TSC_CTRL_TSC_EN, STMPE_TSC_CTRL_TSC_EN); /* start polling for touch_det to detect release */ - schedule_delayed_work(&ts->work, msecs_to_jiffies(20)); + schedule_delayed_work(&ts->work, msecs_to_jiffies(50)); return IRQ_HANDLED; } From a1cae34e23b1293eccbcc8ee9b39298039c3952a Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 21 May 2015 10:01:11 +0200 Subject: [PATCH 259/481] crypto: s390/ghash - Fix incorrect ghash icv buffer handling. Multitheaded tests showed that the icv buffer in the current ghash implementation is not handled correctly. A move of this working ghash buffer value to the descriptor context fixed this. Code is tested and verified with an multithreaded application via af_alg interface. Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Gerald Schaefer Reported-by: Herbert Xu Signed-off-by: Herbert Xu --- arch/s390/crypto/ghash_s390.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 7940dc90e80b..b258110da952 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -16,11 +16,12 @@ #define GHASH_DIGEST_SIZE 16 struct ghash_ctx { - u8 icv[16]; - u8 key[16]; + u8 key[GHASH_BLOCK_SIZE]; }; struct ghash_desc_ctx { + u8 icv[GHASH_BLOCK_SIZE]; + u8 key[GHASH_BLOCK_SIZE]; u8 buffer[GHASH_BLOCK_SIZE]; u32 bytes; }; @@ -28,8 +29,10 @@ struct ghash_desc_ctx { static int ghash_init(struct shash_desc *desc) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); memset(dctx, 0, sizeof(*dctx)); + memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); return 0; } @@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm, } memcpy(ctx->key, key, GHASH_BLOCK_SIZE); - memset(ctx->icv, 0, GHASH_BLOCK_SIZE); return 0; } @@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); unsigned int n; u8 *buf = dctx->buffer; int ret; @@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc, src += n; if (!dctx->bytes) { - ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, + ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; @@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc, n = srclen & ~(GHASH_BLOCK_SIZE - 1); if (n) { - ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); + ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n); if (ret != n) return -EIO; src += n; @@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc, return 0; } -static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +static int ghash_flush(struct ghash_desc_ctx *dctx) { u8 *buf = dctx->buffer; int ret; @@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) memset(pos, 0, dctx->bytes); - ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); + 
ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; + + dctx->bytes = 0; } - dctx->bytes = 0; return 0; } static int ghash_final(struct shash_desc *desc, u8 *dst) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); int ret; - ret = ghash_flush(ctx, dctx); + ret = ghash_flush(dctx); if (!ret) - memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); + memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); return ret; } From 2e7f43c41c042d6fed4d67aceeaae32d8f102e98 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 20 May 2015 10:36:32 +0200 Subject: [PATCH 260/481] drm/plane-helper: Adapt cursor hack to transitional helpers In commit f02ad907cd9e7fe3a6405d2d005840912f1ed258 Author: Daniel Vetter Date: Thu Jan 22 16:36:23 2015 +0100 drm/atomic-helpers: Recover full cursor plane behaviour we've added a hack to atomic helpers to never to vblank waits for cursor updates through the legacy apis since that's what X expects. Unfortunately we've (again) forgotten to adjust the transitional helpers. Do this now. This fixes regressions for drivers only partially converted over to atomic (like i915). Reported-by: Pekka Paalanen Cc: Pekka Paalanen Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Reviewed-and-tested-by: Mario Kleiner Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_plane_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 40c1db9ad7c3..2f0ed11024eb 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -465,6 +465,9 @@ int drm_plane_helper_commit(struct drm_plane *plane, if (!crtc[i]) continue; + if (crtc[i]->cursor == plane) + continue; + /* There's no other way to figure out whether the crtc is running. */ ret = drm_crtc_vblank_get(crtc[i]); if (ret == 0) { From bd7413aef860baed78c76734050c1de4194ed61c Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Thu, 21 May 2015 21:55:42 +0200 Subject: [PATCH 261/481] ARM: pxa: pxa_cplds: signedness bug in probe "base_irq" needs to be signed for the error handling to work. Also we can remove the initialization because we re-assign it later. Fixes: aa8d6b73ea33 ('ARM: pxa: pxa_cplds: add lubbock and mainstone IO') Signed-off-by: Dan Carpenter Signed-off-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- arch/arm/mach-pxa/pxa_cplds_irqs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c index f1aeb54fabe3..2385052b0ce1 100644 --- a/arch/arm/mach-pxa/pxa_cplds_irqs.c +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -107,7 +107,7 @@ static int cplds_probe(struct platform_device *pdev) struct resource *res; struct cplds *fpga; int ret; - unsigned int base_irq = 0; + int base_irq; unsigned long irqflags = 0; fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL); From d654976cbf852ee20612ee10dbe57cdacda9f452 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 May 2015 21:51:19 -0700 Subject: [PATCH 262/481] tcp: fix a potential deadlock in tcp_get_info() Taking socket spinlock in tcp_get_info() can deadlock, as inet_diag_dump_icsk() holds the &hashinfo->ehash_locks[i], while packet processing can use the reverse locking order. We could avoid this locking for TCP_LISTEN states, but lockdep would certainly get confused as all TCP sockets share same lockdep classes. 
[ 523.722504] ====================================================== [ 523.728706] [ INFO: possible circular locking dependency detected ] [ 523.734990] 4.1.0-dbg-DEV #1676 Not tainted [ 523.739202] ------------------------------------------------------- [ 523.745474] ss/18032 is trying to acquire lock: [ 523.750002] (slock-AF_INET){+.-...}, at: [] tcp_get_info+0x2c4/0x360 [ 523.758129] [ 523.758129] but task is already holding lock: [ 523.763968] (&(&hashinfo->ehash_locks[i])->rlock){+.-...}, at: [] inet_diag_dump_icsk+0x1d5/0x6c0 [ 523.774661] [ 523.774661] which lock already depends on the new lock. [ 523.774661] [ 523.782850] [ 523.782850] the existing dependency chain (in reverse order) is: [ 523.790326] -> #1 (&(&hashinfo->ehash_locks[i])->rlock){+.-...}: [ 523.796599] [] lock_acquire+0xbb/0x270 [ 523.802565] [] _raw_spin_lock+0x38/0x50 [ 523.808628] [] __inet_hash_nolisten+0x78/0x110 [ 523.815273] [] tcp_v4_syn_recv_sock+0x24b/0x350 [ 523.822067] [] tcp_check_req+0x3c1/0x500 [ 523.828199] [] tcp_v4_do_rcv+0x239/0x3d0 [ 523.834331] [] tcp_v4_rcv+0xa8e/0xc10 [ 523.840202] [] ip_local_deliver_finish+0x133/0x3e0 [ 523.847214] [] ip_local_deliver+0xaa/0xc0 [ 523.853440] [] ip_rcv_finish+0x168/0x5c0 [ 523.859624] [] ip_rcv+0x307/0x420 Lets use u64_sync infrastructure instead. As a bonus, 64bit arches get optimized, as these are nop for them. Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 11 +++++++---- net/ipv4/tcp_fastopen.c | 4 ++++ net/ipv4/tcp_input.c | 4 ++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3b2911502a8c..e8bbf403618f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -158,6 +158,8 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ + u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46efa03d2b11..f1377f2a0472 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -402,6 +402,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; + u64_stats_init(&tp->syncp); tp->reordering = sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -2598,6 +2599,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; + unsigned int start; u32 rate; memset(info, 0, sizeof(*info)); @@ -2665,10 +2667,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) rate = READ_ONCE(sk->sk_max_pacing_rate); info->tcpi_max_pacing_rate = rate != ~0U ? 
rate : ~0ULL; - spin_lock_bh(&sk->sk_lock.slock); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; - spin_unlock_bh(&sk->sk_lock.slock); + do { + start = u64_stats_fetch_begin_irq(&tp->syncp); + info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; + } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 3c673d5e6cff..46b087a27503 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -206,6 +206,10 @@ static bool tcp_fastopen_create_child(struct sock *sk, skb_set_owner_r(skb2, child); __skb_queue_tail(&child->sk_receive_queue, skb2); tp->syn_data_acked = 1; + + /* u64_stats_update_begin(&tp->syncp) not needed here, + * as we certainly are not changing upper 32bit value (0) + */ tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1; } else { end_seq = TCP_SKB_CB(skb)->seq + 1; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 243d674b3ef5..c9ab964189a0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3286,7 +3286,9 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) { u32 delta = ack - tp->snd_una; + u64_stats_update_begin(&tp->syncp); tp->bytes_acked += delta; + u64_stats_update_end(&tp->syncp); tp->snd_una = ack; } @@ -3295,7 +3297,9 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) { u32 delta = seq - tp->rcv_nxt; + u64_stats_update_begin(&tp->syncp); tp->bytes_received += delta; + u64_stats_update_end(&tp->syncp); tp->rcv_nxt = seq; } From 381c759d9916c42959515ad34a6d467e24a88e93 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 22 May 2015 04:58:12 -0500 Subject: [PATCH 263/481] ipv4: Avoid crashing in ip_error ip_error does not check if in_dev is NULL before dereferencing it. IThe following sequence of calls is possible: CPU A CPU B ip_rcv_finish ip_route_input_noref() ip_route_input_slow() inetdev_destroy() dst_input() With the result that a network device can be destroyed while processing an input packet. A crash was triggered with only unicast packets in flight, and forwarding enabled on the only network device. The error condition was created by the removal of the network device. As such it is likely the that error code was -EHOSTUNREACH, and the action taken by ip_error (if in_dev had been accessible) would have been to not increment any counters and to have tried and likely failed to send an icmp error as the network device is going away. Therefore handle this weird case by just dropping the packet if !in_dev. It will result in dropping the packet sooner, and will not result in an actual change of behavior. Fixes: 251da4130115b ("ipv4: Cache ip_error() routes even when not forwarding.") Reported-by: Vittorio Gambaletta Tested-by: Vittorio Gambaletta Signed-off-by: Vittorio Gambaletta Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bff62fc87b8e..f45f2a12f37b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -902,6 +902,10 @@ static int ip_error(struct sk_buff *skb) bool send; int code; + /* IP on this device is disabled. 
*/ + if (!in_dev) + goto out; + net = dev_net(rt->dst.dev); if (!IN_DEV_FORWARD(in_dev)) { switch (rt->dst.error) { From 44f6731d8b68fa02f5ed65eaceac41f8c3c9279e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 22 May 2015 13:15:22 +0200 Subject: [PATCH 264/481] cdc_ncm: Fix tx_bytes statistics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tx_curr_frame_payload field is u32. When we try to calculate a small negative delta based on it, we end up with a positive integer close to 2^32 instead. So the tx_bytes pointer increases by about 2^32 for every transmitted frame. Fix by calculating the delta as a signed long. Cc: Ben Hutchings Reported-by: Florian Bruhin Fixes: 7a1e890e2168 ("usbnet: Fix tx_bytes statistic running backward in cdc_ncm") Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index c3e4da9e79ca..8067b8fbb0ee 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1182,7 +1182,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) * payload data instead. */ usbnet_set_skb_tx_stats(skb_out, n, - ctx->tx_curr_frame_payload - skb_out->len); + (long)ctx->tx_curr_frame_payload - skb_out->len); return skb_out; From d4e64c2909231222ceba0999d921e7ac8908f656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Fri, 22 May 2015 13:40:09 +0200 Subject: [PATCH 265/481] ipv4: fill in table id when replacing a route When replacing an IPv4 route, tb_id member of the new fib_alias structure is not set in the replace code path so that the new route is ignored. Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") Signed-off-by: Michal Kubecek Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 64c2076ced54..09b62e17dd8c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1164,6 +1164,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) state = fa->fa_state; new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; + new_fa->tb_id = tb->tb_id; err = netdev_switch_fib_ipv4_add(key, plen, fi, new_fa->fa_tos, From 6db99596d11a99f59e8f8f9353b22ef76e31ad6a Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Fri, 22 May 2015 09:22:09 -0500 Subject: [PATCH 266/481] net: macb: Document zynq gem dt binding Signed-off-by: Nathan Sullivan Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cdns-emac.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/cdns-emac.txt b/Documentation/devicetree/bindings/net/cdns-emac.txt index abd67c13d344..4451ee973223 100644 --- a/Documentation/devicetree/bindings/net/cdns-emac.txt +++ b/Documentation/devicetree/bindings/net/cdns-emac.txt @@ -3,7 +3,8 @@ Required properties: - compatible: Should be "cdns,[-]{emac}" Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC. - or the generic form: "cdns,emac". + Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC. + Or the generic form: "cdns,emac". - reg: Address and length of the register set for the device - interrupts: Should contain macb interrupt - phy-mode: see ethernet.txt file in the same directory. 
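To see why the cdc_ncm tx_bytes fix above needs the cast to a signed long, consider this stand-alone sketch (not driver code; the variable names and values are made up, and a 64-bit long is assumed):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t payload = 1000;        /* stand-in for ctx->tx_curr_frame_payload */
        uint32_t frame_len = 1024;      /* stand-in for skb_out->len, slightly larger */

        long bad = payload - frame_len;         /* computed in u32: wraps to 4294967272 */
        long good = (long)payload - frame_len;  /* computed as 64-bit signed: -24 */

        printf("unsigned delta: %ld\n", bad);
        printf("signed delta:   %ld\n", good);
        return 0;
}

The wrapped value is what was being added to the tx_bytes statistic on every frame, which matches the roughly 2^32 jump described in that changelog.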
From 222ca8e0c183309081bdb0fd5827fc2a922be3ad Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Fri, 22 May 2015 09:22:10 -0500 Subject: [PATCH 267/481] net: macb: Disable half duplex gigabit on Zynq According to the Zynq TRM, gigabit half duplex is not supported. Add a new cap and compatible string so Zynq can avoid advertising that mode. Signed-off-by: Nathan Sullivan Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 12 ++++++++++++ drivers/net/ethernet/cadence/macb.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 5f10dfc631d7..fc646a41d548 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -350,6 +350,9 @@ static int macb_mii_probe(struct net_device *dev) else phydev->supported &= PHY_BASIC_FEATURES; + if (bp->caps & MACB_CAPS_NO_GIGABIT_HALF) + phydev->supported &= ~SUPPORTED_1000baseT_Half; + phydev->advertising = phydev->supported; bp->link = 0; @@ -2699,6 +2702,14 @@ static const struct macb_config emac_config = { .init = at91ether_init, }; +static const struct macb_config zynq_config = { + .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | + MACB_CAPS_NO_GIGABIT_HALF, + .dma_burst_length = 16, + .clk_init = macb_clk_init, + .init = macb_init, +}; + static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,at32ap7000-macb" }, { .compatible = "cdns,at91sam9260-macb", .data = &at91sam9260_config }, @@ -2709,6 +2720,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config }, { .compatible = "cdns,at91rm9200-emac", .data = &emac_config }, { .compatible = "cdns,emac", .data = &emac_config }, + { .compatible = "cdns,zynq-gem", .data = &zynq_config }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, macb_dt_ids); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index eb7d76f7bf6a..24b1d9bcd865 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -393,6 +393,7 @@ #define MACB_CAPS_ISR_CLEAR_ON_WRITE 0x00000001 #define MACB_CAPS_USRIO_HAS_CLKEN 0x00000002 #define MACB_CAPS_USRIO_DEFAULT_IS_MII 0x00000004 +#define MACB_CAPS_NO_GIGABIT_HALF 0x00000008 #define MACB_CAPS_FIFO_MODE 0x10000000 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 From 9eeb5161397a54580a630c99841b5badb07cf246 Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Fri, 22 May 2015 09:22:11 -0500 Subject: [PATCH 268/481] ARM: zynq: DT: Use the zynq binding with macb Use the new zynq binding for macb ethernet, since it will disable half duplex gigabit like the Zynq TRM says to do. Signed-off-by: Nathan Sullivan Signed-off-by: David S. 
Miller --- arch/arm/boot/dts/zynq-7000.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi index a5cd2eda3edf..9ea54b3dba09 100644 --- a/arch/arm/boot/dts/zynq-7000.dtsi +++ b/arch/arm/boot/dts/zynq-7000.dtsi @@ -193,7 +193,7 @@ }; gem0: ethernet@e000b000 { - compatible = "cdns,gem"; + compatible = "cdns,zynq-gem"; reg = <0xe000b000 0x1000>; status = "disabled"; interrupts = <0 22 4>; @@ -204,7 +204,7 @@ }; gem1: ethernet@e000c000 { - compatible = "cdns,gem"; + compatible = "cdns,zynq-gem"; reg = <0xe000c000 0x1000>; status = "disabled"; interrupts = <0 45 4>; From 47cc84ce0c2fe75c99ea5963c4b5704dd78ead54 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 22 May 2015 12:18:59 -0300 Subject: [PATCH 269/481] bridge: fix parsing of MLDv2 reports When more than one multicast address is present in an MLDv2 report, all but the first address are ignored, because the code breaks out of the loop if there has not been an error adding that address. This has caused failures when two guests connected through the bridge tried to communicate using IPv6. Neighbor discoveries would not be transmitted to the other guest when both used a link-local address and a static address. This only happens when there is an MLDv2 querier in the network. The fix only breaks out of the loop when there is a failure adding a multicast address. The mdb before the patch: dev ovirtmgmt port vnet0 grp ff02::1:ff7d:6603 temp dev ovirtmgmt port vnet1 grp ff02::1:ff7d:6604 temp dev ovirtmgmt port bond0.86 grp ff02::2 temp After the patch: dev ovirtmgmt port vnet0 grp ff02::1:ff7d:6603 temp dev ovirtmgmt port vnet1 grp ff02::1:ff7d:6604 temp dev ovirtmgmt port bond0.86 grp ff02::fb temp dev ovirtmgmt port bond0.86 grp ff02::2 temp dev ovirtmgmt port bond0.86 grp ff02::d temp dev ovirtmgmt port vnet0 grp ff02::1:ff00:76 temp dev ovirtmgmt port bond0.86 grp ff02::16 temp dev ovirtmgmt port vnet1 grp ff02::1:ff00:77 temp dev ovirtmgmt port bond0.86 grp ff02::1:ff00:def temp dev ovirtmgmt port bond0.86 grp ff02::1:ffa1:40bf temp Fixes: 08b202b67264 ("bridge br_multicast: IPv6 MLD support.") Reported-by: Rik Theys Signed-off-by: Thadeu Lima de Souza Cascardo Tested-by: Rik Theys Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4b6722f8f179..a3abe6ed111e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1072,7 +1072,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, vid); - if (!err) + if (err) break; } From f96dee13b8e10f00840124255bed1d8b4c6afd6f Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Wed, 20 May 2015 14:35:30 -0700 Subject: [PATCH 270/481] net: core: 'ethtool' issue with querying phy settings When trying to configure the settings for PHY1, using commands like 'ethtool -s eth0 phyad 1 speed 100', 'ethtool' seems to modify settings other than the speed of PHY1 in the above case. ethtool queries the settings of PHY0 and uses them as the base when applying the new settings to PHY1, which causes the other settings of PHY1 to be configured incorrectly. The issue is that the '_ethtool_get_settings()' API, which is called for the 'ETHTOOL_GSET' command, clears the 'cmd' structure (of type 'struct ethtool_cmd') by calling memset. 
This clears all the parameters (if any) passed for the 'ETHTOOL_GSET' cmd. So the driver's callback is always invoked with 'cmd->phy_address' as '0'. The '_ethtool_get_settings()' is called from other files in the 'net/core'. So the fix is applied to the 'ethtool_get_settings()' which is only called in the context of the 'ethtool'. Signed-off-by: Arun Parameswaran Reviewed-by: Ray Jui Reviewed-by: Scott Branden Signed-off-by: David S. Miller --- net/core/ethtool.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1d00b8922902..1347e11f5cc9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -359,7 +359,15 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) int err; struct ethtool_cmd cmd; - err = __ethtool_get_settings(dev, &cmd); + if (!dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + cmd.cmd = ETHTOOL_GSET; + + err = dev->ethtool_ops->get_settings(dev, &cmd); if (err < 0) return err; From 93a33a584e2a49a217118148125944fd02d47b54 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 May 2015 13:28:29 -0700 Subject: [PATCH 271/481] bridge: fix lockdep splat Following lockdep splat was reported : [ 29.382286] =============================== [ 29.382315] [ INFO: suspicious RCU usage. ] [ 29.382344] 4.1.0-0.rc0.git11.1.fc23.x86_64 #1 Not tainted [ 29.382380] ------------------------------- [ 29.382409] net/bridge/br_private.h:626 suspicious rcu_dereference_check() usage! [ 29.382455] other info that might help us debug this: [ 29.382507] rcu_scheduler_active = 1, debug_locks = 0 [ 29.382549] 2 locks held by swapper/0/0: [ 29.382576] #0: (((&p->forward_delay_timer))){+.-...}, at: [] call_timer_fn+0x5/0x4f0 [ 29.382660] #1: (&(&br->lock)->rlock){+.-...}, at: [] br_forward_delay_timer_expired+0x31/0x140 [bridge] [ 29.382754] stack backtrace: [ 29.382787] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.1.0-0.rc0.git11.1.fc23.x86_64 #1 [ 29.382838] Hardware name: LENOVO 422916G/LENOVO, BIOS A1KT53AUS 04/07/2015 [ 29.382882] 0000000000000000 3ebfc20364115825 ffff880666603c48 ffffffff81892d4b [ 29.382943] 0000000000000000 ffffffff81e124e0 ffff880666603c78 ffffffff8110bcd7 [ 29.383004] ffff8800785c9d00 ffff88065485ac58 ffff880c62002800 ffff880c5fc88ac0 [ 29.383065] Call Trace: [ 29.383084] [] dump_stack+0x4c/0x65 [ 29.383130] [] lockdep_rcu_suspicious+0xe7/0x120 [ 29.383178] [] br_fill_ifinfo+0x4a9/0x6a0 [bridge] [ 29.383225] [] br_ifinfo_notify+0x11b/0x4b0 [bridge] [ 29.383271] [] ? br_hold_timer_expired+0x70/0x70 [bridge] [ 29.383320] [] br_forward_delay_timer_expired+0x58/0x140 [bridge] [ 29.383371] [] ? br_hold_timer_expired+0x70/0x70 [bridge] [ 29.383416] [] call_timer_fn+0xc3/0x4f0 [ 29.383454] [] ? call_timer_fn+0x5/0x4f0 [ 29.383493] [] ? lock_release_holdtime.part.29+0xf/0x200 [ 29.383541] [] ? br_hold_timer_expired+0x70/0x70 [bridge] [ 29.383587] [] run_timer_softirq+0x244/0x490 [ 29.383629] [] __do_softirq+0xec/0x670 [ 29.383666] [] irq_exit+0x145/0x150 [ 29.383703] [] smp_apic_timer_interrupt+0x46/0x60 [ 29.383744] [] apic_timer_interrupt+0x73/0x80 [ 29.383782] [] ? cpuidle_enter_state+0x5f/0x2f0 [ 29.383832] [] ? cpuidle_enter_state+0x5b/0x2f0 Problem here is that br_forward_delay_timer_expired() is a timer handler, calling br_ifinfo_notify() which assumes either rcu_read_lock() or RTNL are held. Simplest fix seems to add rcu read lock section. 
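Reduced to a hedged sketch (the type and helper below are illustrative, not the bridge code): a helper that dereferences RCU-protected pointers may be called from a timer handler by opening an explicit RCU read-side critical section, since the handler holds neither RTNL nor an RCU read lock and rcu_dereference_check() complains otherwise:

#include <linux/rcupdate.h>

struct demo_port;                       /* illustrative type */
void demo_notify(struct demo_port *p);  /* assumes rcu_read_lock() or RTNL held */

static void demo_timer_expired(unsigned long arg)
{
        struct demo_port *p = (struct demo_port *)arg;

        /* ... state updates under the subsystem's own spinlock ... */
        rcu_read_lock();        /* satisfies rcu_dereference_check() in the helper */
        demo_notify(p);
        rcu_read_unlock();
}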
Signed-off-by: Eric Dumazet Reported-by: Josh Boyer Reported-by: Dominick Grift Cc: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_stp_timer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 4fcaa67750fd..7caf7fae2d5b 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -97,7 +97,9 @@ static void br_forward_delay_timer_expired(unsigned long arg) netif_carrier_on(br->dev); } br_log_state(p); + rcu_read_lock(); br_ifinfo_notify(RTM_NEWLINK, p); + rcu_read_unlock(); spin_unlock(&br->lock); } From 242ddf04297f2c4768bd8eb7593ab911910c5f76 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Sat, 23 May 2015 11:46:55 +0900 Subject: [PATCH 272/481] ARM: dts: set display clock correctly for exynos4412-trats2 This patch sets display clock correctly. If Display clock isn't set correctly then you would find below messages and Display controller doesn't work correctly. exynos-drm: No connectors reported connected with modes [drm] Cannot find any crtc or sizes - going 1024x768 Fixes: abc0b1447d49 ("drm: Perform basic sanity checks on probed modes") Cc: Signed-off-by: Inki Dae Reviewed-by: Krzysztof Kozlowski Tested-by: Krzysztof Kozlowski Signed-off-by: Kukjin Kim --- arch/arm/boot/dts/exynos4412-trats2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts index 173ffa479ad3..792394dd0f2a 100644 --- a/arch/arm/boot/dts/exynos4412-trats2.dts +++ b/arch/arm/boot/dts/exynos4412-trats2.dts @@ -736,7 +736,7 @@ display-timings { timing-0 { - clock-frequency = <0>; + clock-frequency = <57153600>; hactive = <720>; vactive = <1280>; hfront-porch = <5>; From ae425bb2a05bebe786a25cc8ae64e9d16c4d9b83 Mon Sep 17 00:00:00 2001 From: "Vittorio G (VittGam)" Date: Fri, 22 May 2015 21:15:19 +0200 Subject: [PATCH 273/481] ALSA: usb-audio: Add quirk for MS LifeCam HD-3000 Microsoft LifeCam HD-3000 (045e:0779) needs a similar quirk for suppressing the unsupported sample rate inquiry. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=98481 Cc: Signed-off-by: Vittorio Gambaletta Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 46facfc9aec1..29175346cc4f 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1118,6 +1118,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema */ case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ + case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ return true; } From 3530febb5c7636f6b26d15637f68296804d26491 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 24 May 2015 08:27:52 +0200 Subject: [PATCH 274/481] Revert "ALSA: hda - Add mute-LED mode control to Thinkpad" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7290006d8c0900c56d8c58428134f02c35109d17. Through the regression report, it was revealed that the tpacpi_led_set() call to thinkpad_acpi helper doesn't only toggle the mute LED but actually mutes the sound. This is contradiction to the expectation, and rather confuses user. According to Henrique, it's not trivial to judge which TP model behaves "LED-only" and which model does whatever more intrusive, as Lenovo's implementations vary model by model. 
So, from the safety reason, we should revert the patch for now. Reported-by: Martin Steigerwald Cc: Pali Rohár Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/thinkpad_helper.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c index d51703e30523..0a4ad5feb82e 100644 --- a/sound/pci/hda/thinkpad_helper.c +++ b/sound/pci/hda/thinkpad_helper.c @@ -72,7 +72,6 @@ static void hda_fixup_thinkpad_acpi(struct hda_codec *codec, if (led_set_func(TPACPI_LED_MUTE, false) >= 0) { old_vmaster_hook = spec->vmaster_mute.hook; spec->vmaster_mute.hook = update_tpacpi_mute_led; - spec->vmaster_mute_enum = 1; removefunc = false; } if (led_set_func(TPACPI_LED_MICMUTE, false) >= 0) { From cdeb6048940fa4bfb429e2f1cba0d28a11e20cd5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 22 May 2015 16:15:47 -0700 Subject: [PATCH 275/481] x86/asm/irq: Stop relying on magic JMP behavior for early_idt_handlers The early_idt_handlers asm code generates an array of entry points spaced nine bytes apart. It's not really clear from that code or from the places that reference it what's going on, and the code only works in the first place because GAS never generates two-byte JMP instructions when jumping to global labels. Clean up the code to generate the correct array stride (member size) explicitly. This should be considerably more robust against screw-ups, as GAS will warn if a .fill directive has a negative count. Using '. =' to advance would have been even more robust (it would generate an actual error if it tried to move backwards), but it would pad with nulls, confusing anyone who tries to disassemble the code. The new scheme should be much clearer to future readers. While we're at it, improve the comments and rename the array and common code. Binutils may start relaxing jumps to non-weak labels. If so, this change will fix our build, and we may need to backport this change. Before, on x86_64: 0000000000000000 : 0: 6a 00 pushq $0x0 2: 6a 00 pushq $0x0 4: e9 00 00 00 00 jmpq 9 5: R_X86_64_PC32 early_idt_handler-0x4 ... 48: 66 90 xchg %ax,%ax 4a: 6a 08 pushq $0x8 4c: e9 00 00 00 00 jmpq 51 4d: R_X86_64_PC32 early_idt_handler-0x4 ... 117: 6a 00 pushq $0x0 119: 6a 1f pushq $0x1f 11b: e9 00 00 00 00 jmpq 120 11c: R_X86_64_PC32 early_idt_handler-0x4 After: 0000000000000000 : 0: 6a 00 pushq $0x0 2: 6a 00 pushq $0x0 4: e9 14 01 00 00 jmpq 11d ... 48: 6a 08 pushq $0x8 4a: e9 d1 00 00 00 jmpq 120 4f: cc int3 50: cc int3 ... 117: 6a 00 pushq $0x0 119: 6a 1f pushq $0x1f 11b: eb 03 jmp 120 11d: cc int3 11e: cc int3 11f: cc int3 Signed-off-by: Andy Lutomirski Acked-by: H. Peter Anvin Cc: Binutils Cc: Borislav Petkov Cc: H.J. Lu Cc: Jan Beulich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/ac027962af343b0c599cbfcf50b945ad2ef3d7a8.1432336324.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/segment.h | 14 ++++++++++++-- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/head_32.S | 33 ++++++++++++++++++--------------- arch/x86/kernel/head_64.S | 20 +++++++++++--------- 4 files changed, 42 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 5a9856eb12ba..7d5a1929d76b 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -231,11 +231,21 @@ #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) #ifdef __KERNEL__ + +/* + * early_idt_handler_array is an array of entry points referenced in the + * early IDT. 
For simplicity, it's a real array with one entry point + * every nine bytes. That leaves room for an optional 'push $0' if the + * vector has no error code (two bytes), a 'push $vector_number' (two + * bytes), and a jump to the common entry code (up to five bytes). + */ +#define EARLY_IDT_HANDLER_SIZE 9 + #ifndef __ASSEMBLY__ -extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; +extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; #ifdef CONFIG_TRACING -# define trace_early_idt_handlers early_idt_handlers +# define trace_early_idt_handler_array early_idt_handler_array #endif /* diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2b55ee6db053..5a4668136e98 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) clear_bss(); for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) - set_intr_gate(i, early_idt_handlers[i]); + set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); copy_bootdata(__va(real_mode_data)); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 02d257256200..544dec4cc605 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -478,21 +478,22 @@ is486: __INIT setup_once: /* - * Set up a idt with 256 entries pointing to ignore_int, - * interrupt gates. It doesn't actually load idt - that needs - * to be done on each CPU. Interrupts are enabled elsewhere, - * when we can be relatively sure everything is ok. + * Set up a idt with 256 interrupt gates that push zero if there + * is no error code and then jump to early_idt_handler_common. + * It doesn't actually load the idt - that needs to be done on + * each CPU. Interrupts are enabled elsewhere, when we can be + * relatively sure everything is ok. */ movl $idt_table,%edi - movl $early_idt_handlers,%eax + movl $early_idt_handler_array,%eax movl $NUM_EXCEPTION_VECTORS,%ecx 1: movl %eax,(%edi) movl %eax,4(%edi) /* interrupt gate, dpl=0, present */ movl $(0x8E000000 + __KERNEL_CS),2(%edi) - addl $9,%eax + addl $EARLY_IDT_HANDLER_SIZE,%eax addl $8,%edi loop 1b @@ -524,26 +525,28 @@ setup_once: andl $0,setup_once_ref /* Once is enough, thanks */ ret -ENTRY(early_idt_handlers) +ENTRY(early_idt_handler_array) # 36(%esp) %eflags # 32(%esp) %cs # 28(%esp) %eip # 24(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .if (EXCEPTION_ERRCODE_MASK >> i) & 1 - ASM_NOP2 - .else + .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 pushl $0 # Dummy error code, to make stack frame uniform .endif pushl $i # 20(%esp) Vector number - jmp early_idt_handler + jmp early_idt_handler_common i = i + 1 + .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr -ENDPROC(early_idt_handlers) +ENDPROC(early_idt_handler_array) - /* This is global to keep gas from relaxing the jumps */ -ENTRY(early_idt_handler) +early_idt_handler_common: + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. 
+ */ cld cmpl $2,(%esp) # X86_TRAP_NMI @@ -603,7 +606,7 @@ ex_entry: .Lis_nmi: addl $8,%esp /* drop vector number and error code */ iret -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ ALIGN diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 43eafc8afb69..e5c27f729a38 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -321,26 +321,28 @@ bad_address: jmp bad_address __INIT - .globl early_idt_handlers -early_idt_handlers: +ENTRY(early_idt_handler_array) # 104(%rsp) %rflags # 96(%rsp) %cs # 88(%rsp) %rip # 80(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .if (EXCEPTION_ERRCODE_MASK >> i) & 1 - ASM_NOP2 - .else + .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 pushq $0 # Dummy error code, to make stack frame uniform .endif pushq $i # 72(%rsp) Vector number - jmp early_idt_handler + jmp early_idt_handler_common i = i + 1 + .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr +ENDPROC(early_idt_handler_array) -/* This is global to keep gas from relaxing the jumps */ -ENTRY(early_idt_handler) +early_idt_handler_common: + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. + */ cld cmpl $2,(%rsp) # X86_TRAP_NMI @@ -412,7 +414,7 @@ ENTRY(early_idt_handler) .Lis_nmi: addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common) __INITDATA From ba155e2d21f6bf05de86a78dbe5bfd8757604a65 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 May 2015 18:22:35 -0700 Subject: [PATCH 276/481] Linux 4.1-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dc20bcb9b271..92a707859205 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* From cc4a84c3da6f9dd6a297dc81fe4437643a60fe03 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 May 2015 14:07:30 -0700 Subject: [PATCH 277/481] net: phy: bcm7xxx: Fix 7425 PHY ID and flags While adding support for 7425 PHY in the 7xxx PHY driver, the ID that was used was actually coming from an external PHY: a BCM5461x. Fix this by using the proper ID for the internal 7425 PHY and set the PHY_IS_INTERNAL flag, otherwise consumers of this PHY driver would not be able to properly identify it as such. Fixes: d068b02cfdfc2 ("net: phy: add BCM7425 and BCM7429 PHYs") Signed-off-by: Florian Fainelli Reviewed-by: Petri Gynther Signed-off-by: David S. 
Miller --- drivers/net/phy/bcm7xxx.c | 2 +- include/linux/brcmphy.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 64c74c6a4828..b5dc59de094e 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -404,7 +404,7 @@ static struct phy_driver bcm7xxx_driver[] = { .name = "Broadcom BCM7425", .features = PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = 0, + .flags = PHY_IS_INTERNAL, .config_init = bcm7xxx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index ae2982c0f7a6..656da2a12ffe 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -17,7 +17,7 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 -#define PHY_ID_BCM7425 0x03625e60 +#define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 From 3f7352bf21f8fd7ba3e2fcef9488756f188e12be Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 22 May 2015 15:42:55 -0700 Subject: [PATCH 278/481] x86: bpf_jit: fix compilation of large bpf programs x86 has variable length encoding. x86 JIT compiler is trying to pick the shortest encoding for given bpf instruction. While doing so the jump targets are changing, so JIT is doing multiple passes over the program. Typical program needs 3 passes. Some very short programs converge with 2 passes. Large programs may need 4 or 5. But specially crafted bpf programs may hit the pass limit and if the program converges on the last iteration the JIT compiler will be producing an image full of 'int 3' insns. Fix this corner case by doing final iteration over bpf program. Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64") Reported-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Tested-by: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 99f76103c6b7..ddeff4844a10 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -966,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog) } ctx.cleanup_addr = proglen; - for (pass = 0; pass < 10; pass++) { + /* JITed image shrinks with every pass and the loop iterates + * until the image stops shrinking. Very large bpf programs + * may converge on the last pass. In such case do one more + * pass to emit the final image + */ + for (pass = 0; pass < 10 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { image = NULL; From 5369c71f7ca24f6472f8fa883e05fa7469fab284 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Thu, 21 May 2015 19:11:02 +0400 Subject: [PATCH 279/481] net/ibm/emac: fix size of emac dump memory areas Fix the size of the emac register dump sent to ethtool, which was causing wrong data interpretation in the ethtool layer for MII and EMAC. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/emac/core.c | 16 ++++++---------- drivers/net/ethernet/ibm/emac/core.h | 7 ++----- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index de7919322190..b9df0cbd0a38 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2084,12 +2084,8 @@ static void emac_ethtool_get_pauseparam(struct net_device *ndev, static int emac_get_regs_len(struct emac_instance *dev) { - if (emac_has_feature(dev, EMAC_FTR_EMAC4)) return sizeof(struct emac_ethtool_regs_subhdr) + - EMAC4_ETHTOOL_REGS_SIZE(dev); - else - return sizeof(struct emac_ethtool_regs_subhdr) + - EMAC_ETHTOOL_REGS_SIZE(dev); + sizeof(struct emac_regs); } static int emac_ethtool_get_regs_len(struct net_device *ndev) @@ -2114,15 +2110,15 @@ static void *emac_dump_regs(struct emac_instance *dev, void *buf) struct emac_ethtool_regs_subhdr *hdr = buf; hdr->index = dev->cell_index; - if (emac_has_feature(dev, EMAC_FTR_EMAC4)) { + if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) { + hdr->version = EMAC4SYNC_ETHTOOL_REGS_VER; + } else if (emac_has_feature(dev, EMAC_FTR_EMAC4)) { hdr->version = EMAC4_ETHTOOL_REGS_VER; - memcpy_fromio(hdr + 1, dev->emacp, EMAC4_ETHTOOL_REGS_SIZE(dev)); - return (void *)(hdr + 1) + EMAC4_ETHTOOL_REGS_SIZE(dev); } else { hdr->version = EMAC_ETHTOOL_REGS_VER; - memcpy_fromio(hdr + 1, dev->emacp, EMAC_ETHTOOL_REGS_SIZE(dev)); - return (void *)(hdr + 1) + EMAC_ETHTOOL_REGS_SIZE(dev); } + memcpy_fromio(hdr + 1, dev->emacp, sizeof(struct emac_regs)); + return (void *)(hdr + 1) + sizeof(struct emac_regs); } static void emac_ethtool_get_regs(struct net_device *ndev, diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 67f342a9f65e..28df37420da9 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -461,10 +461,7 @@ struct emac_ethtool_regs_subhdr { }; #define EMAC_ETHTOOL_REGS_VER 0 -#define EMAC_ETHTOOL_REGS_SIZE(dev) ((dev)->rsrc_regs.end - \ - (dev)->rsrc_regs.start + 1) -#define EMAC4_ETHTOOL_REGS_VER 1 -#define EMAC4_ETHTOOL_REGS_SIZE(dev) ((dev)->rsrc_regs.end - \ - (dev)->rsrc_regs.start + 1) +#define EMAC4_ETHTOOL_REGS_VER 1 +#define EMAC4SYNC_ETHTOOL_REGS_VER 2 #endif /* __IBM_NEWEMAC_CORE_H */ From 466c5ac8bdf29a382d064923a60ef302dd3b2aeb Mon Sep 17 00:00:00 2001 From: Mathieu Olivari Date: Fri, 22 May 2015 19:03:29 -0700 Subject: [PATCH 280/481] net: stmmac: create one debugfs dir per net-device stmmac DebugFS entries are currently global to the driver. As a result, having more than one stmmac device in the system creates the following error: * ERROR stmmaceth, debugfs create directory failed * stmmac_hw_setup: failed debugFS registration This also results in being able to access the debugfs information for the first registered device only. This patch changes the debugfs structure to have one sub-directory per net-device. Files under "/sys/kernel/debug/stmmaceth" will now show-up under /sys/kernel/debug/stmmaceth/ethN/. Signed-off-by: Mathieu Olivari Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 6 ++ .../net/ethernet/stmicro/stmmac/stmmac_main.c | 76 +++++++++++++------ 2 files changed, 59 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 2ac9552d1fa3..73bab983edd9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -117,6 +117,12 @@ struct stmmac_priv { int use_riwt; int irq_wake; spinlock_t ptp_lock; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_dir; + struct dentry *dbgfs_rings_status; + struct dentry *dbgfs_dma_cap; +#endif }; int stmmac_mdio_unregister(struct net_device *ndev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 05c146f718a3..2c5ce2baca87 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -118,7 +118,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id); #ifdef CONFIG_DEBUG_FS static int stmmac_init_fs(struct net_device *dev); -static void stmmac_exit_fs(void); +static void stmmac_exit_fs(struct net_device *dev); #endif #define STMMAC_COAL_TIMER(x) (jiffies + usecs_to_jiffies(x)) @@ -1916,7 +1916,7 @@ static int stmmac_release(struct net_device *dev) netif_carrier_off(dev); #ifdef CONFIG_DEBUG_FS - stmmac_exit_fs(); + stmmac_exit_fs(dev); #endif stmmac_release_ptp(priv); @@ -2508,8 +2508,6 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) #ifdef CONFIG_DEBUG_FS static struct dentry *stmmac_fs_dir; -static struct dentry *stmmac_rings_status; -static struct dentry *stmmac_dma_cap; static void sysfs_display_ring(void *head, int size, int extend_desc, struct seq_file *seq) @@ -2648,36 +2646,39 @@ static const struct file_operations stmmac_dma_cap_fops = { static int stmmac_init_fs(struct net_device *dev) { - /* Create debugfs entries */ - stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL); + struct stmmac_priv *priv = netdev_priv(dev); - if (!stmmac_fs_dir || IS_ERR(stmmac_fs_dir)) { - pr_err("ERROR %s, debugfs create directory failed\n", - STMMAC_RESOURCE_NAME); + /* Create per netdev entries */ + priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir); + + if (!priv->dbgfs_dir || IS_ERR(priv->dbgfs_dir)) { + pr_err("ERROR %s/%s, debugfs create directory failed\n", + STMMAC_RESOURCE_NAME, dev->name); return -ENOMEM; } /* Entry to report DMA RX/TX rings */ - stmmac_rings_status = debugfs_create_file("descriptors_status", - S_IRUGO, stmmac_fs_dir, dev, - &stmmac_rings_status_fops); + priv->dbgfs_rings_status = + debugfs_create_file("descriptors_status", S_IRUGO, + priv->dbgfs_dir, dev, + &stmmac_rings_status_fops); - if (!stmmac_rings_status || IS_ERR(stmmac_rings_status)) { + if (!priv->dbgfs_rings_status || IS_ERR(priv->dbgfs_rings_status)) { pr_info("ERROR creating stmmac ring debugfs file\n"); - debugfs_remove(stmmac_fs_dir); + debugfs_remove_recursive(priv->dbgfs_dir); return -ENOMEM; } /* Entry to report the DMA HW features */ - stmmac_dma_cap = debugfs_create_file("dma_cap", S_IRUGO, stmmac_fs_dir, - dev, &stmmac_dma_cap_fops); + priv->dbgfs_dma_cap = debugfs_create_file("dma_cap", S_IRUGO, + priv->dbgfs_dir, + dev, &stmmac_dma_cap_fops); - if (!stmmac_dma_cap || IS_ERR(stmmac_dma_cap)) { + if (!priv->dbgfs_dma_cap || IS_ERR(priv->dbgfs_dma_cap)) { pr_info("ERROR creating stmmac MMC debugfs file\n"); - debugfs_remove(stmmac_rings_status); - 
debugfs_remove(stmmac_fs_dir); + debugfs_remove_recursive(priv->dbgfs_dir); return -ENOMEM; } @@ -2685,11 +2686,11 @@ static int stmmac_init_fs(struct net_device *dev) return 0; } -static void stmmac_exit_fs(void) +static void stmmac_exit_fs(struct net_device *dev) { - debugfs_remove(stmmac_rings_status); - debugfs_remove(stmmac_dma_cap); - debugfs_remove(stmmac_fs_dir); + struct stmmac_priv *priv = netdev_priv(dev); + + debugfs_remove_recursive(priv->dbgfs_dir); } #endif /* CONFIG_DEBUG_FS */ @@ -3149,6 +3150,35 @@ err: __setup("stmmaceth=", stmmac_cmdline_opt); #endif /* MODULE */ +static int __init stmmac_init(void) +{ +#ifdef CONFIG_DEBUG_FS + /* Create debugfs main directory if it doesn't exist yet */ + if (!stmmac_fs_dir) { + stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL); + + if (!stmmac_fs_dir || IS_ERR(stmmac_fs_dir)) { + pr_err("ERROR %s, debugfs create directory failed\n", + STMMAC_RESOURCE_NAME); + + return -ENOMEM; + } + } +#endif + + return 0; +} + +static void __exit stmmac_exit(void) +{ +#ifdef CONFIG_DEBUG_FS + debugfs_remove_recursive(stmmac_fs_dir); +#endif +} + +module_init(stmmac_init) +module_exit(stmmac_exit) + MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet device driver"); MODULE_AUTHOR("Giuseppe Cavallaro "); MODULE_LICENSE("GPL"); From 397a253af5031de4a4612210055935309af4472c Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Mon, 25 May 2015 11:55:43 +0200 Subject: [PATCH 281/481] net: dp83640: fix broken calibration routine. Currently, the calibration function that corrects the initial offsets among multiple devices only works the first time. If the function is called more than once, the calibration fails and bogus offsets will be programmed into the devices. In a well hidden spot, the device documentation tells that trigger indexes 0 and 1 are special in allowing the TRIG_IF_LATE flag to actually work. This patch fixes the issue by using one of the special triggers during the recalibration method. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 496e02f961d3..7a068d99ea46 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -47,7 +47,7 @@ #define PSF_TX 0x1000 #define EXT_EVENT 1 #define CAL_EVENT 7 -#define CAL_TRIGGER 7 +#define CAL_TRIGGER 1 #define DP83640_N_PINS 12 #define MII_DP83640_MICR 0x11 From a935865c828c8cd20501f618c69f659a5b6d6a5f Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Mon, 25 May 2015 11:55:44 +0200 Subject: [PATCH 282/481] net: dp83640: reinforce locking rules. Callers of the ext_write function are supposed to hold a mutex that protects the state of the dialed page, but one caller was missing the lock from the very start, and over time the code has been changed without following the rule. This patch cleans up the call sites in violation of the rule. Signed-off-by: Richard Cochran Signed-off-by: David S. 
Miller --- drivers/net/phy/dp83640.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 7a068d99ea46..e570036275e2 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -496,7 +496,9 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp, else evnt |= EVNT_RISE; } + mutex_lock(&clock->extreg_lock); ext_write(0, phydev, PAGE5, PTP_EVNT, evnt); + mutex_unlock(&clock->extreg_lock); return 0; case PTP_CLK_REQ_PEROUT: @@ -532,6 +534,8 @@ static u8 status_frame_src[6] = { 0x08, 0x00, 0x17, 0x0B, 0x6B, 0x0F }; static void enable_status_frames(struct phy_device *phydev, bool on) { + struct dp83640_private *dp83640 = phydev->priv; + struct dp83640_clock *clock = dp83640->clock; u16 cfg0 = 0, ver; if (on) @@ -539,9 +543,13 @@ static void enable_status_frames(struct phy_device *phydev, bool on) ver = (PSF_PTPVER & VERSIONPTP_MASK) << VERSIONPTP_SHIFT; + mutex_lock(&clock->extreg_lock); + ext_write(0, phydev, PAGE5, PSF_CFG0, cfg0); ext_write(0, phydev, PAGE6, PSF_CFG1, ver); + mutex_unlock(&clock->extreg_lock); + if (!phydev->attached_dev) { pr_warn("expected to find an attached netdevice\n"); return; @@ -1173,11 +1181,18 @@ static int dp83640_config_init(struct phy_device *phydev) if (clock->chosen && !list_empty(&clock->phylist)) recalibrate(clock); - else + else { + mutex_lock(&clock->extreg_lock); enable_broadcast(phydev, clock->page, 1); + mutex_unlock(&clock->extreg_lock); + } enable_status_frames(phydev, true); + + mutex_lock(&clock->extreg_lock); ext_write(0, phydev, PAGE4, PTP_CTL, PTP_ENABLE); + mutex_unlock(&clock->extreg_lock); + return 0; } From adbe088f6f8b0b7701fe07f51fe6f2bd602a6665 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Mon, 25 May 2015 11:55:45 +0200 Subject: [PATCH 283/481] net: dp83640: fix improper double spin locking. A pair of nested spin locks was introduced in commit 63502b8d0 "dp83640: Fix receive timestamp race condition". Unfortunately the 'flags' parameter was reused for the inner lock, clobbering the originally saved IRQ state. This patch fixes the issue by changing the inner lock to plain spin_lock without irqsave. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index e570036275e2..00cb41e71312 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -846,7 +846,7 @@ static void decode_rxts(struct dp83640_private *dp83640, list_del_init(&rxts->list); phy2rxts(phy_rxts, rxts); - spin_lock_irqsave(&dp83640->rx_queue.lock, flags); + spin_lock(&dp83640->rx_queue.lock); skb_queue_walk(&dp83640->rx_queue, skb) { struct dp83640_skb_info *skb_info; @@ -861,7 +861,7 @@ static void decode_rxts(struct dp83640_private *dp83640, break; } } - spin_unlock_irqrestore(&dp83640->rx_queue.lock, flags); + spin_unlock(&dp83640->rx_queue.lock); if (!shhwtstamps) list_add_tail(&rxts->list, &dp83640->rxts); From e74993aef4411995960052506a000f94411a7103 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 4 May 2015 15:30:47 -0700 Subject: [PATCH 284/481] xtensa: Provide dummy dma_alloc_attrs() and dma_free_attrs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xtensa:allmodconfig fails to build with the following errors. 
drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c: In function ‘gk20a_instobj_dtor_dma’: drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c:154:2: error: implicit declaration of function ‘dma_free_attrs’ drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c: In function ‘gk20a_instobj_ctor_dma’: drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c:218:2: error: implicit declaration of function ‘dma_alloc_attrs’ Xtensa does not provide those functions at this time. Provide dummy implementations to avoid build errors. Acked-by: Max Filippov Signed-off-by: Guenter Roeck Signed-off-by: Chris Zankel --- arch/xtensa/include/asm/dma-mapping.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 172a02a6ad14..ba78ccf651e7 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt, return -EINVAL; } +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + return NULL; +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ +} + #endif /* _XTENSA_DMA_MAPPING_H */ From 990ed2720717173bbdea4cfb2bad37cc7aa91495 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 21 May 2015 11:58:30 -0400 Subject: [PATCH 285/481] drm/vgem: drop DRIVER_PRIME (v2) For actual sharing of buffers with other drivers (ie. actual hardware) we'll need to pimp things out a bit better to deal w/ caching, multiple memory domains, etc. See thread: http://lists.freedesktop.org/archives/dri-devel/2015-May/083160.html But for the llvmpipe use-case this isn't a problem. Nor do we really need prime/dri3 (dri2 is sufficient). So until the other issues are sorted lets remove DRIVER_PRIME. 
v2: also drop the dead code [airlied: Okay I'm convinced this API could have a lot of use cases that are really really bad, yes the upload use case is valid however that isn't the only use case enabled, and if we allow all the other use cases, people will start to (ab)use them, and then they'll be ABI and my life will get worse, so disable PRIME for now] Acked-by: Thomas Hellstrom Acked-by: Daniel Vetter Signed-off-by: Rob Clark Signed-off-by: Dave Airlie --- drivers/gpu/drm/vgem/Makefile | 2 +- drivers/gpu/drm/vgem/vgem_dma_buf.c | 94 ----------------------------- drivers/gpu/drm/vgem/vgem_drv.c | 11 +--- drivers/gpu/drm/vgem/vgem_drv.h | 11 ---- 4 files changed, 2 insertions(+), 116 deletions(-) delete mode 100644 drivers/gpu/drm/vgem/vgem_dma_buf.c diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile index 1055cb79096c..3f4c7b842028 100644 --- a/drivers/gpu/drm/vgem/Makefile +++ b/drivers/gpu/drm/vgem/Makefile @@ -1,4 +1,4 @@ ccflags-y := -Iinclude/drm -vgem-y := vgem_drv.o vgem_dma_buf.o +vgem-y := vgem_drv.o obj-$(CONFIG_DRM_VGEM) += vgem.o diff --git a/drivers/gpu/drm/vgem/vgem_dma_buf.c b/drivers/gpu/drm/vgem/vgem_dma_buf.c deleted file mode 100644 index 0254438ad1a6..000000000000 --- a/drivers/gpu/drm/vgem/vgem_dma_buf.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * Copyright © 2014 The Chromium OS Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Ben Widawsky - * - */ - -#include -#include "vgem_drv.h" - -struct sg_table *vgem_gem_prime_get_sg_table(struct drm_gem_object *gobj) -{ - struct drm_vgem_gem_object *obj = to_vgem_bo(gobj); - BUG_ON(obj->pages == NULL); - - return drm_prime_pages_to_sg(obj->pages, obj->base.size / PAGE_SIZE); -} - -int vgem_gem_prime_pin(struct drm_gem_object *gobj) -{ - struct drm_vgem_gem_object *obj = to_vgem_bo(gobj); - return vgem_gem_get_pages(obj); -} - -void vgem_gem_prime_unpin(struct drm_gem_object *gobj) -{ - struct drm_vgem_gem_object *obj = to_vgem_bo(gobj); - vgem_gem_put_pages(obj); -} - -void *vgem_gem_prime_vmap(struct drm_gem_object *gobj) -{ - struct drm_vgem_gem_object *obj = to_vgem_bo(gobj); - BUG_ON(obj->pages == NULL); - - return vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL); -} - -void vgem_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) -{ - vunmap(vaddr); -} - -struct drm_gem_object *vgem_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf) -{ - struct drm_vgem_gem_object *obj = NULL; - int ret; - - obj = kzalloc(sizeof(*obj), GFP_KERNEL); - if (obj == NULL) { - ret = -ENOMEM; - goto fail; - } - - ret = drm_gem_object_init(dev, &obj->base, dma_buf->size); - if (ret) { - ret = -ENOMEM; - goto fail_free; - } - - get_dma_buf(dma_buf); - - obj->base.dma_buf = dma_buf; - obj->use_dma_buf = true; - - return &obj->base; - -fail_free: - kfree(obj); -fail: - return ERR_PTR(ret); -} diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index cb3b43525b2d..7a207ca547be 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -302,22 +302,13 @@ static const struct file_operations vgem_driver_fops = { }; static struct drm_driver vgem_driver = { - .driver_features = DRIVER_GEM | DRIVER_PRIME, + .driver_features = DRIVER_GEM, .gem_free_object = vgem_gem_free_object, .gem_vm_ops = &vgem_gem_vm_ops, .ioctls = vgem_ioctls, .fops = &vgem_driver_fops, .dumb_create = vgem_gem_dumb_create, .dumb_map_offset = vgem_gem_dumb_map, - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_export = drm_gem_prime_export, - .gem_prime_import = vgem_gem_prime_import, - .gem_prime_pin = vgem_gem_prime_pin, - .gem_prime_unpin = vgem_gem_prime_unpin, - .gem_prime_get_sg_table = vgem_gem_prime_get_sg_table, - .gem_prime_vmap = vgem_gem_prime_vmap, - .gem_prime_vunmap = vgem_gem_prime_vunmap, .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h index 57ab4d8f41f9..e9f92f7ee275 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.h +++ b/drivers/gpu/drm/vgem/vgem_drv.h @@ -43,15 +43,4 @@ struct drm_vgem_gem_object { extern void vgem_gem_put_pages(struct drm_vgem_gem_object *obj); extern int vgem_gem_get_pages(struct drm_vgem_gem_object *obj); -/* vgem_dma_buf.c */ -extern struct sg_table *vgem_gem_prime_get_sg_table( - struct drm_gem_object *gobj); -extern int vgem_gem_prime_pin(struct drm_gem_object *gobj); -extern void vgem_gem_prime_unpin(struct drm_gem_object *gobj); -extern void *vgem_gem_prime_vmap(struct drm_gem_object *gobj); -extern void vgem_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); -extern struct drm_gem_object *vgem_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf); - - #endif From 823245026ead28a244cb9df5ae79b511da128606 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 25 May 2015 17:16:49 +0800 Subject: [PATCH 
286/481] ALSA: hda/realtek - Add ALC256 alias name for Dell Add ALC3246 for Dell platform. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 31f8f13be907..fc0ccc78bdc7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -884,6 +884,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = { { 0x10ec0275, 0x1028, 0, "ALC3260" }, { 0x10ec0899, 0x1028, 0, "ALC3861" }, { 0x10ec0298, 0x1028, 0, "ALC3266" }, + { 0x10ec0256, 0x1028, 0, "ALC3246" }, { 0x10ec0670, 0x1025, 0, "ALC669X" }, { 0x10ec0676, 0x1025, 0, "ALC679X" }, { 0x10ec0282, 0x1043, 0, "ALC3229" }, From 68feaca0b13e453aa14ee064c1736202b48b342f Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 19 Feb 2015 10:30:14 +0100 Subject: [PATCH 287/481] backlight: pwm: Handle EPROBE_DEFER while requesting the PWM When trying to request the PWM device with devm_pwm_get(), the EPROBE_DEFER flag is not handled properly. It can lead to the PWM not being found. Signed-off-by: Boris Brezillon Signed-off-by: Nicolas Ferre Acked-by: Thierry Reding Signed-off-by: Lee Jones --- drivers/video/backlight/pwm_bl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 3a145a643e0d..6897f1c1bc73 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -274,6 +274,10 @@ static int pwm_backlight_probe(struct platform_device *pdev) pb->pwm = devm_pwm_get(&pdev->dev, NULL); if (IS_ERR(pb->pwm)) { + ret = PTR_ERR(pb->pwm); + if (ret == -EPROBE_DEFER) + goto err_alloc; + dev_err(&pdev->dev, "unable to request PWM, trying legacy API\n"); pb->legacy = true; pb->pwm = pwm_request(data->pwm_id, "pwm-backlight"); From f858c7bcca8c20761a20593439fe998b4b67e86b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2015 15:32:42 +0800 Subject: [PATCH 288/481] crypto: algif_aead - Disable AEAD user-space for now The newly added AEAD user-space isn't quite ready for prime time just yet. In particular it is conflicting with the AEAD single SG list interface change so this patch disables it now. Once the SG list stuff is completely done we can then renable this interface. Signed-off-by: Herbert Xu --- crypto/Kconfig | 9 --------- 1 file changed, 9 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 8aaf298a80e1..362905e7c841 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1512,15 +1512,6 @@ config CRYPTO_USER_API_RNG This option enables the user-spaces interface for random number generator algorithms. -config CRYPTO_USER_API_AEAD - tristate "User-space interface for AEAD cipher algorithms" - depends on NET - select CRYPTO_AEAD - select CRYPTO_USER_API - help - This option enables the user-spaces interface for AEAD - cipher algorithms. - config CRYPTO_HASH_INFO bool From 4ae9944d132b160d444fa3aa875307eb0fa3eeec Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Tue, 26 May 2015 08:25:54 +0000 Subject: [PATCH 289/481] dm: run queue on re-queue Without kicking queue, requeued request may stay forever in the queue if there are no other I/O activities to the device. The original error had been in v2.6.39 with commit 7eaceaccab5f ("block: remove per-queue plugging"), which replaced conditional plugging by periodic runqueue. Commit 9d1deb83d489 in v4.1-rc1 removed the periodic runqueue and the problem started to manifest. 
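The change below is small, so here is the requeue pattern spelled out as a hedged sketch (a generic helper, not the dm code verbatim): after blk_requeue_request() puts the request back, the queue has to be kicked explicitly, or the request can sit idle until unrelated I/O happens to run the queue:

#include <linux/blkdev.h>

static void demo_requeue_request(struct request_queue *q, struct request *rq)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        blk_requeue_request(q, rq);
        blk_run_queue_async(q);         /* schedule the queue to be run again */
        spin_unlock_irqrestore(q->queue_lock, flags);
}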
Fixes: 9d1deb83d489 ("dm: don't schedule delayed run of the queue if nothing to do") Signed-off-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a930b72314ac..0bf79a0bad37 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1164,6 +1164,7 @@ static void old_requeue_request(struct request *rq) spin_lock_irqsave(q->queue_lock, flags); blk_requeue_request(q, rq); + blk_run_queue_async(q); spin_unlock_irqrestore(q->queue_lock, flags); } From e21422de817ab0b67ed1902fe1383bcdeece855e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 28 Apr 2015 10:57:29 +0100 Subject: [PATCH 290/481] MIPS: Fix CDMM to use native endian MMIO reads The MIPS Common Device Memory Map (CDMM) is internal to the core and has native endianness. There is therefore no need to byte swap the accesses on big endian targets, so convert the CDMM bus driver to use __raw_readl() rather than readl(). Fixes: 8286ae03308c ("MIPS: Add CDMM bus support") Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9904/ Signed-off-by: Ralf Baechle --- drivers/bus/mips_cdmm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c index 5bd792c68f9b..ab3bde16ecb4 100644 --- a/drivers/bus/mips_cdmm.c +++ b/drivers/bus/mips_cdmm.c @@ -453,7 +453,7 @@ void __iomem *mips_cdmm_early_probe(unsigned int dev_type) /* Look for a specific device type */ for (; drb < bus->drbs; drb += size + 1) { - acsr = readl(cdmm + drb * CDMM_DRB_SIZE); + acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE); type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT; if (type == dev_type) return cdmm + drb * CDMM_DRB_SIZE; @@ -500,7 +500,7 @@ static void mips_cdmm_bus_discover(struct mips_cdmm_bus *bus) bus->discovered = true; pr_info("cdmm%u discovery (%u blocks)\n", cpu, bus->drbs); for (; drb < bus->drbs; drb += size + 1) { - acsr = readl(cdmm + drb * CDMM_DRB_SIZE); + acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE); type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT; size = (acsr & CDMM_ACSR_DEVSIZE) >> CDMM_ACSR_DEVSIZE_SHIFT; rev = (acsr & CDMM_ACSR_DEVREV) >> CDMM_ACSR_DEVREV_SHIFT; From 70f041b6e1ff508750988ffd034ac6117d34d4d7 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 28 Apr 2015 10:57:30 +0100 Subject: [PATCH 291/481] ttyFDC: Fix to use native endian MMIO reads The MIPS Common Device Memory Map (CDMM) is internal to the core and has native endianness. There is therefore no need to byte swap the accesses on big endian targets, so convert the Fast Debug Channel (FDC) TTY driver to use __raw_readl()/__raw_writel() rather than ioread32()/iowrite32(). 
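As a hedged illustration of the accessor distinction both CDMM-related patches rely on (the helpers are illustrative, not taken from either driver): readl()/ioread32() assume a little-endian device register and byte-swap on big-endian kernels, while the __raw_* accessors use the CPU's native byte order, which is what a core-internal block such as CDMM/FDC requires:

#include <linux/io.h>

static inline u32 demo_native_read(void __iomem *base, unsigned int offs)
{
        return __raw_readl(base + offs);        /* native endian, no byte swap */
}

static inline u32 demo_le_read(void __iomem *base, unsigned int offs)
{
        return readl(base + offs);              /* little-endian register: swapped on BE */
}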
Fixes: 4cebec609aea ("TTY: Add MIPS EJTAG Fast Debug Channel TTY driver") Fixes: c2d7ef51d731 ("ttyFDC: Implement KGDB IO operations.") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Jiri Slaby Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9905/ Acked-by: Greg Kroah-Hartman Signed-off-by: Ralf Baechle --- drivers/tty/mips_ejtag_fdc.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/tty/mips_ejtag_fdc.c b/drivers/tty/mips_ejtag_fdc.c index 04d9e23d1ee1..358323c83b4f 100644 --- a/drivers/tty/mips_ejtag_fdc.c +++ b/drivers/tty/mips_ejtag_fdc.c @@ -174,13 +174,13 @@ struct mips_ejtag_fdc_tty { static inline void mips_ejtag_fdc_write(struct mips_ejtag_fdc_tty *priv, unsigned int offs, unsigned int data) { - iowrite32(data, priv->reg + offs); + __raw_writel(data, priv->reg + offs); } static inline unsigned int mips_ejtag_fdc_read(struct mips_ejtag_fdc_tty *priv, unsigned int offs) { - return ioread32(priv->reg + offs); + return __raw_readl(priv->reg + offs); } /* Encoding of byte stream in FDC words */ @@ -347,9 +347,9 @@ static void mips_ejtag_fdc_console_write(struct console *c, const char *s, s += inc[word.bytes - 1]; /* Busy wait until there's space in fifo */ - while (ioread32(regs + REG_FDSTAT) & REG_FDSTAT_TXF) + while (__raw_readl(regs + REG_FDSTAT) & REG_FDSTAT_TXF) ; - iowrite32(word.word, regs + REG_FDTX(c->index)); + __raw_writel(word.word, regs + REG_FDTX(c->index)); } out: local_irq_restore(flags); @@ -1227,7 +1227,7 @@ static int kgdbfdc_read_char(void) /* Read next word from KGDB channel */ do { - stat = ioread32(regs + REG_FDSTAT); + stat = __raw_readl(regs + REG_FDSTAT); /* No data waiting? */ if (stat & REG_FDSTAT_RXE) @@ -1236,7 +1236,7 @@ static int kgdbfdc_read_char(void) /* Read next word */ channel = (stat & REG_FDSTAT_RXCHAN) >> REG_FDSTAT_RXCHAN_SHIFT; - data = ioread32(regs + REG_FDRX); + data = __raw_readl(regs + REG_FDRX); } while (channel != CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN); /* Decode into rbuf */ @@ -1266,9 +1266,10 @@ static void kgdbfdc_push_one(void) return; /* Busy wait until there's space in fifo */ - while (ioread32(regs + REG_FDSTAT) & REG_FDSTAT_TXF) + while (__raw_readl(regs + REG_FDSTAT) & REG_FDSTAT_TXF) ; - iowrite32(word.word, regs + REG_FDTX(CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN)); + __raw_writel(word.word, + regs + REG_FDTX(CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN)); } /* flush the whole write buffer to the TX FIFO */ From 884e7e5e7d1a35b944436895852d5ed24e79d42e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 20 May 2015 05:44:54 -0700 Subject: [PATCH 292/481] MIPS: irq: Use DECLARE_BITMAP Use the generic mechanism to declare a bitmap instead of unsigned long. This could fix an overwrite defect of whatever follows irq_map. Not all "#define NR_IRQS " are a multiple of BITS_PER_LONG so using DECLARE_BITMAP allocates the proper number of longs required for the possible bits. 
For instance: arch/mips/include/asm/mach-ath79/irq.h:#define NR_IRQS 51 arch/mips/include/asm/mach-db1x00/irq.h:#define NR_IRQS 152 arch/mips/include/asm/mach-lantiq/falcon/irq.h:#define NR_IRQS 328 Signed-off-by: Joe Perches Cc: linux-mips Cc: LKML Cc: Gabor Juhos Cc: Manuel Lauss Cc: John Crispin Patchwork: https://patchwork.linux-mips.org/patch/10091/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index d2bfbc2e8995..51f57d841662 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -29,7 +29,7 @@ int kgdb_early_setup; #endif -static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; +static DECLARE_BITMAP(irq_map, NR_IRQS); int allocate_irqno(void) { From 5767b52c47488e288c6b6f94e4739e1fdfdf4e81 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 26 May 2015 12:54:18 +0200 Subject: [PATCH 293/481] MIPS: Fuloong 2E: Replace CONFIG_USB_ISP1760_HCD by CONFIG_USB_ISP1760 Since commit 100832abf065bc18 ("usb: isp1760: Make HCD support optional"), CONFIG_USB_ISP1760_HCD is automatically selected when needed. Enabling that option in the defconfig is now a no-op, and no longer enables ISP1760 HCD support. Re-enable the ISP1760 driver in the defconfig by enabling USB_ISP1760_HOST_ROLE instead. Signed-off-by: Geert Uytterhoeven Cc: Laurent Pinchart Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10180/ Signed-off-by: Ralf Baechle --- arch/mips/configs/fuloong2e_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 002680648dcb..b2a577ebce0b 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m CONFIG_USB_C67X00_HCD=m CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_ISP1760_HCD=m +CONFIG_USB_ISP1760=m CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=m CONFIG_USB_R8A66597_HCD=m From ebdd117e5aab748d06f872860f36a0b09d8aadfc Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 8 Aug 2013 21:13:54 +0800 Subject: [PATCH 294/481] alpha: clean up unnecessary MSI/MSI-X capability find The PCI core initializes the device MSI/MSI-X capability in pci_msi_init_pci_dev(), so device drivers should use pci_dev->msi_cap/msix_cap to determine whether the device supports MSI/MSI-X instead of calling pci_find_capability(pci_dev, PCI_CAP_ID_MSI/MSIX), which reads the capability list from PCIe device config space again and consumes more time.
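A minimal sketch of the cached-capability idiom this relies on (illustrative only; example_dev_has_msi is a made-up helper and pdev stands for any already-enumerated device):

#include <linux/pci.h>

static bool example_dev_has_msi(struct pci_dev *pdev)
{
	/*
	 * msi_cap/msix_cap hold the config-space offsets of the MSI and
	 * MSI-X capability structures, cached by the PCI core during
	 * enumeration; 0 means the capability is absent.  No further
	 * config-space reads are needed, unlike pci_find_capability().
	 */
	return pdev->msi_cap != 0;
}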
Signed-off-by: Yijing Wang Cc: Richard Henderson Cc: Ivan Kokshaysky Acked-by: Matt Turner Cc: Phil Carmody Cc: linux-alpha@vger.kernel.org Signed-off-by: Matt Turner --- arch/alpha/kernel/sys_marvel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index f21d61fab678..24e41bd7d3c9 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin) pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); irq = intline; - msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI); + msi_loc = dev->msi_cap; msg_ctl = 0; if (msi_loc) pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl); From ae6d78d78a20c351164d371e94b2aadc54536b40 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 25 Nov 2013 09:55:22 +0100 Subject: [PATCH 295/481] alpha: Remove #include from Everything in arch/alpha/include/uapi/asm/types.h is protected by "#ifndef __KERNEL__", so it's unused for kernelspace. Signed-off-by: Geert Uytterhoeven Cc: Richard Henderson Cc: Ivan Kokshaysky Acked-by: Matt Turner Cc: linux-alpha@vger.kernel.org Signed-off-by: Matt Turner --- arch/alpha/include/asm/types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h index f61e1a56c378..4cb4b6d3452c 100644 --- a/arch/alpha/include/asm/types.h +++ b/arch/alpha/include/asm/types.h @@ -2,6 +2,5 @@ #define _ALPHA_TYPES_H #include -#include #endif /* _ALPHA_TYPES_H */ From 614aab527b3d48f481854c215251471407473599 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 16 Dec 2013 00:36:25 +0800 Subject: [PATCH 296/481] smp, alpha: kill SMP single function call interrupt Commit 9a46ad6d6df3b54 "smp: make smp_call_function_many() use logic similar to smp_call_function_single()" has unified the way to handle single and multiple cross-CPU function calls. Now only one interrupt is needed for architecture specific code to support generic SMP function call interfaces, so kill the redundant single function call interrupt. 
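A condensed sketch of why one IPI type now covers both paths (illustrative, not the alpha code itself; example_handle_call_func_ipi is a made-up name): after the generic-code unification, both smp_call_function_single() and smp_call_function_many() queue their work on the target CPU's per-CPU call queue, so the arch handler only needs to drain that queue:

#include <linux/smp.h>

static void example_handle_call_func_ipi(void)
{
	/*
	 * Runs every queued entry, whether it came from the single-CPU or
	 * the many-CPU call path; a separate IPI_CALL_FUNC_SINGLE case in
	 * the arch IPI handler is therefore redundant.
	 */
	generic_smp_call_function_interrupt();
}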
Cc: Andrew Morton Cc: Shaohua Li Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Steven Rostedt Cc: Jiri Kosina Cc: Richard Henderson Cc: Ivan Kokshaysky Acked-by: Matt Turner Cc: linux-alpha@vger.kernel.org Signed-off-by: Jiang Liu Signed-off-by: Matt Turner --- arch/alpha/kernel/smp.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 99ac36d5de4e..2f24447fef92 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -63,7 +63,6 @@ static struct { enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, }; @@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier) return -EINVAL; } - static void send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) { @@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs) generic_smp_call_function_interrupt(); break; - case IPI_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case IPI_CPU_STOP: halt(); @@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); } static void From 11447c7c4f046350f63693e2c935e6b33a1b62a8 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 10 Jan 2014 17:02:02 -0500 Subject: [PATCH 297/481] alpha: don't use module_init for non-modular core code The srm console is always built in. It will never be modular, so using module_init as an alias for __initcall is rather misleading. Fix this up now, so that we can relocate module_init from init.h into module.h in the future. If we don't do this, we'd have to add module.h to obviously non-modular code, and that would be a worse thing. Direct use of __initcall is discouraged, vs prioritized ones. Use of device_initcall is consistent with what __initcall maps onto, and hence does not change the init order, making the impact of this change zero. Should someone with real hardware for boot testing want to change it later to arch_initcall or console_initcall, they can do that at a later date. Reviewed-by: Richard Henderson Cc: Ivan Kokshaysky Acked-by: Matt Turner Cc: linux-alpha@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: Matt Turner --- arch/alpha/kernel/srmcons.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 6f01d9ad7b81..72b59511e59a 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -237,8 +237,7 @@ srmcons_init(void) return -ENODEV; } - -module_init(srmcons_init); +device_initcall(srmcons_init); /* From 0bc25674a4a2ec32140e2f3f0b863cf87e566ad4 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 21 Jan 2014 16:22:40 -0500 Subject: [PATCH 298/481] alpha: delete non-required instances of None of these files are actually using any __init type directives and hence don't need to include . Most are just a left over from __devinit and __cpuinit removal, or simply due to code getting copied from one driver to the next. 
Acked-by: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: linux-alpha@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: Matt Turner --- arch/alpha/kernel/err_ev6.c | 1 - arch/alpha/kernel/irq.c | 1 - arch/alpha/kernel/traps.c | 1 - arch/alpha/oprofile/op_model_ev4.c | 1 - arch/alpha/oprofile/op_model_ev5.c | 1 - arch/alpha/oprofile/op_model_ev6.c | 1 - arch/alpha/oprofile/op_model_ev67.c | 1 - 7 files changed, 7 deletions(-) diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c index 253cf1a87481..51267ac5729b 100644 --- a/arch/alpha/kernel/err_ev6.c +++ b/arch/alpha/kernel/err_ev6.c @@ -6,7 +6,6 @@ * Error handling code supporting Alpha systems */ -#include #include #include diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 7b2be251c30f..51f2c8654253 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 9c4c189eb22f..74aceead06e9 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/oprofile/op_model_ev4.c b/arch/alpha/oprofile/op_model_ev4.c index 18aa9b4f94f1..086a0d5445c5 100644 --- a/arch/alpha/oprofile/op_model_ev4.c +++ b/arch/alpha/oprofile/op_model_ev4.c @@ -8,7 +8,6 @@ */ #include -#include #include #include diff --git a/arch/alpha/oprofile/op_model_ev5.c b/arch/alpha/oprofile/op_model_ev5.c index c32f8a0ad925..c300f5ef3482 100644 --- a/arch/alpha/oprofile/op_model_ev5.c +++ b/arch/alpha/oprofile/op_model_ev5.c @@ -8,7 +8,6 @@ */ #include -#include #include #include diff --git a/arch/alpha/oprofile/op_model_ev6.c b/arch/alpha/oprofile/op_model_ev6.c index 1c84cc257fc7..02edf5971614 100644 --- a/arch/alpha/oprofile/op_model_ev6.c +++ b/arch/alpha/oprofile/op_model_ev6.c @@ -8,7 +8,6 @@ */ #include -#include #include #include diff --git a/arch/alpha/oprofile/op_model_ev67.c b/arch/alpha/oprofile/op_model_ev67.c index 34a57a126553..adb1744d20f3 100644 --- a/arch/alpha/oprofile/op_model_ev67.c +++ b/arch/alpha/oprofile/op_model_ev67.c @@ -9,7 +9,6 @@ */ #include -#include #include #include From 9f7b2d1f02c5dae9c7fd59848681c88dc3741819 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Fri, 13 Mar 2015 20:04:17 +0200 Subject: [PATCH 299/481] alpha: copy_thread(): rename 'arg' argument to 'kthread_arg' The 'arg' argument to copy_thread() is only ever used when forking a new kernel thread. Hence, rename it to 'kthread_arg' for clarity (and consistency with do_fork() and other arch-specific implementations of copy_thread()). Signed-off-by: Alex Dowad Signed-off-by: Matt Turner --- arch/alpha/kernel/process.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 1941a07b5811..84d13263ce46 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task) } /* - * Copy an alpha thread.. 
+ * Copy architecture-specific thread state */ - int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, + unsigned long kthread_arg, struct task_struct *p) { extern void ret_from_fork(void); @@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, sizeof(struct switch_stack) + sizeof(struct pt_regs)); childstack->r26 = (unsigned long) ret_from_kernel_thread; childstack->r9 = usp; /* function */ - childstack->r10 = arg; + childstack->r10 = kthread_arg; childregs->hae = alpha_mv.hae_cache, childti->pcb.usp = 0; return 0; From 234306038064131a77da176d41e89e2c1535cda9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 23 Mar 2015 22:47:21 +0100 Subject: [PATCH 300/481] alpha: Fix bootpfile and bootpzfile make targets Fix the bootpfile and bootpzfile make targets to create BOOTP images. Both targets were broken due to some missing defines to re-map ELF constants. In addition, the old code used the generic vsprintf function of the kernel, which we now replace with a simple and much smaller implementation for the bootloader. Signed-off-by: Helge Deller Signed-off-by: Matt Turner --- arch/alpha/boot/Makefile | 16 +- arch/alpha/boot/main.c | 1 - arch/alpha/boot/stdio.c | 306 ++++++++++++++++++++++++++++++++++++++++ arch/alpha/boot/tools/objstrip.c | 3 + 4 files changed, 319 insertions(+), 7 deletions(-) create mode 100644 arch/alpha/boot/stdio.c diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile index cd143887380a..8399bd0e68e8 100644 --- a/arch/alpha/boot/Makefile +++ b/arch/alpha/boot/Makefile @@ -14,6 +14,9 @@ targets := vmlinux.gz vmlinux \ tools/bootpzh bootloader bootpheader bootpzheader OBJSTRIP := $(obj)/tools/objstrip +HOSTCFLAGS := -Wall -I$(objtree)/usr/include +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) + # SRM bootable image. Copy to offset 512 of a partition.
$(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh ( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@ @@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE $(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE $(call if_changed,objstrip) -LDFLAGS_bootloader := -static -uvsprintf -T #-N -relax -LDFLAGS_bootpheader := -static -uvsprintf -T #-N -relax -LDFLAGS_bootpzheader := -static -uvsprintf -T #-N -relax +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootpheader := -static -T # -N -relax +LDFLAGS_bootpzheader := -static -T # -N -relax -OBJ_bootlx := $(obj)/head.o $(obj)/main.o -OBJ_bootph := $(obj)/head.o $(obj)/bootp.o -OBJ_bootpzh := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o +OBJ_bootlx := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o +OBJ_bootph := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o +OBJ_bootpzh := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o $(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE $(call if_changed,ld) diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c index 3baf2d1e908d..dd6eb4a33582 100644 --- a/arch/alpha/boot/main.c +++ b/arch/alpha/boot/main.c @@ -19,7 +19,6 @@ #include "ksize.h" -extern int vsprintf(char *, const char *, va_list); extern unsigned long switch_to_osf_pal(unsigned long nr, struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, unsigned long *vptb); diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c new file mode 100644 index 000000000000..f844dae8a54a --- /dev/null +++ b/arch/alpha/boot/stdio.c @@ -0,0 +1,306 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +# define do_div(n, base) ({ \ + unsigned int __base = (base); \ + unsigned int __rem; \ + __rem = ((unsigned long long)(n)) % __base; \ + (n) = ((unsigned long long)(n)) / __base; \ + __rem; \ +}) + + +static int skip_atoi(const char **s) +{ + int i, c; + + for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) + i = i*10 + c - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, unsigned long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? 
'0' : ' '; + sign = 0; + if (type & SIGN) { + if ((signed long long)num < 0) { + sign = '-'; + num = - (signed long long)num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) { + tmp[i++] = digits[do_div(num, base)]; + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if ('0' <= *fmt && *fmt <= '9') + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if ('0' <= *fmt && *fmt <= '9') + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'l' && *(fmt + 1) == 'l') { + qualifier = 'q'; + fmt += 2; + } else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' + || *fmt == 'Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = 
va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'q') { + num = va_arg(args, unsigned long long); + if (flags & SIGN) + num = (signed long long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c index 367d53d031fc..dee82695f48b 100644 --- a/arch/alpha/boot/tools/objstrip.c +++ b/arch/alpha/boot/tools/objstrip.c @@ -27,6 +27,9 @@ #include #ifdef __ELF__ # include +# define elfhdr elf64_hdr +# define elf_phdr elf64_phdr +# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) #endif /* bootfile size must be multiple of BLOCK_SIZE: */ From 228fa858e5865c9407b0d6710fd504e0b68bc371 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 18 May 2015 12:36:50 +0800 Subject: [PATCH 301/481] alpha: Wire up all missing implemented syscalls And still left the missing unimplemented syscalls as warnings. 
The related warnings for missing implemented syscalls: CALL scripts/checksyscalls.sh :1241:2: warning: #warning syscall getrandom not implemented [-Wcpp] :1244:2: warning: #warning syscall memfd_create not implemented [-Wcpp] :1250:2: warning: #warning syscall execveat not implemented [-Wcpp] Signed-off-by: Chen Gang Signed-off-by: Matt Turner --- arch/alpha/include/asm/unistd.h | 2 +- arch/alpha/include/uapi/asm/unistd.h | 3 +++ arch/alpha/kernel/systbls.S | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index c509d306db45..a56e608db2f9 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -3,7 +3,7 @@ #include -#define NR_SYSCALLS 511 +#define NR_SYSCALLS 514 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index d214a0358100..aa33bf5aacb6 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -472,5 +472,8 @@ #define __NR_sched_setattr 508 #define __NR_sched_getattr 509 #define __NR_renameat2 510 +#define __NR_getrandom 511 +#define __NR_memfd_create 512 +#define __NR_execveat 513 #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 24789713f1ea..9b62e3fd4f03 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -529,6 +529,9 @@ sys_call_table: .quad sys_sched_setattr .quad sys_sched_getattr .quad sys_renameat2 /* 510 */ + .quad sys_getrandom + .quad sys_memfd_create + .quad sys_execveat .size sys_call_table, . - sys_call_table .type sys_call_table, @object From cceaeddc2edf710141563c65808b5cea6829b925 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 18 May 2015 12:37:08 +0800 Subject: [PATCH 302/481] alpha: kernel: osf_sys: Set 'kts.tv_nsec' only when 'tv' has effect The related warning: CC init/do_mounts.o arch/alpha/kernel/osf_sys.c: In function 'SyS_osf_settimeofday': arch/alpha/kernel/osf_sys.c:1028:14: warning: 'kts.tv_nsec' may be used uninitialized in this function [-Wmaybe-uninitialized] kts.tv_nsec *= 1000; ^ arch/alpha/kernel/osf_sys.c:1016:18: note: 'kts' was declared here struct timespec kts; ^ Signed-off-by: Chen Gang Signed-off-by: Matt Turner --- arch/alpha/kernel/osf_sys.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index e51f578636a5..36dc91ace83a 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv, if (tv) { if (get_tv32((struct timeval *)&kts, tv)) return -EFAULT; + kts.tv_nsec *= 1000; } if (tz) { if (copy_from_user(&ktz, tz, sizeof(*tz))) return -EFAULT; } - kts.tv_nsec *= 1000; - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } From 983942a5eacae8821882a3d348618b020098e8dc Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 26 May 2015 09:51:49 -0500 Subject: [PATCH 303/481] amd-xgbe-phy: Fix initial mode when autoneg is disabled When the ethtool command is used to set the speed of the device while the device is down, the check to set the initial mode may fail when the device is brought up, causing failure to bring the device up. Update the code to set the initial mode based on the desired speed if auto-negotiation is disabled. 
This patch fixes a bug introduced by: d9663c8c2149 ("amd-xgbe-phy: Use phydev advertising field vs supported") Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/phy/amd-xgbe-phy.c | 45 +++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/amd-xgbe-phy.c b/drivers/net/phy/amd-xgbe-phy.c index fb276f64cd64..34a75cba3b73 100644 --- a/drivers/net/phy/amd-xgbe-phy.c +++ b/drivers/net/phy/amd-xgbe-phy.c @@ -755,6 +755,45 @@ static int amd_xgbe_phy_set_mode(struct phy_device *phydev, return ret; } +static bool amd_xgbe_phy_use_xgmii_mode(struct phy_device *phydev) +{ + if (phydev->autoneg == AUTONEG_ENABLE) { + if (phydev->advertising & ADVERTISED_10000baseKR_Full) + return true; + } else { + if (phydev->speed == SPEED_10000) + return true; + } + + return false; +} + +static bool amd_xgbe_phy_use_gmii_2500_mode(struct phy_device *phydev) +{ + if (phydev->autoneg == AUTONEG_ENABLE) { + if (phydev->advertising & ADVERTISED_2500baseX_Full) + return true; + } else { + if (phydev->speed == SPEED_2500) + return true; + } + + return false; +} + +static bool amd_xgbe_phy_use_gmii_mode(struct phy_device *phydev) +{ + if (phydev->autoneg == AUTONEG_ENABLE) { + if (phydev->advertising & ADVERTISED_1000baseKX_Full) + return true; + } else { + if (phydev->speed == SPEED_1000) + return true; + } + + return false; +} + static int amd_xgbe_phy_set_an(struct phy_device *phydev, bool enable, bool restart) { @@ -1235,11 +1274,11 @@ static int amd_xgbe_phy_config_init(struct phy_device *phydev) /* Set initial mode - call the mode setting routines * directly to insure we are properly configured */ - if (phydev->advertising & SUPPORTED_10000baseKR_Full) + if (amd_xgbe_phy_use_xgmii_mode(phydev)) ret = amd_xgbe_phy_xgmii_mode(phydev); - else if (phydev->advertising & SUPPORTED_1000baseKX_Full) + else if (amd_xgbe_phy_use_gmii_mode(phydev)) ret = amd_xgbe_phy_gmii_mode(phydev); - else if (phydev->advertising & SUPPORTED_2500baseX_Full) + else if (amd_xgbe_phy_use_gmii_2500_mode(phydev)) ret = amd_xgbe_phy_gmii_2500_mode(phydev); else ret = -EINVAL; From adba657533bdd255f7b78bc8a324091f46b294cd Mon Sep 17 00:00:00 2001 From: Chris Lesiak Date: Tue, 26 May 2015 15:40:44 -0500 Subject: [PATCH 304/481] hwmon: (ntc_thermistor) Ensure iio channel is of type IIO_VOLTAGE When configured via device tree, the associated iio device needs to be measuring voltage for the conversion to resistance to be correct. Return -EINVAL if that is not the case. 
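For context, a sketch of why a voltage channel is required (standard voltage-divider arithmetic, approximating what the driver's conversion depends on rather than its exact code; example_ntc_ohm is a made-up name and overflow/zero-divide guards are omitted):

/*
 * With a pull-up resistor R_pu from V_pu to the sense node and the NTC
 * from the node to ground, the measured node voltage v gives:
 *
 *     R_ntc = R_pu * v / (V_pu - v)
 *
 * If the channel reported anything other than a voltage, this conversion
 * would be meaningless, hence the new IIO_VOLTAGE check.
 */
static unsigned long example_ntc_ohm(unsigned long pullup_ohm,
				     unsigned long pullup_uv,
				     unsigned long uv)
{
	return pullup_ohm * uv / (pullup_uv - uv);
}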
Signed-off-by: Chris Lesiak Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Guenter Roeck --- drivers/hwmon/ntc_thermistor.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index 112e4d45e4a0..68800115876b 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -239,8 +239,10 @@ static struct ntc_thermistor_platform_data * ntc_thermistor_parse_dt(struct platform_device *pdev) { struct iio_channel *chan; + enum iio_chan_type type; struct device_node *np = pdev->dev.of_node; struct ntc_thermistor_platform_data *pdata; + int ret; if (!np) return NULL; @@ -253,6 +255,13 @@ ntc_thermistor_parse_dt(struct platform_device *pdev) if (IS_ERR(chan)) return ERR_CAST(chan); + ret = iio_get_channel_type(chan, &type); + if (ret < 0) + return ERR_PTR(ret); + + if (type != IIO_VOLTAGE) + return ERR_PTR(-EINVAL); + if (of_property_read_u32(np, "pullup-uv", &pdata->pullup_uv)) return ERR_PTR(-ENODEV); if (of_property_read_u32(np, "pullup-ohm", &pdata->pullup_ohm)) From a10f0df0615abb194968fc08147f3cdd70fd5aa5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 May 2015 18:01:05 -0400 Subject: [PATCH 305/481] drm/radeon: don't share plls if monitors differ in audio support Enabling audio may enable different pll dividers. Don't share plls if the monitors differ in audio support. bug: https://bugzilla.kernel.org/show_bug.cgi?id=98751 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_crtc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 42b2ea3fdcf3..e597ffc26563 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1798,7 +1798,9 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc) if ((crtc->mode.clock == test_crtc->mode.clock) && (adjusted_clock == test_adjusted_clock) && (radeon_crtc->ss_enabled == test_radeon_crtc->ss_enabled) && - (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID)) + (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID) && + (drm_detect_monitor_audio(radeon_connector_edid(test_radeon_crtc->connector)) == + drm_detect_monitor_audio(radeon_connector_edid(radeon_crtc->connector)))) return test_radeon_crtc->pll_id; } } From b48732e4a48d80ed4a14812f0bab09560846514e Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 26 May 2015 08:22:19 -0700 Subject: [PATCH 306/481] unix/caif: sk_socket can disappear when state is unlocked got a rare NULL pointer dereference in clear_bit Signed-off-by: Mark Salyzyn Acked-by: Hannes Frederic Sowa ---- v2: switch to sock_flag(sk, SOCK_DEAD) and added net/caif/caif_socket.c v3: return -ECONNRESET in upstream caller of wait function for SOCK_DEAD Signed-off-by: David S. 
Miller --- net/caif/caif_socket.c | 8 ++++++++ net/unix/af_unix.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 4ec0c803aef1..112ad784838a 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -330,6 +330,10 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); + + if (sock_flag(sk, SOCK_DEAD)) + break; + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } @@ -373,6 +377,10 @@ static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg, struct sk_buff *skb; lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD)) { + err = -ECONNRESET; + goto unlock; + } skb = skb_dequeue(&sk->sk_receive_queue); caif_check_flow_release(sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5266ea7b922b..06430598cf51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1880,6 +1880,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, unix_state_unlock(sk); timeo = freezable_schedule_timeout(timeo); unix_state_lock(sk); + + if (sock_flag(sk, SOCK_DEAD)) + break; + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } @@ -1939,6 +1943,10 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, struct sk_buff *skb, *last; unix_state_lock(sk); + if (sock_flag(sk, SOCK_DEAD)) { + err = -ECONNRESET; + goto unlock; + } last = skb = skb_peek(&sk->sk_receive_queue); again: if (skb == NULL) { From 082739aa458a74add9a2362988e5aca0367bfa53 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 May 2015 14:08:03 +0200 Subject: [PATCH 307/481] tools: bpf_jit_disasm: fix segfault on disabled debugging log output With recent debugging, I noticed that bpf_jit_disasm segfaults when there's no debugging output from the JIT compiler to the kernel log. Reason is that when regexec(3) doesn't match on anything, start/end offsets are not being filled out and contain some uninitialized garbage from stack. Thus, we need zero out offsets first. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/net/bpf_jit_disasm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c index c5baf9c591b7..618c2bcd4eab 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/net/bpf_jit_disasm.c @@ -123,6 +123,8 @@ static int get_last_jit_image(char *haystack, size_t hlen, assert(ret == 0); ptr = haystack; + memset(pmatch, 0, sizeof(pmatch)); + while (1) { ret = regexec(®ex, ptr, 1, pmatch, 0); if (ret == 0) { From 748a7295d7242bc1aaaec0e245cc61f2be754766 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 26 May 2015 03:50:04 +0300 Subject: [PATCH 308/481] net: netxen: correct sysfs bin attribute return code If read() syscall requests unexpected number of bytes from "dimm" binary attribute file, return EINVAL instead of EPERM. At the same time pin down sysfs file size to the fixed sizeof(struct netxen_dimm_cfg), which allows to exploit some missing sanity checks from kernfs (file boundary checks vs offset etc.) Signed-off-by: Vladimir Zapolskiy Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index e0c31e3947d1..6409a06bbdf6 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -3025,9 +3025,9 @@ netxen_sysfs_read_dimm(struct file *filp, struct kobject *kobj, u8 dw, rows, cols, banks, ranks; u32 val; - if (size != sizeof(struct netxen_dimm_cfg)) { + if (size < attr->size) { netdev_err(netdev, "Invalid size\n"); - return -1; + return -EINVAL; } memset(&dimm, 0, sizeof(struct netxen_dimm_cfg)); @@ -3137,7 +3137,7 @@ out: static struct bin_attribute bin_attr_dimm = { .attr = { .name = "dimm", .mode = (S_IRUGO | S_IWUSR) }, - .size = 0, + .size = sizeof(struct netxen_dimm_cfg), .read = netxen_sysfs_read_dimm, }; From 49fb18972581a781658a4637de76e6069ed5964e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 May 2015 08:37:19 +0200 Subject: [PATCH 309/481] ALSA: hda - Set stream_pm ops automatically by generic parser This allows user to test power_save_node feature via sysfs or patch firmware even on the codecs that don't specify it. It'll also save a few lines. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 5 ++++- sound/pci/hda/patch_realtek.c | 1 - sound/pci/hda/patch_sigmatel.c | 1 - sound/pci/hda/patch_via.c | 1 - 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 1c8678775f40..ac0db1679f09 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4926,9 +4926,12 @@ int snd_hda_gen_parse_auto_config(struct hda_codec *codec, dig_only: parse_digital(codec); - if (spec->power_down_unused || codec->power_save_node) + if (spec->power_down_unused || codec->power_save_node) { if (!codec->power_filter) codec->power_filter = snd_hda_gen_path_power_filter; + if (!codec->patch_ops.stream_pm) + codec->patch_ops.stream_pm = snd_hda_gen_stream_pm; + } if (!spec->no_analog && spec->beep_nid) { err = snd_hda_attach_beep_device(codec, spec->beep_nid); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fc0ccc78bdc7..e3bf72bb278c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5724,7 +5724,6 @@ static int patch_alc269(struct hda_codec *codec) set_beep_amp(spec, spec->gen.mixer_nid, 0x04, HDA_INPUT); codec->patch_ops = alc_patch_ops; - codec->patch_ops.stream_pm = snd_hda_gen_stream_pm; #ifdef CONFIG_PM codec->patch_ops.suspend = alc269_suspend; codec->patch_ops.resume = alc269_resume; diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 43c99ce4a520..0de0fd95144a 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -4403,7 +4403,6 @@ static const struct hda_codec_ops stac_patch_ops = { #ifdef CONFIG_PM .suspend = stac_suspend, #endif - .stream_pm = snd_hda_gen_stream_pm, .reboot_notify = stac_shutup, }; diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 31a95cca015d..742087ef378f 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -472,7 +472,6 @@ static const struct hda_codec_ops via_patch_ops = { .init = via_init, .free = via_free, .unsol_event = snd_hda_jack_unsol_event, - .stream_pm = snd_hda_gen_stream_pm, #ifdef CONFIG_PM .suspend = via_suspend, .check_power_status = via_check_power_status, From 
634a603760c26d163ff92751d91ac7b859e879c4 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 26 May 2015 18:43:36 +0200 Subject: [PATCH 310/481] ARM: imx6: allow booting with old DT The GPC rewrite to IRQ domains has been on the premise that it may break suspend/resume for new kernels on old DT, but otherwise keep things working from a user perspective. This was an accepted compromise to be able to move the GIC cleanup forward. What actually happened was that booting a new kernel on an old DT crashes before even the console is up, so the user does not even see the warning that the DT is too old. The warning message suggests that this has been known before, which is clearly unacceptable. Fix the early crash by mapping the GPC memory space if the IRQ controller doesn't claim it. This keeps at least CPUidle and the needed CPU wakeup workarounds working. With this fixed the system is able to boot up properly minus the expected suspend/resume breakage. Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo --- arch/arm/mach-imx/gpc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 4d60005e9277..bbf015056221 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void) struct device_node *np; np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc"); - if (WARN_ON(!np || - !of_find_property(np, "interrupt-controller", NULL))) - pr_warn("Outdated DT detected, system is about to crash!!!\n"); + if (WARN_ON(!np)) + return; + + if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) { + pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + + /* map GPC, so that at least CPUidle and WARs keep working */ + gpc_base = of_iomap(np, 0); + } } #ifdef CONFIG_PM_GENERIC_DOMAINS From b17c70cd9211e9bbcd77a489f9c60f3d1bd4c392 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 27 May 2015 10:10:26 +0200 Subject: [PATCH 311/481] ARM: imx6: gpc: don't register power domain if DT data is missing If the devicetree is too old and does not provide the regulator and clocks for the power domain, we need to avoid registering the power domain. Otherwise runtime PM will try to control the domain, which will lead to machine hangs without the proper DT configuration data. This restores functionality to the kernel 4.0 level if an old DT is detected, where the power domain is constantly powered on. Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo --- arch/arm/mach-imx/gpc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index bbf015056221..6d0893a3828e 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -449,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev) struct regulator *pu_reg; int ret; + /* bail out if DT too old and doesn't provide the necessary info */ + if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells")) + return 0; + pu_reg = devm_regulator_get_optional(&pdev->dev, "pu"); if (PTR_ERR(pu_reg) == -ENODEV) pu_reg = NULL; From e0c21530fa91f119bfca19640a67380c6b14f12a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 15 May 2015 16:27:40 +0200 Subject: [PATCH 312/481] mfd: da9052: Fix broken regulator probe Fix broken probe of da9052 regulators, which since commit b3f6c73db732 ("mfd: da9052-core: Fix platform-device id collision") use a non-deterministic platform-device id to retrieve static regulator information. 
Fortunately, adequate error handling was in place so probe would simply fail with an error message. Update the mfd-cell ids to be zero-based and use those to identify the cells when probing the regulator devices. Fixes: b3f6c73db732 ("mfd: da9052-core: Fix platform-device id collision") Cc: stable # v3.19 Signed-off-by: Johan Hovold Acked-by: Bartlomiej Zolnierkiewicz Reviewed-by: Mark Brown Signed-off-by: Lee Jones --- drivers/mfd/da9052-core.c | 8 ++++---- drivers/regulator/da9052-regulator.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c index ae498b53ee40..46e3840c7a37 100644 --- a/drivers/mfd/da9052-core.c +++ b/drivers/mfd/da9052-core.c @@ -431,6 +431,10 @@ int da9052_adc_read_temp(struct da9052 *da9052) EXPORT_SYMBOL_GPL(da9052_adc_read_temp); static const struct mfd_cell da9052_subdev_info[] = { + { + .name = "da9052-regulator", + .id = 0, + }, { .name = "da9052-regulator", .id = 1, @@ -483,10 +487,6 @@ static const struct mfd_cell da9052_subdev_info[] = { .name = "da9052-regulator", .id = 13, }, - { - .name = "da9052-regulator", - .id = 14, - }, { .name = "da9052-onkey", }, diff --git a/drivers/regulator/da9052-regulator.c b/drivers/regulator/da9052-regulator.c index 8a4df7a1f2ee..e628d4c2f2ae 100644 --- a/drivers/regulator/da9052-regulator.c +++ b/drivers/regulator/da9052-regulator.c @@ -394,6 +394,7 @@ static inline struct da9052_regulator_info *find_regulator_info(u8 chip_id, static int da9052_regulator_probe(struct platform_device *pdev) { + const struct mfd_cell *cell = mfd_get_cell(pdev); struct regulator_config config = { }; struct da9052_regulator *regulator; struct da9052 *da9052; @@ -409,7 +410,7 @@ static int da9052_regulator_probe(struct platform_device *pdev) regulator->da9052 = da9052; regulator->info = find_regulator_info(regulator->da9052->chip_id, - pdev->id); + cell->id); if (regulator->info == NULL) { dev_err(&pdev->dev, "invalid regulator ID specified\n"); return -EINVAL; @@ -419,7 +420,7 @@ static int da9052_regulator_probe(struct platform_device *pdev) config.driver_data = regulator; config.regmap = da9052->regmap; if (pdata && pdata->regulators) { - config.init_data = pdata->regulators[pdev->id]; + config.init_data = pdata->regulators[cell->id]; } else { #ifdef CONFIG_OF struct device_node *nproot = da9052->dev->of_node; From 5c31b2800d8d3e735e5ecac8fc13d1cf862fd330 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Tue, 26 May 2015 10:28:21 +0200 Subject: [PATCH 313/481] x86/mce: Fix monarch timeout setting through the mce= cmdline option Using "mce=1,10000000" on the kernel cmdline to change the monarch timeout does not work. The cause is that get_option() does parse a subsequent comma in the option string and signals that with a return value. So we don't need to check for a second comma ourselves. Signed-off-by: Xie XiuQi Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1432120943-25028-1-git-send-email-xiexiuqi@huawei.com Link: http://lkml.kernel.org/r/1432628901-18044-19-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 521e5016aca6..0cbcd3183acf 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2014,11 +2014,8 @@ static int __init mcheck_enable(char *str) else if (!strcmp(str, "bios_cmci_threshold")) cfg->bios_cmci_threshold = true; else if (isdigit(str[0])) { - get_option(&str, &(cfg->tolerant)); - if (*str == ',') { - ++str; + if (get_option(&str, &cfg->tolerant) == 2) get_option(&str, &(cfg->monarch_timeout)); - } } else { pr_info("mce argument %s ignored. Please use /sys\n", str); return 0; From 10455f64aff0d715dcdfb09b02393df168fe267e Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 May 2015 10:28:04 +0200 Subject: [PATCH 314/481] x86/mm/kconfig: Simplify conditions for HAVE_ARCH_HUGE_VMAP Simplify the conditions selecting HAVE_ARCH_HUGE_VMAP since X86_PAE depends on X86_32 already. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1431714237-880-2-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1432628901-18044-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 226d5696e1d1..4eb0b0ffae85 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,7 +100,7 @@ config X86 select IRQ_FORCED_THREADING select HAVE_BPF_JIT if X86_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE) + select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE select ARCH_HAS_SG_CHAIN select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG From 7f0431e3dc8953f41e9433581c1fdd7ee45860b0 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 May 2015 10:28:05 +0200 Subject: [PATCH 315/481] x86/mm/mtrr: Fix MTRR lookup to handle an inclusive entry When an MTRR entry is inclusive to a requested range, i.e. the start and end of the request are not within the MTRR entry range but the range contains the MTRR entry entirely: range_start ... [mtrr_start ... mtrr_end] ... range_end __mtrr_type_lookup() ignores such a case because both start_state and end_state are set to zero. This bug can cause the following issues: 1) reserve_memtype() tracks an effective memory type in case a request type is WB (ex. /dev/mem blindly uses WB). Missing to track with its effective type causes a subsequent request to map the same range with the effective type to fail. 2) pud_set_huge() and pmd_set_huge() check if a requested range has any overlap with MTRRs. Missing to detect an overlap may cause a performance penalty or undefined behavior. This patch fixes the bug by adding a new flag, 'inclusive', to detect the inclusive case. This case is then handled in the same way as end_state:1 since the first region is the same. 
With this fix, __mtrr_type_lookup() handles the inclusive case properly. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1431714237-880-3-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1432628901-18044-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 5b239679cfc9..e202d26f64a2 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -154,7 +154,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) prev_match = 0xFF; for (i = 0; i < num_var_ranges; ++i) { - unsigned short start_state, end_state; + unsigned short start_state, end_state, inclusive; if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11))) continue; @@ -166,19 +166,27 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) start_state = ((start & mask) == (base & mask)); end_state = ((end & mask) == (base & mask)); + inclusive = ((start < base) && (end > base)); - if (start_state != end_state) { + if ((start_state != end_state) || inclusive) { /* * We have start:end spanning across an MTRR. - * We split the region into - * either - * (start:mtrr_end) (mtrr_end:end) - * or - * (start:mtrr_start) (mtrr_start:end) + * We split the region into either + * + * - start_state:1 + * (start:mtrr_end)(mtrr_end:end) + * - end_state:1 + * (start:mtrr_start)(mtrr_start:end) + * - inclusive:1 + * (start:mtrr_start)(mtrr_start:mtrr_end)(mtrr_end:end) + * * depending on kind of overlap. - * Return the type for first region and a pointer to - * the start of second region so that caller will - * lookup again on the second region. + * + * Return the type of the first region and a pointer + * to the start of next region so that caller will be + * advised to lookup again after having adjusted start + * and end. + * * Note: This way we handle multiple overlaps as well. */ if (start_state) From 9b3aca620883fc06636737c82a4d024b22182281 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 May 2015 10:28:06 +0200 Subject: [PATCH 316/481] x86/mm/mtrr: Fix MTRR state checks in mtrr_type_lookup() 'mtrr_state.enabled' contains the FE (fixed MTRRs enabled) and E (MTRRs enabled) flags in MSR_MTRRdefType. Intel SDM, section 11.11.2.1, defines these flags as follows: - All MTRRs are disabled when the E flag is clear. The FE flag has no affect when the E flag is clear. - The default type is enabled when the E flag is set. - MTRR variable ranges are enabled when the E flag is set. - MTRR fixed ranges are enabled when both E and FE flags are set. MTRR state checks in __mtrr_type_lookup() do not match with SDM. Hence, this patch makes the following changes: - The current code detects MTRRs disabled when both E and FE flags are clear in mtrr_state.enabled. Fix to detect MTRRs disabled when the E flag is clear. - The current code does not check if the FE bit is set in mtrr_state.enabled when looking at the fixed entries. Fix to check the FE flag. - The current code returns the default type when the E flag is clear in mtrr_state.enabled. 
However, the default type is UC when the E flag is clear. Remove the code as this case is handled as MTRR disabled with the 1st change. In addition, this patch defines the E and FE flags in mtrr_state.enabled as follows. - FE flag: MTRR_STATE_MTRR_FIXED_ENABLED - E flag: MTRR_STATE_MTRR_ENABLED print_mtrr_state() and x86_get_mtrr_mem_range() are also updated accordingly. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1431714237-880-4-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1432628901-18044-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 4 ++++ arch/x86/kernel/cpu/mtrr/cleanup.c | 3 ++- arch/x86/kernel/cpu/mtrr/generic.c | 15 ++++++++------- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index f768f6298419..ef927948657c 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -127,4 +127,8 @@ struct mtrr_gentry32 { _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32) #endif /* CONFIG_COMPAT */ +/* Bit fields for enabled in struct mtrr_state_type */ +#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01 +#define MTRR_STATE_MTRR_ENABLED 0x02 + #endif /* _ASM_X86_MTRR_H */ diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 5f90b85ff22e..70d7c93f4550 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -98,7 +98,8 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, continue; base = range_state[i].base_pfn; if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && - (mtrr_state.enabled & 1)) { + (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { /* Var MTRR contains UC entry below 1M? Skip it: */ printk(BIOS_BUG_MSG, i); if (base + size <= (1<<(20-PAGE_SHIFT))) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e202d26f64a2..b0599dbb899a 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -119,14 +119,16 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) if (!mtrr_state_set) return 0xFF; - if (!mtrr_state.enabled) + if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)) return 0xFF; /* Make end inclusive end, instead of exclusive */ end--; /* Look in fixed ranges. Just return the type as per start */ - if (mtrr_state.have_fixed && (start < 0x100000)) { + if ((start < 0x100000) && + (mtrr_state.have_fixed) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { int idx; if (start < 0x80000) { @@ -149,9 +151,6 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) * Look of multiple ranges matching this address and pick type * as per MTRR precedence */ - if (!(mtrr_state.enabled & 2)) - return mtrr_state.def_type; - prev_match = 0xFF; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -355,7 +354,9 @@ static void __init print_mtrr_state(void) mtrr_attrib_to_str(mtrr_state.def_type)); if (mtrr_state.have_fixed) { pr_debug("MTRR fixed ranges %sabled:\n", - mtrr_state.enabled & 1 ? 
"en" : "dis"); + ((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ? + "en" : "dis"); print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); for (i = 0; i < 2; ++i) print_fixed(0x80000 + i * 0x20000, 0x04000, @@ -368,7 +369,7 @@ static void __init print_mtrr_state(void) print_fixed_last(); } pr_debug("MTRR variable ranges %sabled:\n", - mtrr_state.enabled & 2 ? "en" : "dis"); + mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis"); high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { From 3d3ca416d9b0784cfcf244eeeba1bcaf421bc64d Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 May 2015 10:28:07 +0200 Subject: [PATCH 317/481] x86/mm/mtrr: Use symbolic define as a retval for disabled MTRRs mtrr_type_lookup() returns verbatim 0xFF when MTRRs are disabled. This patch defines MTRR_TYPE_INVALID to clarify the meaning of this value, and documents its usage. Document the return values of the kernel virtual address mapping helpers pud_set_huge(), pmd_set_huge, pud_clear_huge() and pmd_clear_huge(). There is no functional change in this patch. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1431714237-880-5-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1432628901-18044-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 2 +- arch/x86/include/uapi/asm/mtrr.h | 8 +++++- arch/x86/kernel/cpu/mtrr/generic.c | 14 +++++----- arch/x86/mm/pgtable.c | 42 +++++++++++++++++++++++------- 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index ef927948657c..bb03a547c1ab 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -55,7 +55,7 @@ static inline u8 mtrr_type_lookup(u64 addr, u64 end) /* * Return no-MTRRs: */ - return 0xff; + return MTRR_TYPE_INVALID; } #define mtrr_save_fixed_ranges(arg) do {} while (0) #define mtrr_save_state() do {} while (0) diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h index d0acb658c8f4..7528dcf59691 100644 --- a/arch/x86/include/uapi/asm/mtrr.h +++ b/arch/x86/include/uapi/asm/mtrr.h @@ -103,7 +103,7 @@ struct mtrr_state_type { #define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry) #define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry) -/* These are the region types */ +/* MTRR memory types, which are defined in SDM */ #define MTRR_TYPE_UNCACHABLE 0 #define MTRR_TYPE_WRCOMB 1 /*#define MTRR_TYPE_ 2*/ @@ -113,5 +113,11 @@ struct mtrr_state_type { #define MTRR_TYPE_WRBACK 6 #define MTRR_NUM_TYPES 7 +/* + * Invalid MTRR memory type. mtrr_type_lookup() returns this value when + * MTRRs are disabled. Note, this value is allocated from the reserved + * values (0x7-0xff) of the MTRR memory types. 
+ */ +#define MTRR_TYPE_INVALID 0xff #endif /* _UAPI_ASM_X86_MTRR_H */ diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index b0599dbb899a..7b1491c6232d 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -104,7 +104,7 @@ static int check_type_overlap(u8 *prev, u8 *curr) /* * Error/Semi-error returns: - * 0xFF - when MTRR is not enabled + * MTRR_TYPE_INVALID - when MTRR is not enabled * *repeat == 1 implies [start:end] spanned across MTRR range and type returned * corresponds only to [start:*partial_end]. * Caller has to lookup again for [*partial_end:end]. @@ -117,10 +117,10 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) *repeat = 0; if (!mtrr_state_set) - return 0xFF; + return MTRR_TYPE_INVALID; if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)) - return 0xFF; + return MTRR_TYPE_INVALID; /* Make end inclusive end, instead of exclusive */ end--; @@ -151,7 +151,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) * Look of multiple ranges matching this address and pick type * as per MTRR precedence */ - prev_match = 0xFF; + prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -206,7 +206,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) continue; curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; - if (prev_match == 0xFF) { + if (prev_match == MTRR_TYPE_INVALID) { prev_match = curr_match; continue; } @@ -220,7 +220,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) return MTRR_TYPE_WRBACK; } - if (prev_match != 0xFF) + if (prev_match != MTRR_TYPE_INVALID) return prev_match; return mtrr_state.def_type; @@ -229,7 +229,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) /* * Returns the effective MTRR type for the region * Error return: - * 0xFF - when MTRR is not enabled + * MTRR_TYPE_INVALID - when MTRR is not enabled */ u8 mtrr_type_lookup(u64 start, u64 end) { diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 0b97d2c75df3..c30f9819786b 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -563,16 +563,22 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, } #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +/** + * pud_set_huge - setup kernel PUD mapping + * + * MTRR can override PAT memory types with 4KiB granularity. Therefore, + * this function does not set up a huge page when the range is covered + * by a non-WB type of MTRR. MTRR_TYPE_INVALID indicates that MTRR are + * disabled. + * + * Returns 1 on success and 0 on failure. + */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { u8 mtrr; - /* - * Do not use a huge page when the range is covered by non-WB type - * of MTRRs. - */ mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != MTRR_TYPE_INVALID)) return 0; prot = pgprot_4k_2_large(prot); @@ -584,16 +590,22 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) return 1; } +/** + * pmd_set_huge - setup kernel PMD mapping + * + * MTRR can override PAT memory types with 4KiB granularity. Therefore, + * this function does not set up a huge page when the range is covered + * by a non-WB type of MTRR. MTRR_TYPE_INVALID indicates that MTRR are + * disabled. + * + * Returns 1 on success and 0 on failure. 
+ */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { u8 mtrr; - /* - * Do not use a huge page when the range is covered by non-WB type - * of MTRRs. - */ mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != MTRR_TYPE_INVALID)) return 0; prot = pgprot_4k_2_large(prot); @@ -605,6 +617,11 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) return 1; } +/** + * pud_clear_huge - clear kernel PUD mapping when it is set + * + * Returns 1 on success and 0 on failure (no PUD map is found). + */ int pud_clear_huge(pud_t *pud) { if (pud_large(*pud)) { @@ -615,6 +632,11 @@ int pud_clear_huge(pud_t *pud) return 0; } +/** + * pmd_clear_huge - clear kernel PMD mapping when it is set + * + * Returns 1 on success and 0 on failure (no PMD map is found). + */ int pmd_clear_huge(pmd_t *pmd) { if (pmd_large(*pmd)) { From 0cc705f56e400764a171055f727d28a48260bb4b Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 May 2015 10:28:08 +0200 Subject: [PATCH 318/481] x86/mm/mtrr: Clean up mtrr_type_lookup() MTRRs contain fixed and variable entries. mtrr_type_lookup() may repeatedly call __mtrr_type_lookup() to handle a request that overlaps with variable entries. However, __mtrr_type_lookup() also handles the fixed entries, which do not have to be repeated. Therefore, this patch creates separate functions, mtrr_type_lookup_fixed() and mtrr_type_lookup_variable(), to handle the fixed and variable ranges respectively. The patch also updates the function headers to clarify the return values and output argument. It updates comments to clarify that the repeating is necessary to handle overlaps with the default type, since overlaps with multiple entries alone can be handled without such repeating. There is no functional change in this patch. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1431714237-880-6-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1432628901-18044-6-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 138 ++++++++++++++++++----------- 1 file changed, 86 insertions(+), 52 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 7b1491c6232d..e51100c49eea 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -102,55 +102,68 @@ static int check_type_overlap(u8 *prev, u8 *curr) return 0; } -/* - * Error/Semi-error returns: - * MTRR_TYPE_INVALID - when MTRR is not enabled - * *repeat == 1 implies [start:end] spanned across MTRR range and type returned - * corresponds only to [start:*partial_end]. - * Caller has to lookup again for [*partial_end:end]. +/** + * mtrr_type_lookup_fixed - look up memory type in MTRR fixed entries + * + * Return the MTRR fixed memory type of 'start'. 
+ * + * MTRR fixed entries are divided into the following ways: + * 0x00000 - 0x7FFFF : This range is divided into eight 64KB sub-ranges + * 0x80000 - 0xBFFFF : This range is divided into sixteen 16KB sub-ranges + * 0xC0000 - 0xFFFFF : This range is divided into sixty-four 4KB sub-ranges + * + * Return Values: + * MTRR_TYPE_(type) - Matched memory type + * MTRR_TYPE_INVALID - Unmatched */ -static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) +static u8 mtrr_type_lookup_fixed(u64 start, u64 end) +{ + int idx; + + if (start >= 0x100000) + return MTRR_TYPE_INVALID; + + /* 0x0 - 0x7FFFF */ + if (start < 0x80000) { + idx = 0; + idx += (start >> 16); + return mtrr_state.fixed_ranges[idx]; + /* 0x80000 - 0xBFFFF */ + } else if (start < 0xC0000) { + idx = 1 * 8; + idx += ((start - 0x80000) >> 14); + return mtrr_state.fixed_ranges[idx]; + } + + /* 0xC0000 - 0xFFFFF */ + idx = 3 * 8; + idx += ((start - 0xC0000) >> 12); + return mtrr_state.fixed_ranges[idx]; +} + +/** + * mtrr_type_lookup_variable - look up memory type in MTRR variable entries + * + * Return Value: + * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched) + * + * Output Argument: + * repeat - Set to 1 when [start:end] spanned across MTRR range and type + * returned corresponds only to [start:*partial_end]. Caller has + * to lookup again for [*partial_end:end]. + */ +static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, + int *repeat) { int i; u64 base, mask; u8 prev_match, curr_match; *repeat = 0; - if (!mtrr_state_set) - return MTRR_TYPE_INVALID; - if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)) - return MTRR_TYPE_INVALID; - - /* Make end inclusive end, instead of exclusive */ + /* Make end inclusive instead of exclusive */ end--; - /* Look in fixed ranges. Just return the type as per start */ - if ((start < 0x100000) && - (mtrr_state.have_fixed) && - (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { - int idx; - - if (start < 0x80000) { - idx = 0; - idx += (start >> 16); - return mtrr_state.fixed_ranges[idx]; - } else if (start < 0xC0000) { - idx = 1 * 8; - idx += ((start - 0x80000) >> 14); - return mtrr_state.fixed_ranges[idx]; - } else { - idx = 3 * 8; - idx += ((start - 0xC0000) >> 12); - return mtrr_state.fixed_ranges[idx]; - } - } - - /* - * Look in variable ranges - * Look of multiple ranges matching this address and pick type - * as per MTRR precedence - */ prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -186,7 +199,8 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) * advised to lookup again after having adjusted start * and end. * - * Note: This way we handle multiple overlaps as well. + * Note: This way we handle overlaps with multiple + * entries and the default type properly. 
*/ if (start_state) *partial_end = base + get_mtrr_size(mask); @@ -215,21 +229,18 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) return curr_match; } - if (mtrr_tom2) { - if (start >= (1ULL<<32) && (end < mtrr_tom2)) - return MTRR_TYPE_WRBACK; - } - if (prev_match != MTRR_TYPE_INVALID) return prev_match; return mtrr_state.def_type; } -/* - * Returns the effective MTRR type for the region - * Error return: - * MTRR_TYPE_INVALID - when MTRR is not enabled +/** + * mtrr_type_lookup - look up memory type in MTRR + * + * Return Values: + * MTRR_TYPE_(type) - The effective MTRR type for the region + * MTRR_TYPE_INVALID - MTRR is disabled */ u8 mtrr_type_lookup(u64 start, u64 end) { @@ -237,22 +248,45 @@ u8 mtrr_type_lookup(u64 start, u64 end) int repeat; u64 partial_end; - type = __mtrr_type_lookup(start, end, &partial_end, &repeat); + if (!mtrr_state_set) + return MTRR_TYPE_INVALID; + + if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)) + return MTRR_TYPE_INVALID; + + /* + * Look up the fixed ranges first, which take priority over + * the variable ranges. + */ + if ((start < 0x100000) && + (mtrr_state.have_fixed) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) + return mtrr_type_lookup_fixed(start, end); + + /* + * Look up the variable ranges. Look of multiple ranges matching + * this address and pick type as per MTRR precedence. + */ + type = mtrr_type_lookup_variable(start, end, &partial_end, &repeat); /* * Common path is with repeat = 0. * However, we can have cases where [start:end] spans across some - * MTRR range. Do repeated lookups for that case here. + * MTRR ranges and/or the default type. Do repeated lookups for + * that case here. */ while (repeat) { prev_type = type; start = partial_end; - type = __mtrr_type_lookup(start, end, &partial_end, &repeat); + type = mtrr_type_lookup_variable(start, end, &partial_end, &repeat); if (check_type_overlap(&prev_type, &type)) return type; } + if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2)) + return MTRR_TYPE_WRBACK; + return type; } From b73522e0c1be58d3c69b124985b8ccf94e3677f7 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 May 2015 10:28:10 +0200 Subject: [PATCH 319/481] x86/mm/mtrr: Enhance MTRR checks in kernel mapping helpers This patch adds the argument 'uniform' to mtrr_type_lookup(), which gets set to 1 when a given range is covered uniformly by MTRRs, i.e. the range is fully covered by a single MTRR entry or the default type. Change pud_set_huge() and pmd_set_huge() to honor the 'uniform' flag to see if it is safe to create a huge page mapping in the range. This allows them to create a huge page mapping in a range covered by a single MTRR entry of any memory type. It also detects a non-optimal request properly. They continue to check with the WB type since it does not effectively change the uniform mapping even if a request spans multiple MTRR entries. pmd_set_huge() logs a warning message to a non-optimal request so that driver writers will be aware of such a case. Drivers should make a mapping request aligned to a single MTRR entry when the range is covered by MTRRs. Signed-off-by: Toshi Kani [ Realign, flesh out comments, improve warning message. ] Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1431714237-880-7-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1432628901-18044-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 4 +-- arch/x86/kernel/cpu/mtrr/generic.c | 40 ++++++++++++++++++++++-------- arch/x86/mm/pat.c | 4 +-- arch/x86/mm/pgtable.c | 38 +++++++++++++++++----------- 4 files changed, 58 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index bb03a547c1ab..a31759e1edd9 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -31,7 +31,7 @@ * arch_phys_wc_add and arch_phys_wc_del. */ # ifdef CONFIG_MTRR -extern u8 mtrr_type_lookup(u64 addr, u64 end); +extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform); extern void mtrr_save_fixed_ranges(void *); extern void mtrr_save_state(void); extern int mtrr_add(unsigned long base, unsigned long size, @@ -50,7 +50,7 @@ extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); extern int phys_wc_to_mtrr_index(int handle); # else -static inline u8 mtrr_type_lookup(u64 addr, u64 end) +static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { /* * Return no-MTRRs: diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e51100c49eea..f782d9b62cb3 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -147,19 +147,24 @@ static u8 mtrr_type_lookup_fixed(u64 start, u64 end) * Return Value: * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched) * - * Output Argument: + * Output Arguments: * repeat - Set to 1 when [start:end] spanned across MTRR range and type * returned corresponds only to [start:*partial_end]. Caller has * to lookup again for [*partial_end:end]. + * + * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the + * region is fully covered by a single MTRR entry or the default + * type. */ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, - int *repeat) + int *repeat, u8 *uniform) { int i; u64 base, mask; u8 prev_match, curr_match; *repeat = 0; + *uniform = 1; /* Make end inclusive instead of exclusive */ end--; @@ -214,6 +219,7 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, end = *partial_end - 1; /* end is inclusive */ *repeat = 1; + *uniform = 0; } if ((start & mask) != (base & mask)) @@ -225,6 +231,7 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, continue; } + *uniform = 0; if (check_type_overlap(&prev_match, &curr_match)) return curr_match; } @@ -241,10 +248,15 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, * Return Values: * MTRR_TYPE_(type) - The effective MTRR type for the region * MTRR_TYPE_INVALID - MTRR is disabled + * + * Output Argument: + * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the + * region is fully covered by a single MTRR entry or the default + * type. 
*/ -u8 mtrr_type_lookup(u64 start, u64 end) +u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) { - u8 type, prev_type; + u8 type, prev_type, is_uniform = 1, dummy; int repeat; u64 partial_end; @@ -260,14 +272,18 @@ u8 mtrr_type_lookup(u64 start, u64 end) */ if ((start < 0x100000) && (mtrr_state.have_fixed) && - (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) - return mtrr_type_lookup_fixed(start, end); + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { + is_uniform = 0; + type = mtrr_type_lookup_fixed(start, end); + goto out; + } /* * Look up the variable ranges. Look of multiple ranges matching * this address and pick type as per MTRR precedence. */ - type = mtrr_type_lookup_variable(start, end, &partial_end, &repeat); + type = mtrr_type_lookup_variable(start, end, &partial_end, + &repeat, &is_uniform); /* * Common path is with repeat = 0. @@ -278,15 +294,19 @@ u8 mtrr_type_lookup(u64 start, u64 end) while (repeat) { prev_type = type; start = partial_end; - type = mtrr_type_lookup_variable(start, end, &partial_end, &repeat); + is_uniform = 0; + type = mtrr_type_lookup_variable(start, end, &partial_end, + &repeat, &dummy); if (check_type_overlap(&prev_type, &type)) - return type; + goto out; } if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2)) - return MTRR_TYPE_WRBACK; + type = MTRR_TYPE_WRBACK; +out: + *uniform = is_uniform; return type; } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 35af6771a95a..372ad422c2c3 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -267,9 +267,9 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, * request is for WB. */ if (req_type == _PAGE_CACHE_MODE_WB) { - u8 mtrr_type; + u8 mtrr_type, uniform; - mtrr_type = mtrr_type_lookup(start, end); + mtrr_type = mtrr_type_lookup(start, end, &uniform); if (mtrr_type != MTRR_TYPE_WRBACK) return _PAGE_CACHE_MODE_UC_MINUS; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index c30f9819786b..fb0a9dd1d6e4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -566,19 +566,28 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, /** * pud_set_huge - setup kernel PUD mapping * - * MTRR can override PAT memory types with 4KiB granularity. Therefore, - * this function does not set up a huge page when the range is covered - * by a non-WB type of MTRR. MTRR_TYPE_INVALID indicates that MTRR are - * disabled. + * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this + * function sets up a huge page only if any of the following conditions are met: + * + * - MTRRs are disabled, or + * + * - MTRRs are enabled and the range is completely covered by a single MTRR, or + * + * - MTRRs are enabled and the corresponding MTRR memory type is WB, which + * has no effect on the requested PAT memory type. + * + * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger + * page mapping attempt fails. * * Returns 1 on success and 0 on failure. 
*/ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { - u8 mtrr; + u8 mtrr, uniform; - mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != MTRR_TYPE_INVALID)) + mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); + if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && + (mtrr != MTRR_TYPE_WRBACK)) return 0; prot = pgprot_4k_2_large(prot); @@ -593,20 +602,21 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) /** * pmd_set_huge - setup kernel PMD mapping * - * MTRR can override PAT memory types with 4KiB granularity. Therefore, - * this function does not set up a huge page when the range is covered - * by a non-WB type of MTRR. MTRR_TYPE_INVALID indicates that MTRR are - * disabled. + * See text over pud_set_huge() above. * * Returns 1 on success and 0 on failure. */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { - u8 mtrr; + u8 mtrr, uniform; - mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != MTRR_TYPE_INVALID)) + mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); + if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && + (mtrr != MTRR_TYPE_WRBACK)) { + pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n", + __func__, addr, addr + PMD_SIZE); return 0; + } prot = pgprot_4k_2_large(prot); From 9e76561f6a8a1a1c4f3152a3fb403ef9d6cfc2ff Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:11 +0200 Subject: [PATCH 320/481] x86/mm/pat: Convert to pr_*() usage Use pr_info() instead of the old printk to prefix the component where things are coming from. With this readers will know exactly where the message is coming from. We use pr_* helpers but define pr_fmt to the empty string for easier grepping for those error messages. We leave the users of dprintk() in place, this will print only when the debugpat kernel parameter is enabled. We want to leave those enabled as a debug feature, but also make them use the same prefix. Signed-off-by: Luis R. Rodriguez [ Kill pr_fmt. ] Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Andy Walls Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Vetter Cc: Dave Airlie Cc: Denys Vlasenko Cc: Doug Ledford Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Michael S. 
Tsirkin Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: cocci@systeme.lip6.fr Cc: plagnioj@jcrosoft.com Cc: tomi.valkeinen@ti.com Link: http://lkml.kernel.org/r/1430425520-22275-2-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 44 +++++++++++++++++++------------------- arch/x86/mm/pat_internal.h | 2 +- arch/x86/mm/pat_rbtree.c | 6 +++--- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 372ad422c2c3..8c50b9bfa996 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -33,13 +33,16 @@ #include "pat_internal.h" #include "mm_internal.h" +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + #ifdef CONFIG_X86_PAT int __read_mostly pat_enabled = 1; static inline void pat_disable(const char *reason) { pat_enabled = 0; - printk(KERN_INFO "%s\n", reason); + pr_info("x86/PAT: %s\n", reason); } static int __init nopat(char *str) @@ -188,7 +191,7 @@ void pat_init_cache_modes(void) pat_msg + 4 * i); update_cache_mode_entry(i, cache); } - pr_info("PAT configuration [0-7]: %s\n", pat_msg); + pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); } #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) @@ -211,8 +214,7 @@ void pat_init(void) * switched to PAT on the boot CPU. We have no way to * undo PAT. */ - printk(KERN_ERR "PAT enabled, " - "but not supported by secondary CPU\n"); + pr_err("x86/PAT: PAT enabled, but not supported by secondary CPU\n"); BUG(); } } @@ -347,7 +349,7 @@ static int reserve_ram_pages_type(u64 start, u64 end, page = pfn_to_page(pfn); type = get_page_memtype(page); if (type != -1) { - pr_info("reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n", + pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n", start, end - 1, type, req_type); if (new_type) *new_type = type; @@ -451,9 +453,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, err = rbt_memtype_check_insert(new, new_type); if (err) { - printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", - start, end - 1, - cattr_name(new->type), cattr_name(req_type)); + pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", + start, end - 1, + cattr_name(new->type), cattr_name(req_type)); kfree(new); spin_unlock(&memtype_lock); @@ -497,8 +499,8 @@ int free_memtype(u64 start, u64 end) spin_unlock(&memtype_lock); if (!entry) { - printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", - current->comm, current->pid, start, end - 1); + pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", + current->comm, current->pid, start, end - 1); return -EINVAL; } @@ -628,8 +630,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) while (cursor < to) { if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", - current->comm, from, to - 1); + pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", + current->comm, from, to - 1); return 0; } cursor += PAGE_SIZE; @@ -698,8 +700,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, size; if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) { - printk(KERN_INFO "%s:%d ioremap_change_attr failed %s " - "for [mem %#010Lx-%#010Lx]\n", + pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for 
[mem %#010Lx-%#010Lx]\n", current->comm, current->pid, cattr_name(pcm), base, (unsigned long long)(base + size-1)); @@ -734,7 +735,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, pcm = lookup_memtype(paddr); if (want_pcm != pcm) { - printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", + pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", current->comm, current->pid, cattr_name(want_pcm), (unsigned long long)paddr, @@ -755,13 +756,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, if (strict_prot || !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { free_memtype(paddr, paddr + size); - printk(KERN_ERR "%s:%d map pfn expected mapping type %s" - " for [mem %#010Lx-%#010Lx], got %s\n", - current->comm, current->pid, - cattr_name(want_pcm), - (unsigned long long)paddr, - (unsigned long long)(paddr + size - 1), - cattr_name(pcm)); + pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n", + current->comm, current->pid, + cattr_name(want_pcm), + (unsigned long long)paddr, + (unsigned long long)(paddr + size - 1), + cattr_name(pcm)); return -EINVAL; } /* diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h index f6411620305d..a739bfc40690 100644 --- a/arch/x86/mm/pat_internal.h +++ b/arch/x86/mm/pat_internal.h @@ -4,7 +4,7 @@ extern int pat_debug_enable; #define dprintk(fmt, arg...) \ - do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) + do { if (pat_debug_enable) pr_info("x86/PAT: " fmt, ##arg); } while (0) struct memtype { u64 start; diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 6582adcc8bd9..63931080366a 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -160,9 +160,9 @@ success: return 0; failure: - printk(KERN_INFO "%s:%d conflicting memory types " - "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, - end, cattr_name(found_type), cattr_name(match->type)); + pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", + current->comm, current->pid, start, end, + cattr_name(found_type), cattr_name(match->type)); return -EBUSY; } From 2f9e897353fcb99effd6eff22f7b464f8e2a659a Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:12 +0200 Subject: [PATCH 321/481] x86/mm/mtrr, pat: Document Write Combining MTRR type effects on PAT / non-PAT pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of the effort to phase out MTRR use document write-combining MTRR effects on pages with different non-PAT page attributes flags and different PAT entry values. Extend arch_phys_wc_add() documentation to clarify power of two sizes / boundary requirements as we phase out mtrr_add() use. Lastly hint towards ioremap_uc() for corner cases on device drivers working with devices with mixed regions where MTRR size requirements would otherwise not enable write-combining effective memory types. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Vetter Cc: Dave Airlie Cc: Dave Hansen Cc: Davidlohr Bueso Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jean-Christophe Plagniol-Villard Cc: Jonathan Corbet Cc: Juergen Gross Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Suresh Siddha Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Ville Syrjälä Cc: Vlastimil Babka Cc: linux-fbdev@vger.kernel.org Link: http://lkml.kernel.org/r/1430343851-967-3-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-10-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- Documentation/x86/mtrr.txt | 18 +++++++++++++++--- Documentation/x86/pat.txt | 33 +++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/main.c | 3 +++ 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/Documentation/x86/mtrr.txt b/Documentation/x86/mtrr.txt index cc071dc333c2..860bc3adc223 100644 --- a/Documentation/x86/mtrr.txt +++ b/Documentation/x86/mtrr.txt @@ -1,7 +1,19 @@ MTRR (Memory Type Range Register) control -3 Jun 1999 -Richard Gooch - + +Richard Gooch - 3 Jun 1999 +Luis R. Rodriguez - April 9, 2015 + +=============================================================================== +Phasing out MTRR use + +MTRR use is replaced on modern x86 hardware with PAT. Over time the only type +of effective MTRR that is expected to be supported will be for write-combining. +As MTRR use is phased out device drivers should use arch_phys_wc_add() to make +MTRR effective on non-PAT systems while a no-op on PAT enabled systems. + +For details refer to Documentation/x86/pat.txt. + +=============================================================================== On Intel P6 family processors (Pentium Pro, Pentium II and later) the Memory Type Range Registers (MTRRs) may be used to control diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt index cf08c9fff3cd..521bd8adc3b8 100644 --- a/Documentation/x86/pat.txt +++ b/Documentation/x86/pat.txt @@ -34,6 +34,8 @@ ioremap | -- | UC- | UC- | | | | | ioremap_cache | -- | WB | WB | | | | | +ioremap_uc | -- | UC | UC | + | | | | ioremap_nocache | -- | UC- | UC- | | | | | ioremap_wc | -- | -- | WC | @@ -102,7 +104,38 @@ wants to export a RAM region, it has to do set_memory_uc() or set_memory_wc() as step 0 above and also track the usage of those pages and use set_memory_wb() before the page is freed to free pool. +MTRR effects on PAT / non-PAT systems +------------------------------------- +The following table provides the effects of using write-combining MTRRs when +using ioremap*() calls on x86 for both non-PAT and PAT systems. Ideally +mtrr_add() usage will be phased out in favor of arch_phys_wc_add() which will +be a no-op on PAT enabled systems. The region over which a arch_phys_wc_add() +is made, should already have been ioremapped with WC attributes or PAT entries, +this can be done by using ioremap_wc() / set_memory_wc(). Devices which +combine areas of IO memory desired to remain uncacheable with areas where +write-combining is desirable should consider use of ioremap_uc() followed by +set_memory_wc() to white-list effective write-combined areas. Such use is +nevertheless discouraged as the effective memory type is considered +implementation defined, yet this strategy can be used as last resort on devices +with size-constrained regions where otherwise MTRR write-combining would +otherwise not be effective. 
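As a concrete illustration of the pattern recommended above, a minimal driver-side sketch (not part of the patch; the device, BAR index and error handling are assumptions): ioremap_wc() provides the WC page attribute where PAT is available, and arch_phys_wc_add() adds a write-combining MTRR only on non-PAT systems, returning 0 as a no-op otherwise.

#include <linux/io.h>
#include <linux/pci.h>

static void __iomem *fb_mmio;	/* hypothetical framebuffer mapping */
static int fb_wc_handle;

static int fb_map_wc(struct pci_dev *pdev, int bar)
{
	resource_size_t base = pci_resource_start(pdev, bar);
	resource_size_t size = pci_resource_len(pdev, bar);

	fb_mmio = ioremap_wc(base, size);	/* WC via PAT page attributes */
	if (!fb_mmio)
		return -ENOMEM;

	/* Adds a WC MTRR on non-PAT systems; a no-op returning 0 with PAT */
	fb_wc_handle = arch_phys_wc_add(base, size);
	return 0;
}

static void fb_unmap_wc(void)
{
	arch_phys_wc_del(fb_wc_handle);
	iounmap(fb_mmio);
}

The table below then gives the effective memory type when such a mapping overlaps a write-combining MTRR, for non-PAT and PAT systems.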
+ +---------------------------------------------------------------------- +MTRR Non-PAT PAT Linux ioremap value Effective memory type +---------------------------------------------------------------------- + Non-PAT | PAT + PAT + |PCD + ||PWT + ||| +WC 000 WB _PAGE_CACHE_MODE_WB WC | WC +WC 001 WC _PAGE_CACHE_MODE_WC WC* | WC +WC 010 UC- _PAGE_CACHE_MODE_UC_MINUS WC* | UC +WC 011 UC _PAGE_CACHE_MODE_UC UC | UC +---------------------------------------------------------------------- + +(*) denotes implementation defined and is discouraged Notes: diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index ea5f363a1948..04aceb7e6443 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -538,6 +538,9 @@ EXPORT_SYMBOL(mtrr_del); * attempts to add a WC MTRR covering size bytes starting at base and * logs an error if this fails. * + * The called should provide a power of two size on an equivalent + * power of two boundary. + * * Drivers must store the return value to pass to mtrr_del_wc_if_needed, * but drivers should not try to interpret that return value. */ From 7d010fdf299929f9583ce5e17da629dcd83c36ef Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:13 +0200 Subject: [PATCH 322/481] x86/mm/mtrr: Avoid #ifdeffery with phys_wc_to_mtrr_index() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is only one user but since we're going to bury MTRR next out of access to drivers, expose this last piece of API to drivers in a general fashion only needing io.h for access to helpers. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Abhilash Kesavan Cc: Andrew Morton Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Borislav Petkov Cc: Brian Gerst Cc: Catalin Marinas Cc: Cristian Stoica Cc: Daniel Vetter Cc: Dave Airlie Cc: Dave Hansen Cc: Davidlohr Bueso Cc: Denys Vlasenko Cc: Greg Kroah-Hartman Cc: H. 
Peter Anvin Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Matthias Brugger Cc: Mel Gorman Cc: Peter Zijlstra Cc: Suresh Siddha Cc: Thierry Reding Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Toshi Kani Cc: Ville Syrjälä Cc: Vlastimil Babka Cc: Will Deacon Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1429722736-4473-1-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 3 +++ arch/x86/include/asm/mtrr.h | 5 ----- arch/x86/kernel/cpu/mtrr/main.c | 6 +++--- drivers/gpu/drm/drm_ioctl.c | 14 +------------- include/linux/io.h | 7 +++++++ 5 files changed, 14 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 4afc05ffa566..a2b97404922d 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -339,6 +339,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, #define IO_SPACE_LIMIT 0xffff #ifdef CONFIG_MTRR +extern int __must_check arch_phys_wc_index(int handle); +#define arch_phys_wc_index arch_phys_wc_index + extern int __must_check arch_phys_wc_add(unsigned long base, unsigned long size); extern void arch_phys_wc_del(int handle); diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index a31759e1edd9..b94f6f64e23d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -48,7 +48,6 @@ extern void mtrr_aps_init(void); extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); -extern int phys_wc_to_mtrr_index(int handle); # else static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { @@ -84,10 +83,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { } -static inline int phys_wc_to_mtrr_index(int handle) -{ - return -1; -} #define mtrr_ap_init() do {} while (0) #define mtrr_bp_init() do {} while (0) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 04aceb7e6443..81baf5fee0e1 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -580,7 +580,7 @@ void arch_phys_wc_del(int handle) EXPORT_SYMBOL(arch_phys_wc_del); /* - * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value + * arch_phys_wc_index - translates arch_phys_wc_add's return value * @handle: Return value from arch_phys_wc_add * * This will turn the return value from arch_phys_wc_add into an mtrr @@ -590,14 +590,14 @@ EXPORT_SYMBOL(arch_phys_wc_del); * in printk line. Alas there is an illegitimate use in some ancient * drm ioctls. */ -int phys_wc_to_mtrr_index(int handle) +int arch_phys_wc_index(int handle) { if (handle < MTRR_TO_PHYS_WC_OFFSET) return -1; else return handle - MTRR_TO_PHYS_WC_OFFSET; } -EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index); +EXPORT_SYMBOL_GPL(arch_phys_wc_index); /* * HACK ALERT! 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 266dcd6cdf3b..0a957828b3bd 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -36,9 +36,6 @@ #include #include -#ifdef CONFIG_X86 -#include -#endif static int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -197,16 +194,7 @@ static int drm_getmap(struct drm_device *dev, void *data, map->type = r_list->map->type; map->flags = r_list->map->flags; map->handle = (void *)(unsigned long) r_list->user_token; - -#ifdef CONFIG_X86 - /* - * There appears to be exactly one user of the mtrr index: dritest. - * It's easy enough to keep it working on non-PAT systems. - */ - map->mtrr = phys_wc_to_mtrr_index(r_list->map->mtrr); -#else - map->mtrr = -1; -#endif + map->mtrr = arch_phys_wc_index(r_list->map->mtrr); mutex_unlock(&dev->struct_mutex); diff --git a/include/linux/io.h b/include/linux/io.h index 986f2bffea1e..04cce4da3685 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -111,6 +111,13 @@ static inline void arch_phys_wc_del(int handle) } #define arch_phys_wc_add arch_phys_wc_add +#ifndef arch_phys_wc_index +static inline int arch_phys_wc_index(int handle) +{ + return -1; +} +#define arch_phys_wc_index arch_phys_wc_index +#endif #endif #endif /* _LINUX_IO_H */ From f9626104a5b6815ec7d65789dfb900af5fa51e64 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:14 +0200 Subject: [PATCH 323/481] x86/mm/mtrr: Generalize runtime disabling of MTRRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible to enable CONFIG_MTRR and CONFIG_X86_PAT and end up with a system with MTRR functionality disabled but PAT functionality enabled. This can happen, for instance, when the Xen hypervisor is used where MTRRs are not supported but PAT is. This can happen on Linux as of commit 47591df50512 ("xen: Support Xen pv-domains using PAT") by Juergen, introduced in v3.19. Technically, we should assume the proper CPU bits would be set to disable MTRRs but we can't always rely on this. At least on the Xen Hypervisor, for instance, only X86_FEATURE_MTRR was disabled as of Xen 4.4 through Xen commit 586ab6a [0], but not X86_FEATURE_K6_MTRR, X86_FEATURE_CENTAUR_MCR, or X86_FEATURE_CYRIX_ARR for instance. Roger Pau Monné has clarified though that although this is technically true we will never support PVH on these CPU types so Xen has no need to disable these bits on those systems. As per Roger, AMD K6, Centaur and VIA chips don't have the necessary hardware extensions to allow running PVH guests [1]. As per Toshi it is also possible for the BIOS to disable MTRR support, in such cases get_mtrr_state() would update the MTRR state as per the BIOS, we need to propagate this information as well. x86 MTRR code relies on quite a bit of checks for mtrr_if being set to check to see if MTRRs did get set up. Instead, lets provide a generic getter for that. This also adds a few checks where they were not before which could potentially safeguard ourselves against incorrect usage of MTRR where this was not desirable. Where possible match error codes as if MTRRs were disabled on arch/x86/include/asm/mtrr.h. Lastly, since disabling MTRRs can happen at run time and we could end up with PAT enabled, best record now in our logs when MTRRs are disabled. 
[0] ~/devel/xen (git::stable-4.5)$ git describe --contains 586ab6a 4.4.0-rc1~18 [1] http://lists.xenproject.org/archives/html/xen-devel/2015-03/msg03460.html Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Vetter Cc: Dave Airlie Cc: Dave Hansen Cc: Davidlohr Bueso Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Roger Pau Monné Cc: Stefan Bader Cc: Suresh Siddha Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Toshi Kani Cc: Ville Syrjälä Cc: Vlastimil Babka Cc: bhelgaas@google.com Cc: david.vrabel@citrix.com Cc: jbeulich@suse.com Cc: konrad.wilk@oracle.com Cc: venkatesh.pallipadi@intel.com Cc: ville.syrjala@linux.intel.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/1426893517-2511-3-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-12-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 4 ++- arch/x86/kernel/cpu/mtrr/main.c | 39 ++++++++++++++++++++++++------ arch/x86/kernel/cpu/mtrr/mtrr.h | 2 +- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index f782d9b62cb3..3b533cf37c74 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -445,7 +445,7 @@ static void __init print_mtrr_state(void) } /* Grab all of the MTRR state for this CPU into *state */ -void __init get_mtrr_state(void) +bool __init get_mtrr_state(void) { struct mtrr_var_range *vrs; unsigned long flags; @@ -489,6 +489,8 @@ void __init get_mtrr_state(void) post_set(); local_irq_restore(flags); + + return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED); } /* Some BIOS's are messed up and don't set all MTRRs the same! 
*/ diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 81baf5fee0e1..383efb26e516 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -59,6 +59,12 @@ #define MTRR_TO_PHYS_WC_OFFSET 1000 u32 num_var_ranges; +static bool __mtrr_enabled; + +static bool mtrr_enabled(void) +{ + return __mtrr_enabled; +} unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); @@ -286,7 +292,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, int i, replace, error; mtrr_type ltype; - if (!mtrr_if) + if (!mtrr_enabled()) return -ENXIO; error = mtrr_if->validate_add_page(base, size, type); @@ -435,6 +441,8 @@ static int mtrr_check(unsigned long base, unsigned long size) int mtrr_add(unsigned long base, unsigned long size, unsigned int type, bool increment) { + if (!mtrr_enabled()) + return -ENODEV; if (mtrr_check(base, size)) return -EINVAL; return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, @@ -463,8 +471,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) unsigned long lbase, lsize; int error = -EINVAL; - if (!mtrr_if) - return -ENXIO; + if (!mtrr_enabled()) + return -ENODEV; max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ @@ -523,6 +531,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) */ int mtrr_del(int reg, unsigned long base, unsigned long size) { + if (!mtrr_enabled()) + return -ENODEV; if (mtrr_check(base, size)) return -EINVAL; return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); @@ -548,7 +558,7 @@ int arch_phys_wc_add(unsigned long base, unsigned long size) { int ret; - if (pat_enabled) + if (pat_enabled || !mtrr_enabled()) return 0; /* Success! (We don't need to do anything.) 
*/ ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true); @@ -737,10 +747,12 @@ void __init mtrr_bp_init(void) } if (mtrr_if) { + __mtrr_enabled = true; set_num_var_ranges(); init_table(); if (use_intel()) { - get_mtrr_state(); + /* BIOS may override */ + __mtrr_enabled = get_mtrr_state(); if (mtrr_cleanup(phys_addr)) { changed_by_mtrr_cleanup = 1; @@ -748,10 +760,16 @@ void __init mtrr_bp_init(void) } } } + + if (!mtrr_enabled()) + pr_info("MTRR: Disabled\n"); } void mtrr_ap_init(void) { + if (!mtrr_enabled()) + return; + if (!use_intel() || mtrr_aps_delayed_init) return; /* @@ -777,6 +795,9 @@ void mtrr_save_state(void) { int first_cpu; + if (!mtrr_enabled()) + return; + get_online_cpus(); first_cpu = cpumask_first(cpu_online_mask); smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1); @@ -785,6 +806,8 @@ void mtrr_save_state(void) void set_mtrr_aps_delayed_init(void) { + if (!mtrr_enabled()) + return; if (!use_intel()) return; @@ -796,7 +819,7 @@ void set_mtrr_aps_delayed_init(void) */ void mtrr_aps_init(void) { - if (!use_intel()) + if (!use_intel() || !mtrr_enabled()) return; /* @@ -813,7 +836,7 @@ void mtrr_aps_init(void) void mtrr_bp_restore(void) { - if (!use_intel()) + if (!use_intel() || !mtrr_enabled()) return; mtrr_if->set_all(); @@ -821,7 +844,7 @@ void mtrr_bp_restore(void) static int __init mtrr_init_finialize(void) { - if (!mtrr_if) + if (!mtrr_enabled()) return 0; if (use_intel()) { diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index df5e41f31a27..951884dcc433 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -51,7 +51,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); -void get_mtrr_state(void); +bool get_mtrr_state(void); extern void set_mtrr_ops(const struct mtrr_ops *ops); From cb32edf65bf2197a2d2226e94c7602dc92e295bb Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:15 +0200 Subject: [PATCH 324/481] x86/mm/pat: Wrap pat_enabled into a function API We use pat_enabled in x86-specific code to see if PAT is enabled or not but we're granting full access to it even though readers do not need to set it. If, for instance, we granted access to it to modules later they then could override the variable setting... no bueno. This renames pat_enabled to a new static variable __pat_enabled. Folks are redirected to use pat_enabled() now. Code that sets this can only be internal to pat.c. Apart from the early kernel parameter "nopat" to disable PAT, we also have a few cases that disable it later and make use of a helper pat_disable(). It is wrapped under an ifdef but since that code cannot run unless PAT was enabled its not required to wrap it with ifdefs, unwrap that. Likewise, since "nopat" doesn't really change non-PAT systems just remove that ifdef as well. Although we could add and use an early_param_off(), these helpers don't use __read_mostly but we want to keep __read_mostly for __pat_enabled as this is a hot path -- upon boot, for instance, a simple guest may see ~4k accesses to pat_enabled(). Since __read_mostly early boot params are not that common we don't add a helper for them just yet. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Andy Walls Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Christoph Lameter Cc: Daniel Vetter Cc: Dave Airlie Cc: Denys Vlasenko Cc: Doug Ledford Cc: H. 
Peter Anvin Cc: Juergen Gross Cc: Kyle McMartin Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1430425520-22275-3-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-13-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 7 +------ arch/x86/kernel/cpu/mtrr/main.c | 2 +- arch/x86/mm/iomap_32.c | 2 +- arch/x86/mm/ioremap.c | 4 ++-- arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pat.c | 35 +++++++++++++++------------------ arch/x86/pci/i386.c | 6 +++--- 7 files changed, 25 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 91bc4ba95f91..cdcff7f7f694 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -4,12 +4,7 @@ #include #include -#ifdef CONFIG_X86_PAT -extern int pat_enabled; -#else -static const int pat_enabled; -#endif - +bool pat_enabled(void); extern void pat_init(void); void pat_init_cache_modes(void); diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 383efb26e516..e7ed0d8ebacb 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -558,7 +558,7 @@ int arch_phys_wc_add(unsigned long base, unsigned long size) { int ret; - if (pat_enabled || !mtrr_enabled()) + if (pat_enabled() || !mtrr_enabled()) return 0; /* Success! (We don't need to do anything.) */ ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 9ca35fc60cfe..3a2ec8790ca7 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -82,7 +82,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) * MTRR is UC or WC. UC_MINUS gets the real intention, of the * user, which is "WC if the MTRR is WC, UC if you can't do that." */ - if (!pat_enabled && pgprot_val(prot) == + if (!pat_enabled() && pgprot_val(prot) == (__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_WC))) prot = __pgprot(__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index a493bb83aa89..82d63ed70045 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -234,7 +234,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: - * pat_enabled ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; + * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use * UC MINUS. 
Drivers that are certain they need or can already @@ -292,7 +292,7 @@ EXPORT_SYMBOL_GPL(ioremap_uc); */ void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) { - if (pat_enabled) + if (pat_enabled()) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC, __builtin_return_address(0)); else diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 397838eb292b..70d221fe2eb4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1571,7 +1571,7 @@ int set_memory_wc(unsigned long addr, int numpages) { int ret; - if (!pat_enabled) + if (!pat_enabled()) return set_memory_uc(addr, numpages); ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 8c50b9bfa996..484dce7f759b 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -36,12 +36,11 @@ #undef pr_fmt #define pr_fmt(fmt) "" fmt -#ifdef CONFIG_X86_PAT -int __read_mostly pat_enabled = 1; +static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); static inline void pat_disable(const char *reason) { - pat_enabled = 0; + __pat_enabled = 0; pr_info("x86/PAT: %s\n", reason); } @@ -51,13 +50,11 @@ static int __init nopat(char *str) return 0; } early_param("nopat", nopat); -#else -static inline void pat_disable(const char *reason) -{ - (void)reason; -} -#endif +bool pat_enabled(void) +{ + return !!__pat_enabled; +} int pat_debug_enable; @@ -201,7 +198,7 @@ void pat_init(void) u64 pat; bool boot_cpu = !boot_pat_state; - if (!pat_enabled) + if (!pat_enabled()) return; if (!cpu_has_pat) { @@ -402,7 +399,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, BUG_ON(start >= end); /* end is exclusive */ - if (!pat_enabled) { + if (!pat_enabled()) { /* This is identical to page table setting without PAT */ if (new_type) { if (req_type == _PAGE_CACHE_MODE_WC) @@ -477,7 +474,7 @@ int free_memtype(u64 start, u64 end) int is_range_ram; struct memtype *entry; - if (!pat_enabled) + if (!pat_enabled()) return 0; /* Low ISA region is always mapped WB. No need to track */ @@ -625,7 +622,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) u64 to = from + size; u64 cursor = from; - if (!pat_enabled) + if (!pat_enabled()) return 1; while (cursor < to) { @@ -661,7 +658,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, * caching for the high addresses through the KEN pin, but * we maintain the tradition of paranoia in this code. */ - if (!pat_enabled && + if (!pat_enabled() && !(boot_cpu_has(X86_FEATURE_MTRR) || boot_cpu_has(X86_FEATURE_K6_MTRR) || boot_cpu_has(X86_FEATURE_CYRIX_ARR) || @@ -730,7 +727,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, * the type requested matches the type of first page in the range. 
*/ if (is_ram) { - if (!pat_enabled) + if (!pat_enabled()) return 0; pcm = lookup_memtype(paddr); @@ -844,7 +841,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, return ret; } - if (!pat_enabled) + if (!pat_enabled()) return 0; /* @@ -872,7 +869,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, { enum page_cache_mode pcm; - if (!pat_enabled) + if (!pat_enabled()) return 0; /* Set prot based on lookup */ @@ -913,7 +910,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, pgprot_t pgprot_writecombine(pgprot_t prot) { - if (pat_enabled) + if (pat_enabled()) return __pgprot(pgprot_val(prot) | cachemode2protval(_PAGE_CACHE_MODE_WC)); else @@ -996,7 +993,7 @@ static const struct file_operations memtype_fops = { static int __init pat_memtype_list_init(void) { - if (pat_enabled) { + if (pat_enabled()) { debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, NULL, &memtype_fops); } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 349c0d32cc0b..0a9f2caf358f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -429,12 +429,12 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, * Caller can followup with UC MINUS request and add a WC mtrr if there * is a free mtrr slot. */ - if (!pat_enabled && write_combine) + if (!pat_enabled() && write_combine) return -EINVAL; - if (pat_enabled && write_combine) + if (pat_enabled() && write_combine) prot |= cachemode2protval(_PAGE_CACHE_MODE_WC); - else if (pat_enabled || boot_cpu_data.x86 > 3) + else if (pat_enabled() || boot_cpu_data.x86 > 3) /* * ioremap() and ioremap_nocache() defaults to UC MINUS for now. * To avoid attribute conflicts, request UC MINUS here From fbe7193aa4787f27c84216d130ab877efc310d57 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:16 +0200 Subject: [PATCH 325/481] x86/mm/pat: Export pat_enabled() Two Linux device drivers cannot work with PAT and the work required to make them work is significant. There is not enough motivation to convert these drivers over to use PAT properly, the compromise reached is to let drivers that cannot be ported to PAT check if PAT was enabled and if so fail on probe with a recommendation to boot with the "nopat" kernel parameter. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Andy Walls Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Vetter Cc: Dave Airlie Cc: Denys Vlasenko Cc: Doug Ledford Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1430425520-22275-4-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-14-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 484dce7f759b..a1c96544099d 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -55,6 +55,7 @@ bool pat_enabled(void) { return !!__pat_enabled; } +EXPORT_SYMBOL_GPL(pat_enabled); int pat_debug_enable; From 3a1407559a593d4360af12dd2df5296bf8eb0d28 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Wed, 27 May 2015 04:22:07 +0000 Subject: [PATCH 326/481] dm: fix NULL pointer when clone_and_map_rq returns !DM_MAPIO_REMAPPED When stacking request-based DM on blk_mq device, request cloning and remapping are done in a single call to target's clone_and_map_rq(). 
The clone is allocated and valid only if clone_and_map_rq() returns DM_MAPIO_REMAPPED. The "IS_ERR(clone)" check in map_request() does not cover all the !DM_MAPIO_REMAPPED cases that are possible (E.g. if underlying devices are not ready or unavailable, clone_and_map_rq() may return DM_MAPIO_REQUEUE without ever having established an ERR_PTR). Fix this by explicitly checking for a return that is not DM_MAPIO_REMAPPED in map_request(). Without this fix, DM core may call setup_clone() for a NULL clone and oops like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000068 IP: [] blk_rq_prep_clone+0x7d/0x137 ... CPU: 2 PID: 5793 Comm: kdmwork-253:3 Not tainted 4.0.0-nm #1 ... Call Trace: [] map_tio_request+0xa9/0x258 [dm_mod] [] kthread_worker_fn+0xfd/0x150 [] ? kthread_parkme+0x24/0x24 [] ? kthread_parkme+0x24/0x24 [] kthread+0xe6/0xee [] ? put_lock_stats+0xe/0x20 [] ? __init_kthread_worker+0x5b/0x5b [] ret_from_fork+0x58/0x90 [] ? __init_kthread_worker+0x5b/0x5b Fixes: e5863d9ad ("dm: allocate requests in target when stacking on blk-mq devices") Reported-by: Bart Van Assche Signed-off-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.0+ --- drivers/md/dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0bf79a0bad37..1c62ed8d09f4 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1972,8 +1972,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq, dm_kill_unmapped_request(rq, r); return r; } - if (IS_ERR(clone)) - return DM_MAPIO_REQUEUE; + if (r != DM_MAPIO_REMAPPED) + return r; if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { /* -ENOMEM */ ti->type->release_clone_rq(clone); From 0fa372b6c95013af1334b3d5c9b5f03a70ecedab Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 May 2015 16:17:19 +0200 Subject: [PATCH 327/481] ALSA: hda - Fix noise on AMD radeon 290x controller A new AMD controller [1002:aac8] seems to need the quirk for other AMD NS HDMI stuff, otherwise it gives noisy sounds. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=99021 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 34040d26c94f..fea198c58196 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2089,6 +2089,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaab0), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0xaac8), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, /* VIA VT8251/VT8237A */ { PCI_DEVICE(0x1106, 0x3288), .driver_data = AZX_DRIVER_VIA | AZX_DCAPS_POSFIX_VIA }, From 2d1c18bba15daf89d75ce475ecd2068f483aa12f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 27 May 2015 11:43:53 -0400 Subject: [PATCH 328/481] Revert "drm/radeon: only mark audio as connected if the monitor supports it (v3)" This breaks too many things. bugs: https://bugzilla.kernel.org/show_bug.cgi?id=99041 https://bugs.freedesktop.org/show_bug.cgi?id=90681 This reverts commit 0f55db36d49d45b80eff0c0a2a498766016f458b. 
Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_audio.c | 27 ++++++++++------------ drivers/gpu/drm/radeon/radeon_connectors.c | 8 ++----- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index dcb779647c57..25191f126f3b 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -460,9 +460,6 @@ void radeon_audio_detect(struct drm_connector *connector, if (!connector || !connector->encoder) return; - if (!radeon_encoder_is_digital(connector->encoder)) - return; - rdev = connector->encoder->dev->dev_private; if (!radeon_audio_chipset_supported(rdev)) @@ -471,26 +468,26 @@ void radeon_audio_detect(struct drm_connector *connector, radeon_encoder = to_radeon_encoder(connector->encoder); dig = radeon_encoder->enc_priv; - if (!dig->afmt) - return; - if (status == connector_status_connected) { - struct radeon_connector *radeon_connector = to_radeon_connector(connector); + struct radeon_connector *radeon_connector; + int sink_type; + + if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { + radeon_encoder->audio = NULL; + return; + } + + radeon_connector = to_radeon_connector(connector); + sink_type = radeon_dp_getsinktype(radeon_connector); if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && - radeon_dp_getsinktype(radeon_connector) == - CONNECTOR_OBJECT_ID_DISPLAYPORT) + sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) radeon_encoder->audio = rdev->audio.dp_funcs; else radeon_encoder->audio = rdev->audio.hdmi_funcs; dig->afmt->pin = radeon_audio_get_pin(connector->encoder); - if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { - radeon_audio_enable(rdev, dig->afmt->pin, 0xf); - } else { - radeon_audio_enable(rdev, dig->afmt->pin, 0); - dig->afmt->pin = NULL; - } + radeon_audio_enable(rdev, dig->afmt->pin, 0xf); } else { radeon_audio_enable(rdev, dig->afmt->pin, 0); dig->afmt->pin = NULL; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index d17d251dbd4f..cebb65e07e1d 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1379,10 +1379,8 @@ out: /* updated in get modes as well since we need to know if it's analog or digital */ radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) { - radeon_connector_get_edid(connector); + if (radeon_audio != 0) radeon_audio_detect(connector, ret); - } exit: pm_runtime_mark_last_busy(connector->dev->dev); @@ -1719,10 +1717,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force) radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) { - radeon_connector_get_edid(connector); + if (radeon_audio != 0) radeon_audio_detect(connector, ret); - } out: pm_runtime_mark_last_busy(connector->dev->dev); From ce0e5c522d3924090c20e774359809a7aa08c44c Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 27 May 2015 11:44:32 +0100 Subject: [PATCH 329/481] xen/netback: Properly initialize credit_bytes Commit e9ce7cb6b107 ("xen-netback: Factor queue-specific data into queue struct") introduced a regression when moving queue-specific data into the queue struct by failing to set the credit_bytes field. This prevented bandwidth limiting from working. Initialize the field as it was done before multiqueue support was added. Signed-off-by: Ross Lagerwall Acked-by: Wei Liu Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/xenbus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 3d8dbf5f2d39..fee02414529e 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -793,6 +793,7 @@ static void connect(struct backend_info *be) goto err; } + queue->credit_bytes = credit_bytes; queue->remaining_credit = credit_bytes; queue->credit_usec = credit_usec; From 83a35114d0e4583e6b0ca39502e68b6a92e2910c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 27 May 2015 10:59:26 +0930 Subject: [PATCH 330/481] lguest: fix out-by-one error in address checking. This bug has been there since day 1; addresses in the top guest physical page weren't considered valid. You could map that page (the check in check_gpte() is correct), but if a guest tried to put a pagetable there we'd check that address manually when walking it, and kill the guest. Signed-off-by: Rusty Russell Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/lguest/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 7dc93aa004c8..312ffd3d0017 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -173,7 +173,7 @@ static void unmap_switcher(void) bool lguest_address_ok(const struct lguest *lg, unsigned long addr, unsigned long len) { - return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); + return addr+len <= lg->pfn_limit * PAGE_SIZE && (addr+len >= addr); } /* From f4ecf29fd78d19a9301e638eaa1419e5c8fbdce1 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 22 May 2015 16:12:26 -0700 Subject: [PATCH 331/481] mlx4_core: Fix fallback from MSI-X to INTx The test in mlx4_load_one() to remove MLX4_FLAG_MSI_X expects mlx4_NOP() to fail with -EBUSY. It is also necessary to avoid the reset since the device is not fully reinitialized before calling mlx4_start_hca() a second time. Note that this will also affect mlx4_test_interrupts(), the only other user of MLX4_CMD_NOP. Fixes: f5aef5a ("net/mlx4_core: Activate reset flow upon fatal command cases") Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 4f7dc044601e..529ef0594b90 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -714,8 +714,13 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, msecs_to_jiffies(timeout))) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -EIO; - goto out_reset; + if (op == MLX4_CMD_NOP) { + err = -EBUSY; + goto out; + } else { + err = -EIO; + goto out_reset; + } } err = context->result; From fbfd3bc7dfd7efcad2d2e52bf634f84c80a77a35 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 27 May 2015 11:33:26 -0400 Subject: [PATCH 332/481] drm/radeon/audio: make sure connector is valid in hotplug case Avoids a crash when a monitor is hotplugged and the encoder and connector are not linked yet. 
bug: https://bugs.freedesktop.org/show_bug.cgi?id=90681 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/evergreen_hdmi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 0926739c9fa7..9953356fe263 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -400,7 +400,7 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) if (enable) { struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); - if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + if (connector && drm_detect_monitor_audio(radeon_connector_edid(connector))) { WREG32(HDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, HDMI_AVI_INFO_SEND | /* enable AVI info frames */ HDMI_AVI_INFO_CONT | /* required for audio info values to be updated */ @@ -438,7 +438,8 @@ void evergreen_dp_enable(struct drm_encoder *encoder, bool enable) if (!dig || !dig->afmt) return; - if (enable && drm_detect_monitor_audio(radeon_connector_edid(connector))) { + if (enable && connector && + drm_detect_monitor_audio(radeon_connector_edid(connector))) { struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct radeon_connector_atom_dig *dig_connector; From ad0681185770716523c81b156c44b9804d7b8ed2 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 27 May 2015 15:46:10 +0100 Subject: [PATCH 333/481] xen-netfront: properly destroy queues when removing device xennet_remove() freed the queues before freeing the netdevice which results in a use-after-free when free_netdev() tries to delete the napi instances that have already been freed. Fix this by fully destroy the queues (which includes deleting the napi instances) before freeing the netdevice. Signed-off-by: David Vrabel Reviewed-by: Boris Ostrovsky Signed-off-by: David S. 
Miller --- drivers/net/xen-netfront.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 3f45afd4382e..e031c943286e 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1698,6 +1698,7 @@ static void xennet_destroy_queues(struct netfront_info *info) if (netif_running(info->netdev)) napi_disable(&queue->napi); + del_timer_sync(&queue->rx_refill_timer); netif_napi_del(&queue->napi); } @@ -2102,9 +2103,6 @@ static const struct attribute_group xennet_dev_group = { static int xennet_remove(struct xenbus_device *dev) { struct netfront_info *info = dev_get_drvdata(&dev->dev); - unsigned int num_queues = info->netdev->real_num_tx_queues; - struct netfront_queue *queue = NULL; - unsigned int i = 0; dev_dbg(&dev->dev, "%s\n", dev->nodename); @@ -2112,16 +2110,7 @@ static int xennet_remove(struct xenbus_device *dev) unregister_netdev(info->netdev); - for (i = 0; i < num_queues; ++i) { - queue = &info->queues[i]; - del_timer_sync(&queue->rx_refill_timer); - } - - if (num_queues) { - kfree(info->queues); - info->queues = NULL; - } - + xennet_destroy_queues(info); xennet_free_netdev(info->netdev); return 0; From 86e363dc3b50bfd50a1f315934583fbda673ab8d Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 26 May 2015 16:08:48 -0700 Subject: [PATCH 334/481] net_sched: invoke ->attach() after setting dev->qdisc For mq qdisc, we add per tx queue qdisc to root qdisc for display purpose, however, that happens too early, before the new dev->qdisc is finally set, this causes q->list points to an old root qdisc which is going to be freed right before assigning with a new one. Fix this by moving ->attach() after setting dev->qdisc. For the record, this fixes the following crash: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 975 at lib/list_debug.c:59 __list_del_entry+0x5a/0x98() list_del corruption. prev->next should be ffff8800d1998ae8, but was 6b6b6b6b6b6b6b6b CPU: 1 PID: 975 Comm: tc Not tainted 4.1.0-rc4+ #1019 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0000000000000009 ffff8800d73fb928 ffffffff81a44e7f 0000000047574756 ffff8800d73fb978 ffff8800d73fb968 ffffffff810790da ffff8800cfc4cd20 ffffffff814e725b ffff8800d1998ae8 ffffffff82381250 0000000000000000 Call Trace: [] dump_stack+0x4c/0x65 [] warn_slowpath_common+0x9c/0xb6 [] ? __list_del_entry+0x5a/0x98 [] warn_slowpath_fmt+0x46/0x48 [] ? dev_graft_qdisc+0x5e/0x6a [] __list_del_entry+0x5a/0x98 [] list_del+0xe/0x2d [] qdisc_list_del+0x1e/0x20 [] qdisc_destroy+0x30/0xd6 [] qdisc_graft+0x11d/0x243 [] tc_get_qdisc+0x1a6/0x1d4 [] ? mark_lock+0x2e/0x226 [] rtnetlink_rcv_msg+0x181/0x194 [] ? rtnl_lock+0x17/0x19 [] ? rtnl_lock+0x17/0x19 [] ? __rtnl_unlock+0x17/0x17 [] netlink_rcv_skb+0x4d/0x93 [] rtnetlink_rcv+0x26/0x2d [] netlink_unicast+0xcb/0x150 [] ? might_fault+0x59/0xa9 [] netlink_sendmsg+0x4fa/0x51c [] sock_sendmsg_nosec+0x12/0x1d [] sock_sendmsg+0x29/0x2e [] ___sys_sendmsg+0x1b4/0x23a [] ? native_sched_clock+0x35/0x37 [] ? sched_clock_local+0x12/0x72 [] ? sched_clock_cpu+0x9e/0xb7 [] ? current_kernel_time+0xe/0x32 [] ? lock_release_holdtime.part.29+0x71/0x7f [] ? read_seqcount_begin.constprop.27+0x5f/0x76 [] ? trace_hardirqs_on_caller+0x17d/0x199 [] ? __fget_light+0x50/0x78 [] __sys_sendmsg+0x42/0x60 [] SyS_sendmsg+0x12/0x1c [] system_call_fastpath+0x12/0x6f ---[ end trace ef29d3fb28e97ae7 ]--- For long term, we probably need to clean up the qdisc_graft() code in case it hides other bugs like this. 
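Condensed, the corrected ordering amounts to publishing the new root before letting mq attach its per-queue children (an abbreviated sketch of the qdisc_graft() hunk below, not additional code):

	dev->qdisc = new ? : &noop_qdisc;	/* publish the new root first */

	if (new && new->ops->attach)
		new->ops->attach(new);		/* mq now grafts its per-queue
						 * qdiscs onto the root that
						 * will actually stay */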
Fixes: 95dc19299f74 ("pkt_sched: give visibility to mq slave qdiscs") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_api.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ad9eed70bc8f..1e1c89e51a11 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -815,10 +815,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (dev->flags & IFF_UP) dev_deactivate(dev); - if (new && new->ops->attach) { - new->ops->attach(new); - num_q = 0; - } + if (new && new->ops->attach) + goto skip; for (i = 0; i < num_q; i++) { struct netdev_queue *dev_queue = dev_ingress_queue(dev); @@ -834,12 +832,16 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_destroy(old); } +skip: if (!ingress) { notify_and_destroy(net, skb, n, classid, dev->qdisc, new); if (new && !new->ops->attach) atomic_inc(&new->refcnt); dev->qdisc = new ? : &noop_qdisc; + + if (new && new->ops->attach) + new->ops->attach(new); } else { notify_and_destroy(net, skb, n, classid, old, new); } From 9302d7bb0c5cd46be5706859301f18c137b2439f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 26 May 2015 17:30:17 -0600 Subject: [PATCH 335/481] sctp: Fix mangled IPv4 addresses on a IPv6 listening socket sctp_v4_map_v6 was subtly writing and reading from members of a union in a way the clobbered data it needed to read before it read it. Zeroing the v6 flowinfo overwrites the v4 sin_addr with 0, meaning that every place that calls sctp_v4_map_v6 gets ::ffff:0.0.0.0 as the result. Reorder things to guarantee correct behaviour no matter what the union layout is. This impacts user space clients that open an IPv6 SCTP socket and receive IPv4 connections. Prior to 299ee user space would see a sockaddr with AF_INET and a correct address, after 299ee the sockaddr is AF_INET6, but the address is wrong. Fixes: 299ee123e198 (sctp: Fixup v4mapped behaviour to comply with Sock API) Signed-off-by: Jason Gunthorpe Acked-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index c56a438c3a1e..ce13cf20f625 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -574,11 +574,14 @@ static inline void sctp_v6_map_v4(union sctp_addr *addr) /* Map v4 address to v4-mapped v6 address */ static inline void sctp_v4_map_v6(union sctp_addr *addr) { + __be16 port; + + port = addr->v4.sin_port; + addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr; + addr->v6.sin6_port = port; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_scope_id = 0; - addr->v6.sin6_port = addr->v4.sin_port; - addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr; addr->v6.sin6_addr.s6_addr32[0] = 0; addr->v6.sin6_addr.s6_addr32[1] = 0; addr->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff); From c545f799c7470682756ca64c63f6bfcdaf28442b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 May 2015 21:20:12 +0200 Subject: [PATCH 336/481] ALSA: hda - Disable power_save_node for IDT92HD71bxx We've got a regression report that 4.1-rc causes noises on a Dell laptop. Similar like Realtek codec, this seems also triggered by the recent power_save_node feature. As this kind of issue is quite hard to debug without actual hardware, disable the power_save_node flag for this codec as a workaround. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=98971 Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 0de0fd95144a..6833c74ed6ff 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -4696,7 +4696,8 @@ static int patch_stac92hd71bxx(struct hda_codec *codec) return err; spec = codec->spec; - codec->power_save_node = 1; + /* disabled power_save_node since it causes noises on a Dell machine */ + /* codec->power_save_node = 1; */ spec->linear_tone_beep = 0; spec->gen.own_eapd_ctl = 1; spec->gen.power_down_unused = 1; From e275b3885dffd31095984ed2476ed0447fa7309a Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Wed, 15 Apr 2015 10:09:50 -0700 Subject: [PATCH 337/481] tools/power turbostat: correctly display more than 2 threads/core Without this update, turbostat displays only 2 threads per core. Some processors, such as Xeon Phi, have more. Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 71 +++++++++++++++++++-------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bac98ca3d4ca..d85adbafbe60 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1381,12 +1381,41 @@ int parse_int_file(const char *fmt, ...) } /* - * cpu_is_first_sibling_in_core(cpu) - * return 1 if given CPU is 1st HT sibling in the core + * get_cpu_position_in_core(cpu) + * return the position of the CPU among its HT siblings in the core + * return -1 if the sibling is not in list */ -int cpu_is_first_sibling_in_core(int cpu) +int get_cpu_position_in_core(int cpu) { - return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); + char path[64]; + FILE *filep; + int this_cpu; + char character; + int i; + + sprintf(path, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", + cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); + } + + for (i = 0; i < topo.num_threads_per_core; i++) { + fscanf(filep, "%d", &this_cpu); + if (this_cpu == cpu) { + fclose(filep); + return i; + } + + /* Account for no separator after last thread*/ + if (i != (topo.num_threads_per_core - 1)) + fscanf(filep, "%c", &character); + } + + fclose(filep); + return -1; } /* @@ -1412,25 +1441,31 @@ int get_num_ht_siblings(int cpu) { char path[80]; FILE *filep; - int sib1, sib2; - int matches; + int sib1; + int matches = 0; char character; + char str[100]; + char *ch; sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); filep = fopen_or_die(path, "r"); + /* * file format: - * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) - * otherwinse 1 sibling (self). 
+ * A ',' separated or '-' separated set of numbers + * (eg 1-2 or 1,3,4,5) */ - matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); + fscanf(filep, "%d%c\n", &sib1, &character); + fseek(filep, 0, SEEK_SET); + fgets(str, 100, filep); + ch = strchr(str, character); + while (ch != NULL) { + matches++; + ch = strchr(ch+1, character); + } fclose(filep); - - if (matches == 3) - return 2; - else - return 1; + return matches+1; } /* @@ -2755,13 +2790,9 @@ int initialize_counters(int cpu_id) my_package_id = get_physical_package_id(cpu_id); my_core_id = get_core_id(cpu_id); - - if (cpu_is_first_sibling_in_core(cpu_id)) { - my_thread_id = 0; + my_thread_id = get_cpu_position_in_core(cpu_id); + if (!my_thread_id) topo.num_cores++; - } else { - my_thread_id = 1; - } init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); From 4c6dd53dd3674c310d7379c6b3273daa9fd95c79 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 27 May 2015 15:23:56 -0400 Subject: [PATCH 338/481] dm mpath: fix leak of dm_mpath_io structure in blk-mq .queue_rq error path Otherwise kmemleak reported: unreferenced object 0xffff88009b14e2b0 (size 16): comm "fio", pid 4274, jiffies 4294978034 (age 1253.210s) hex dump (first 16 bytes): 40 12 f3 99 01 88 ff ff 00 10 00 00 00 00 00 00 @............... backtrace: [] kmemleak_alloc+0x49/0xb0 [] kmem_cache_alloc+0xf8/0x160 [] mempool_alloc_slab+0x10/0x20 [] mempool_alloc+0x57/0x150 [] __multipath_map.isra.17+0xe1/0x220 [dm_multipath] [] multipath_clone_and_map+0x15/0x20 [dm_multipath] [] map_request.isra.39+0xd5/0x220 [dm_mod] [] dm_mq_queue_rq+0x134/0x240 [dm_mod] [] __blk_mq_run_hw_queue+0x1d5/0x380 [] blk_mq_run_hw_queue+0xc5/0x100 [] blk_sq_make_request+0x240/0x300 [] generic_make_request+0xc0/0x110 [] submit_bio+0x72/0x150 [] do_blockdev_direct_IO+0x1f3b/0x2da0 [] __blockdev_direct_IO+0x3e/0x40 [] ext4_direct_IO+0x1aa/0x390 Fixes: e5863d9ad ("dm: allocate requests in target when stacking on blk-mq devices") Reported-by: Bart Van Assche Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.0+ --- drivers/md/dm-mpath.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 63953477a07c..eff7bdd7731d 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -429,9 +429,11 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, /* blk-mq request-based interface */ *__clone = blk_get_request(bdev_get_queue(bdev), rq_data_dir(rq), GFP_ATOMIC); - if (IS_ERR(*__clone)) + if (IS_ERR(*__clone)) { /* ENOMEM, requeue */ + clear_mapinfo(m, map_context); return r; + } (*__clone)->bio = (*__clone)->biotail = NULL; (*__clone)->rq_disk = bdev->bd_disk; (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT; From 45714fbed4556149d7f1730f5bae74f81d5e2cd5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 27 May 2015 15:25:27 -0400 Subject: [PATCH 339/481] dm: requeue from blk-mq dm_mq_queue_rq() using BLK_MQ_RQ_QUEUE_BUSY Use BLK_MQ_RQ_QUEUE_BUSY to requeue a blk-mq request directly from the DM blk-mq device's .queue_rq. This cleans up the previous convoluted handling of request requeueing that would return BLK_MQ_RQ_QUEUE_OK (even though it wasn't) and then run blk_mq_requeue_request() followed by blk_mq_kick_requeue_list(). Also, document that DM blk-mq ontop of old request_fn devices cannot fail in clone_rq() since the clone request is preallocated as part of the pdu. 
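Condensed, the blk-mq .queue_rq() requeue path described above ends up as (a simplified sketch of the dm.c hunk below):

	/* Direct call is fine since .queue_rq allows allocations */
	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
		/* Undo dm_start_request() before asking blk-mq to requeue */
		rq_completed(md, rq_data_dir(rq), false);
		return BLK_MQ_RQ_QUEUE_BUSY;
	}
	return BLK_MQ_RQ_QUEUE_OK;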
Reported-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1c62ed8d09f4..1badfb250a18 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2754,13 +2754,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) { /* clone request is allocated at the end of the pdu */ tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io); - if (!clone_rq(rq, md, tio, GFP_ATOMIC)) - return BLK_MQ_RQ_QUEUE_BUSY; + (void) clone_rq(rq, md, tio, GFP_ATOMIC); queue_kthread_work(&md->kworker, &tio->work); } else { /* Direct call is fine since .queue_rq allows allocations */ - if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) - dm_requeue_unmapped_original_request(md, rq); + if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { + /* Undo dm_start_request() before requeuing */ + rq_completed(md, rq_data_dir(rq), false); + return BLK_MQ_RQ_QUEUE_BUSY; + } } return BLK_MQ_RQ_QUEUE_OK; From fb5d432722e186c656285ccc088e35dbe24f6fd1 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Wed, 20 May 2015 09:49:34 -0700 Subject: [PATCH 340/481] tools/power turbostat: enable turbostat to support Knights Landing (KNL) Changes mainly to account for minor differences in Knights Landing(KNL): 1. KNL supports C1 and C6 core states. 2. KNL supports PC2, PC3 and PC6 package states. 3. KNL has a different encoding of the TURBO_RATIO_LIMIT MSR Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Len Brown --- arch/x86/include/uapi/asm/msr-index.h | 1 + tools/power/x86/turbostat/turbostat.c | 105 +++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c469490db4a8..3c6bb342a48f 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -140,6 +140,7 @@ #define MSR_CORE_C3_RESIDENCY 0x000003fc #define MSR_CORE_C6_RESIDENCY 0x000003fd #define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff #define MSR_PKG_C2_RESIDENCY 0x0000060d #define MSR_PKG_C8_RESIDENCY 0x00000630 #define MSR_PKG_C9_RESIDENCY 0x00000631 diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d85adbafbe60..256a5e1de381 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -52,6 +52,7 @@ unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; +unsigned int do_knl_cstates; unsigned int do_pc2; unsigned int do_pc3; unsigned int do_pc6; @@ -316,7 +317,7 @@ void print_header(void) if (do_nhm_cstates) outp += sprintf(outp, " CPU%%c1"); - if (do_nhm_cstates && !do_slm_cstates) + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, " CPU%%c3"); if (do_nhm_cstates) outp += sprintf(outp, " CPU%%c6"); @@ -546,7 +547,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (do_nhm_cstates && !do_slm_cstates) + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); if (do_nhm_cstates) outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); @@ -1018,14 +1019,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & 
CPU_IS_FIRST_THREAD_IN_CORE)) return 0; - if (do_nhm_cstates && !do_slm_cstates) { + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } - if (do_nhm_cstates) { + if (do_nhm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; + } else if (do_knl_cstates) { + if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) + return -7; } if (do_snb_cstates) @@ -1295,6 +1299,67 @@ dump_nhm_turbo_ratio_limits(void) return; } +static void +dump_knl_turbo_ratio_limits(void) +{ + int cores; + unsigned int ratio; + unsigned long long msr; + int delta_cores; + int delta_ratio; + int i; + + get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + + fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", + msr); + + /** + * Turbo encoding in KNL is as follows: + * [7:0] -- Base value of number of active cores of bucket 1. + * [15:8] -- Base value of freq ratio of bucket 1. + * [20:16] -- +ve delta of number of active cores of bucket 2. + * i.e. active cores of bucket 2 = + * active cores of bucket 1 + delta + * [23:21] -- Negative delta of freq ratio of bucket 2. + * i.e. freq ratio of bucket 2 = + * freq ratio of bucket 1 - delta + * [28:24]-- +ve delta of number of active cores of bucket 3. + * [31:29]-- -ve delta of freq ratio of bucket 3. + * [36:32]-- +ve delta of number of active cores of bucket 4. + * [39:37]-- -ve delta of freq ratio of bucket 4. + * [44:40]-- +ve delta of number of active cores of bucket 5. + * [47:45]-- -ve delta of freq ratio of bucket 5. + * [52:48]-- +ve delta of number of active cores of bucket 6. + * [55:53]-- -ve delta of freq ratio of bucket 6. + * [60:56]-- +ve delta of number of active cores of bucket 7. + * [63:61]-- -ve delta of freq ratio of bucket 7. + */ + cores = msr & 0xFF; + ratio = (msr >> 8) && 0xFF; + if (ratio > 0) + fprintf(stderr, + "%d * %.0f = %.0f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, cores); + + for (i = 16; i < 64; i = i + 8) { + delta_cores = (msr >> i) & 0x1F; + delta_ratio = (msr >> (i + 5)) && 0x7; + if (!delta_cores || !delta_ratio) + return; + cores = cores + delta_cores; + ratio = ratio - delta_ratio; + + /** -ve ratios will make successive ratio calculations + * negative. Hence return instead of carrying on. 
+ */ + if (ratio > 0) + fprintf(stderr, + "%d * %.0f = %.0f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, cores); + } +} + static void dump_nhm_cst_cfg(void) { @@ -1788,6 +1853,21 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) } } +int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x57: /* Knights Landing */ + return 1; + default: + return 0; + } +} static void dump_cstate_pstate_config_info(family, model) { @@ -1805,6 +1885,9 @@ dump_cstate_pstate_config_info(family, model) if (has_nhm_turbo_ratio_limit(family, model)) dump_nhm_turbo_ratio_limits(); + if (has_knl_turbo_ratio_limit(family, model)) + dump_knl_turbo_ratio_limits(); + dump_nhm_cst_cfg(); } @@ -1985,6 +2068,7 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) case 0x3F: /* HSX */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x57: /* KNL */ return (rapl_dram_energy_units = 15.3 / 1000000); default: return (rapl_energy_units); @@ -2026,6 +2110,7 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x3F: /* HSX */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x57: /* KNL */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; break; case 0x2D: @@ -2366,6 +2451,17 @@ int is_slm(unsigned int family, unsigned int model) return 0; } +int is_knl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + switch (model) { + case 0x57: /* KNL */ + return 1; + } + return 0; +} + #define SLM_BCLK_FREQS 5 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; @@ -2576,6 +2672,7 @@ void process_cpuid() do_c8_c9_c10 = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); + do_knl_cstates = is_knl(family, model); bclk = discover_bclk(family, model); rapl_probe(family, model); From e9be7dd62899194ebdd90d417fc6c07d5d157912 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 26 May 2015 12:19:37 -0400 Subject: [PATCH 341/481] tools/power turbostat: correctly decode of ENERGY_PERFORMANCE_BIAS When EPB is 0xF, turbosat was incorrectly describing it as "custom" instead of calling it "powersave": < cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x0000000f (custom) > cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x0000000f (powersave) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 256a5e1de381..f92211e9e70c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1919,7 +1919,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) return 0; - switch (msr & 0x7) { + switch (msr & 0xF) { case ENERGY_PERF_BIAS_PERFORMANCE: epb_string = "performance"; break; From 7ce7d5de6dd08ee2a713ef13f4499073b4a72b7f Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 25 May 2015 08:34:28 -0400 Subject: [PATCH 342/481] tools/power turbostat: allow running without cpu0 Linux-3.7 added CONFIG_BOOTPARAM_HOTPLUG_CPU0, allowing systems to offline cpu0. 
But when cpu0 is offline, turbostat will not run: # turbostat ls turbostat: no /dev/cpu/0/msr This patch replaces the hard-coded use of cpu0 in turbostat with the current cpu, allowing it to run without a cpu0. Fewer cross-calls may also be needed due to use of current cpu, though this hard-coding was used only for the --debug preamble. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 46 ++++++++++++++++++--------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f92211e9e70c..68e77fdd4c04 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -92,6 +92,7 @@ unsigned int do_gfx_perf_limit_reasons; unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; +int base_cpu; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -1154,7 +1155,7 @@ dump_nhm_platform_info(void) unsigned long long msr; unsigned int ratio; - get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); @@ -1166,7 +1167,7 @@ dump_nhm_platform_info(void) fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", ratio, bclk, ratio * bclk); - get_msr(0, MSR_IA32_POWER_CTL, &msr); + get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", msr, msr & 0x2 ? "EN" : "DIS"); @@ -1179,7 +1180,7 @@ dump_hsw_turbo_ratio_limits(void) unsigned long long msr; unsigned int ratio; - get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); @@ -1201,7 +1202,7 @@ dump_ivt_turbo_ratio_limits(void) unsigned long long msr; unsigned int ratio; - get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); @@ -1253,7 +1254,7 @@ dump_nhm_turbo_ratio_limits(void) unsigned long long msr; unsigned int ratio; - get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); @@ -1309,7 +1310,7 @@ dump_knl_turbo_ratio_limits(void) int delta_ratio; int i; - get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); @@ -1365,7 +1366,7 @@ dump_nhm_cst_cfg(void) { unsigned long long msr; - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) @@ -1694,8 +1695,10 @@ restart: void check_dev_msr() { struct stat sb; + char pathname[32]; - if (stat("/dev/cpu/0/msr", &sb)) + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (stat(pathname, &sb)) if (system("/sbin/modprobe msr > /dev/null 2>&1")) err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } @@ -1708,6 +1711,7 @@ void check_permissions() cap_user_data_t cap_data = &cap_data_data; extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); int do_exit = 0; + char pathname[32]; /* check for CAP_SYS_RAWIO */ cap_header->pid = getpid(); @@ -1722,7 +1726,8 @@ void check_permissions() } /* test file permissions */ - if (euidaccess("/dev/cpu/0/msr", R_OK)) { + sprintf(pathname, 
"/dev/cpu/%d/msr", base_cpu); + if (euidaccess(pathname, R_OK)) { do_exit++; warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); } @@ -1804,7 +1809,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) default: return 0; } - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; @@ -2043,7 +2048,7 @@ double get_tdp(model) unsigned long long msr; if (do_rapl & RAPL_PKG_POWER_INFO) - if (!get_msr(0, MSR_PKG_POWER_INFO, &msr)) + if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; switch (model) { @@ -2126,7 +2131,7 @@ void rapl_probe(unsigned int family, unsigned int model) } /* units on package 0, verify later other packages match */ - if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) + if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) return; rapl_power_units = 1.0 / (1 << (msr & 0xF)); @@ -2471,7 +2476,7 @@ double slm_bclk(void) unsigned int i; double freq; - if (get_msr(0, MSR_FSB_FREQ, &msr)) + if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) fprintf(stderr, "SLM BCLK: unknown\n"); i = msr & 0xf; @@ -2539,7 +2544,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (!do_nhm_platform_info) goto guess; - if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) goto guess; target_c_local = (msr >> 16) & 0xFF; @@ -2913,13 +2918,24 @@ void setup_all_buffers(void) for_all_proc_cpus(initialize_counters); } +void set_base_cpu(void) +{ + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(-ENODEV, "No valid cpus found"); + + if (debug > 1) + fprintf(stderr, "base_cpu = %d\n", base_cpu); +} + void turbostat_init() { + setup_all_buffers(); + set_base_cpu(); check_dev_msr(); check_permissions(); process_cpuid(); - setup_all_buffers(); if (debug) for_all_cpus(print_epb, ODD_COUNTERS); From a68c7c3ff0469d79993ee85e8e0a3a9a568ce350 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 27 May 2015 18:00:39 -0400 Subject: [PATCH 343/481] tools/power turbostat: update version number to 4.7 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 68e77fdd4c04..323b65edfc97 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3014,7 +3014,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.5 2 Apr, 2015" + fprintf(stderr, "turbostat version 4.7 27-May, 2015" " - Len Brown \n"); } From dc4fdaf0e4839109169d8261814813816951c75f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 May 2015 01:39:53 +0200 Subject: [PATCH 344/481] PCI / ACPI: Do not set ACPI companions for host bridges with parents Commit 97badf873ab6 (device property: Make it possible to use secondary firmware nodes) uncovered a bug in the x86 (and ia64) PCI host bridge initialization code that assumes bridge->bus->sysdata to always point to a struct pci_sysdata object which need not be the case (in particular, the Xen PCI frontend driver sets it to point to a different data type). If it is not the case, an incorrect pointer (or a piece of data that is not a pointer at all) will be passed to ACPI_COMPANION_SET() and that may cause interesting breakage to happen going forward. 
To work around this problem use the observation that the ACPI host bridge initialization always passes NULL as parent to pci_create_root_bus(), so if pcibios_root_bridge_prepare() sees a non-NULL parent of the bridge, it should not attempt to set an ACPI companion for it, because that means that pci_create_root_bus() has been called by someone else. Fixes: 97badf873ab6 (device property: Make it possible to use secondary firmware nodes) Reported-and-tested-by: Sander Eikelenboom Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas --- arch/ia64/pci/pci.c | 13 ++++++++++--- arch/x86/pci/acpi.c | 13 ++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index d4e162d35b34..7cc3be9fa7c6 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_controller *controller = bridge->bus->sysdata; - - ACPI_COMPANION_SET(&bridge->dev, controller->companion); + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. + */ + if (!bridge->dev.parent) { + struct pci_controller *controller = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, controller->companion); + } return 0; } diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d93963340c3c..14a63ed6fe09 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -482,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_sysdata *sd = bridge->bus->sysdata; - - ACPI_COMPANION_SET(&bridge->dev, sd->companion); + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. + */ + if (!bridge->dev.parent) { + struct pci_sysdata *sd = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, sd->companion); + } return 0; } From 2b6b24574256c05be145936f1493aec74c6904e5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 May 2015 15:10:01 +1000 Subject: [PATCH 345/481] md/raid5: ensure whole batch is delayed for all required bitmap updates. When we add a stripe to a batch, we need to be sure that head stripe will wait for the bitmap update required for the new stripe. 
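To make the requirement concrete (the numbers here are illustrative, not from the patch): if the batch head currently carries bm_seq 12 but the stripe being added must wait for a newer bitmap flush with bm_seq 15, the head has to inherit 15 and keep STRIPE_BIT_DELAY set, otherwise the whole batch could be written out before that bitmap update reaches disk. The hunk below propagates the larger bm_seq to the batch head for exactly that reason.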
Signed-off-by: NeilBrown --- drivers/md/raid5.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b9f2b9cc6060..c55a68f37c72 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -837,6 +837,15 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh < IO_THRESHOLD) md_wakeup_thread(conf->mddev->thread); + if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) { + int seq = sh->bm_seq; + if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) && + sh->batch_head->bm_seq > seq) + seq = sh->batch_head->bm_seq; + set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state); + sh->batch_head->bm_seq = seq; + } + atomic_inc(&sh->count); unlock_out: unlock_two_stripes(head, sh); From d0852df543e5aa7db34c1ad26d053782bcbf48f1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 27 May 2015 08:43:45 +1000 Subject: [PATCH 346/481] md/raid5: close race between STRIPE_BIT_DELAY and batching. When we add a write to a stripe we need to make sure the bitmap bit is set. While doing that the stripe is not locked so it could be added to a batch after which further changes to STRIPE_BIT_DELAY and ->bm_seq are ineffective. So we need to hold off adding to a stripe until bitmap_startwrite has completed at least once, and we need to avoid further changes to STRIPE_BIT_DELAY once the stripe has been added to a batch. If a bitmap_startwrite() completes after the stripe was added to a batch, it will not have set the bit, only incremented a counter, so no extra delay of the stripe is needed. Reported-by: Shaohua Li Signed-off-by: NeilBrown --- drivers/md/raid5.c | 25 ++++++++++++++++++++++--- drivers/md/raid5.h | 3 +++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c55a68f37c72..42d0ea6c8597 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -749,6 +749,7 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) static bool stripe_can_batch(struct stripe_head *sh) { return test_bit(STRIPE_BATCH_READY, &sh->state) && + !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && is_full_stripe_write(sh); } @@ -2996,14 +2997,32 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", (unsigned long long)(*bip)->bi_iter.bi_sector, (unsigned long long)sh->sector, dd_idx); - spin_unlock_irq(&sh->stripe_lock); if (conf->mddev->bitmap && firstwrite) { + /* Cannot hold spinlock over bitmap_startwrite, + * but must ensure this isn't added to a batch until + * we have added to the bitmap and set bm_seq. + * So set STRIPE_BITMAP_PENDING to prevent + * batching. + * If multiple add_stripe_bio() calls race here they + * much all set STRIPE_BITMAP_PENDING. So only the first one + * to complete "bitmap_startwrite" gets to set + * STRIPE_BIT_DELAY. This is important as once a stripe + * is added to a batch, STRIPE_BIT_DELAY cannot be changed + * any more. 
+ */ + set_bit(STRIPE_BITMAP_PENDING, &sh->state); + spin_unlock_irq(&sh->stripe_lock); bitmap_startwrite(conf->mddev->bitmap, sh->sector, STRIPE_SECTORS, 0); - sh->bm_seq = conf->seq_flush+1; - set_bit(STRIPE_BIT_DELAY, &sh->state); + spin_lock_irq(&sh->stripe_lock); + clear_bit(STRIPE_BITMAP_PENDING, &sh->state); + if (!sh->batch_head) { + sh->bm_seq = conf->seq_flush+1; + set_bit(STRIPE_BIT_DELAY, &sh->state); + } } + spin_unlock_irq(&sh->stripe_lock); if (stripe_can_batch(sh)) stripe_add_to_batch_list(conf, sh); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 7dc0dd86074b..01cdb9f3a0c4 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -337,6 +337,9 @@ enum { STRIPE_ON_RELEASE_LIST, STRIPE_BATCH_READY, STRIPE_BATCH_ERR, + STRIPE_BITMAP_PENDING, /* Being added to bitmap, don't add + * to batch yet. + */ }; #define STRIPE_EXPAND_SYNC_FLAG \ From f36963c9d3f6f415732710da3acdd8608a9fa0e5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 9 May 2015 03:14:13 +0930 Subject: [PATCH 347/481] cpumask_set_cpu_local_first => cpumask_local_spread, lament da91309e0a7e (cpumask: Utility function to set n'th cpu...) created a genuinely weird function. I never saw it before, it went through DaveM. (He only does this to make us other maintainers feel better about our own mistakes.) cpumask_set_cpu_local_first's purpose is say "I need to spread things across N online cpus, choose the ones on this numa node first"; you call it in a loop. It can fail. One of the two callers ignores this, the other aborts and fails the device open. It can fail in two ways: allocating the off-stack cpumask, or through a convoluted codepath which AFAICT can only occur if cpu_online_mask changes. Which shouldn't happen, because if cpu_online_mask can change while you call this, it could return a now-offline cpu anyway. It contains a nonsensical test "!cpumask_of_node(numa_node)". This was drawn to my attention by Geert, who said this causes a warning on Sparc. It sets a single bit in a cpumask instead of returning a cpu number, because that's what the callers want. It could be made more efficient by passing the previous cpu rather than an index, but that would be more invasive to the callers. Fixes: da91309e0a7e8966d916a74cce42ed170fde06bf Signed-off-by: Rusty Russell (then rebased) Tested-by: Amir Vadai Acked-by: Amir Vadai Acked-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 6 +- .../net/ethernet/mellanox/mlx4/en_netdev.c | 10 +-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 +- include/linux/cpumask.h | 6 +- lib/cpumask.c | 70 +++++++------------ 5 files changed, 35 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a6dcbf850c1f..6f9ffb9026cd 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2358,11 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter) adapter->cfg_num_qs); for_all_evt_queues(adapter, eqo, i) { + int numa_node = dev_to_node(&adapter->pdev->dev); if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) return -ENOMEM; - cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), - eqo->affinity_mask); - + cpumask_set_cpu(cpumask_local_spread(i, numa_node), + eqo->affinity_mask); netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); napi_hash_add(&eqo->napi); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 32f5ec737472..cf467a9f6cc7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1501,17 +1501,13 @@ static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) { struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; int numa_node = priv->mdev->dev->numa_node; - int ret = 0; if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) return -ENOMEM; - ret = cpumask_set_cpu_local_first(ring_idx, numa_node, - ring->affinity_mask); - if (ret) - free_cpumask_var(ring->affinity_mask); - - return ret; + cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node), + ring->affinity_mask); + return 0; } static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index f7bf312fb443..7bed3a88579f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -144,9 +144,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->queue_index = queue_index; if (queue_index < priv->num_tx_rings_p_up) - cpumask_set_cpu_local_first(queue_index, - priv->mdev->dev->numa_node, - &ring->affinity_mask); + cpumask_set_cpu(cpumask_local_spread(queue_index, + priv->mdev->dev->numa_node), + &ring->affinity_mask); *pring = ring; return 0; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 27e285b92b5f..59915ea5373c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, return 1; } -static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +static inline unsigned int cpumask_local_spread(unsigned int i, int node) { - set_bit(0, cpumask_bits(dstp)); - return 0; } @@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); +unsigned int cpumask_local_spread(unsigned int i, int node); /** * for_each_cpu - iterate over every cpu in a mask diff --git a/lib/cpumask.c b/lib/cpumask.c index 830dd5dec40f..5f627084f2e9 
100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -139,64 +139,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) #endif /** - * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first - * + * cpumask_local_spread - select the i'th cpu with local numa cpu's first * @i: index number - * @numa_node: local numa_node - * @dstp: cpumask with the relevant cpu bit set according to the policy + * @node: local numa_node * - * This function sets the cpumask according to a numa aware policy. - * cpumask could be used as an affinity hint for the IRQ related to a - * queue. When the policy is to spread queues across cores - local cores - * first. + * This function selects an online CPU according to a numa aware policy; + * local cpus are returned first, followed by non-local ones, then it + * wraps around. * - * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set - * the cpu bit and need to re-call the function. + * It's not very efficient, but useful for setup. */ -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +unsigned int cpumask_local_spread(unsigned int i, int node) { - cpumask_var_t mask; int cpu; - int ret = 0; - - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (numa_node == -1 || !cpumask_of_node(numa_node)) { - /* Use all online cpu's for non numa aware system */ - cpumask_copy(mask, cpu_online_mask); + if (node == -1) { + for_each_cpu(cpu, cpu_online_mask) + if (i-- == 0) + return cpu; } else { - int n; + /* NUMA first. */ + for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) + if (i-- == 0) + return cpu; - cpumask_and(mask, - cpumask_of_node(numa_node), cpu_online_mask); + for_each_cpu(cpu, cpu_online_mask) { + /* Skip NUMA nodes, done above. */ + if (cpumask_test_cpu(cpu, cpumask_of_node(node))) + continue; - n = cpumask_weight(mask); - if (i >= n) { - i -= n; - - /* If index > number of local cpu's, mask out local - * cpu's - */ - cpumask_andnot(mask, cpu_online_mask, mask); + if (i-- == 0) + return cpu; } } - - for_each_cpu(cpu, mask) { - if (--i < 0) - goto out; - } - - ret = -EAGAIN; - -out: - free_cpumask_var(mask); - - if (!ret) - cpumask_set_cpu(cpu, dstp); - - return ret; + BUG(); } -EXPORT_SYMBOL(cpumask_set_cpu_local_first); +EXPORT_SYMBOL(cpumask_local_spread); From b15a9dbdbfe72848b7ed4cd3f97fe80daaf99c89 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 22 May 2015 15:20:04 +1000 Subject: [PATCH 348/481] md/raid5: Ensure a batch member is not handled prematurely. If a stripe is a member of a batch, but not the head, it must not be handled separately from the rest of the batch. 'clear_batch_ready()' handles this requirement to some extent but not completely. If a member is passed to handle_stripe() a second time it returns '0' indicating the stripe can be handled, which is wrong. So add an extra test. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 42d0ea6c8597..e58736740bac 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4200,9 +4200,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) static int clear_batch_ready(struct stripe_head *sh) { + /* Return '1' if this is a member of batch, or + * '0' if it is a lone stripe or a head which can now be + * handled. 
+ */ struct stripe_head *tmp; if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) - return 0; + return (sh->batch_head && sh->batch_head != sh); spin_lock(&sh->stripe_lock); if (!sh->batch_head) { spin_unlock(&sh->stripe_lock); From 4e3d62ff4976f26d22b8b91572a49136bb3a23f1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 May 2015 11:50:16 +1000 Subject: [PATCH 349/481] md/raid5: remove condition test from check_break_stripe_batch_list. handle_stripe_clean_event() contains a chunk of code very similar to check_break_stripe_batch_list(). If we make the latter more like the former, we can end up with just one copy of this code. This first step removed the condition (and the 'check_') part of the name. This has the added advantage of making it clear what check is being performed at the point where the function is called. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e58736740bac..fc5c4039c394 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4234,16 +4234,11 @@ static int clear_batch_ready(struct stripe_head *sh) return 0; } -static void check_break_stripe_batch_list(struct stripe_head *sh) +static void break_stripe_batch_list(struct stripe_head *head_sh) { - struct stripe_head *head_sh, *next; + struct stripe_head *sh, *next; int i; - if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) - return; - - head_sh = sh; - list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { list_del_init(&sh->batch_list); @@ -4290,7 +4285,8 @@ static void handle_stripe(struct stripe_head *sh) return; } - check_break_stripe_batch_list(sh); + if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) + break_stripe_batch_list(sh); if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { spin_lock(&sh->stripe_lock); From fb642b92c267beeefd352af9bc461eac93a7552c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 May 2015 12:00:47 +1000 Subject: [PATCH 350/481] md/raid5: duplicate some more handle_stripe_clean_event code in break_stripe_batch_list break_stripe_batch list didn't clear head_sh->batch_head. This was probably a bug. Also clear all R5_Overlap flags and if any were cleared, wake up 'wait_for_overlap'. This isn't always necessary but the worst effect is a little extra checking for code that is waiting on wait_for_overlap. Also, don't use wake_up_nr() because that does the wrong thing if 'nr' is zero, and it number of flags cleared doesn't strongly correlate with the number of threads to wake. 
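Condensed, the per-device sweep described above is (a simplified sketch; the real hunks below operate on head_sh and each batch member):

	int do_wakeup = 0;

	for (i = 0; i < sh->disks; i++)
		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
			do_wakeup = 1;

	if (do_wakeup)
		wake_up(&conf->wait_for_overlap);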
Signed-off-by: NeilBrown --- drivers/md/raid5.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index fc5c4039c394..6de2e1edd492 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3557,7 +3557,8 @@ unhash: spin_lock_irq(&head_sh->stripe_lock); head_sh->batch_head = NULL; spin_unlock_irq(&head_sh->stripe_lock); - wake_up_nr(&conf->wait_for_overlap, wakeup_nr); + if (wakeup_nr) + wake_up(&conf->wait_for_overlap); if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG) set_bit(STRIPE_HANDLE, &head_sh->state); } @@ -4238,6 +4239,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh) { struct stripe_head *sh, *next; int i; + int do_wakeup = 0; list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { @@ -4250,10 +4252,12 @@ static void break_stripe_batch_list(struct stripe_head *head_sh) STRIPE_EXPAND_SYNC_FLAG)); sh->check_state = head_sh->check_state; sh->reconstruct_state = head_sh->reconstruct_state; - for (i = 0; i < sh->disks; i++) + for (i = 0; i < sh->disks; i++) { + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) + do_wakeup = 1; sh->dev[i].flags = head_sh->dev[i].flags & (~((1 << R5_WriteError) | (1 << R5_Overlap))); - + } spin_lock_irq(&sh->stripe_lock); sh->batch_head = NULL; spin_unlock_irq(&sh->stripe_lock); @@ -4261,6 +4265,15 @@ static void break_stripe_batch_list(struct stripe_head *head_sh) set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); } + spin_lock_irq(&head_sh->stripe_lock); + head_sh->batch_head = NULL; + spin_unlock_irq(&head_sh->stripe_lock); + for (i = 0; i < head_sh->disks; i++) + if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) + do_wakeup = 1; + + if (do_wakeup) + wake_up(&head_sh->raid_conf->wait_for_overlap); } static void handle_stripe(struct stripe_head *sh) From 3960ce796198254b7a1b420dc9a26d80928523bd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 May 2015 12:20:36 +1000 Subject: [PATCH 351/481] md/raid5: add handle_flags arg to break_stripe_batch_list. When we break a stripe_batch_list we sometimes want to set STRIPE_HANDLE on the individual stripes, and sometimes not. So pass a 'handle_flags' arg. If it is zero, always set STRIPE_HANDLE (on non-head stripes). If not zero, only set it if any of the given flags are present. 
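The convention fits in two lines (a simplified restatement of the hunk below):

	if (handle_flags == 0 || (sh->state & handle_flags))
		set_bit(STRIPE_HANDLE, &sh->state);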
Signed-off-by: NeilBrown --- drivers/md/raid5.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6de2e1edd492..0b65eb51e562 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4235,7 +4235,8 @@ static int clear_batch_ready(struct stripe_head *sh) return 0; } -static void break_stripe_batch_list(struct stripe_head *head_sh) +static void break_stripe_batch_list(struct stripe_head *head_sh, + unsigned long handle_flags) { struct stripe_head *sh, *next; int i; @@ -4261,8 +4262,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh) spin_lock_irq(&sh->stripe_lock); sh->batch_head = NULL; spin_unlock_irq(&sh->stripe_lock); - - set_bit(STRIPE_HANDLE, &sh->state); + if (handle_flags == 0 || + sh->state & handle_flags) + set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); } spin_lock_irq(&head_sh->stripe_lock); @@ -4271,6 +4273,8 @@ static void break_stripe_batch_list(struct stripe_head *head_sh) for (i = 0; i < head_sh->disks; i++) if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) do_wakeup = 1; + if (head_sh->state & handle_flags) + set_bit(STRIPE_HANDLE, &head_sh->state); if (do_wakeup) wake_up(&head_sh->raid_conf->wait_for_overlap); @@ -4299,7 +4303,7 @@ static void handle_stripe(struct stripe_head *sh) } if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) - break_stripe_batch_list(sh); + break_stripe_batch_list(sh, 0); if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { spin_lock(&sh->stripe_lock); From 1b956f7a8f9aa63ea9644ab8c3374cf381993363 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 May 2015 12:40:26 +1000 Subject: [PATCH 352/481] md/raid5: be more selective about distributing flags across batch. When a batch of stripes is broken up, we keep some of the flags that were per-stripe, and copy other flags from the head to all others. This only happens while a stripe is being handled, so many of the flags are irrelevant. The "SYNC_FLAGS" (which I've renamed to make it clear there are several) and STRIPE_DEGRADED are set per-stripe and so need to be preserved. STRIPE_INSYNC is the only flag that is set on the head that needs to be propagated to all others. For safety, add a WARN_ON if others are set, except: STRIPE_HANDLE - this is safe and per-stripe and we are going to set it in several cases anyway STRIPE_INSYNC STRIPE_IO_STARTED - this is just a hint and doesn't hurt. STRIPE_ON_PLUG_LIST STRIPE_ON_RELEASE_LIST - It is pointless for a batched stripe to be on one of these lists, but it can happen and can be safely ignored.
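The flag transfer itself goes through set_mask_bits(); a simplified reading of that call (the helper atomically computes *ptr = (*ptr & ~mask) | bits):

    set_mask_bits(&sh->state,
                  ~(STRIPE_EXPAND_SYNC_FLAGS | (1 << STRIPE_DEGRADED)),  /* flags each stripe keeps */
                  head_sh->state & (1 << STRIPE_INSYNC));                /* only bit copied from the head */

so each batched stripe retains its own expand/sync flags and STRIPE_DEGRADED, and inherits at most STRIPE_INSYNC from the head.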
Signed-off-by: NeilBrown --- drivers/md/raid5.c | 55 ++++++++++++++++++++++++++++++++++++---------- drivers/md/raid5.h | 2 +- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0b65eb51e562..1141b7f62e6e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3534,10 +3534,27 @@ unhash: struct stripe_head, batch_list); list_del_init(&sh->batch_list); - set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG, - head_sh->state & ~((1 << STRIPE_ACTIVE) | - (1 << STRIPE_PREREAD_ACTIVE) | - STRIPE_EXPAND_SYNC_FLAG)); + WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | + (1 << STRIPE_SYNCING) | + (1 << STRIPE_REPLACED) | + (1 << STRIPE_PREREAD_ACTIVE) | + (1 << STRIPE_DELAYED) | + (1 << STRIPE_BIT_DELAY) | + (1 << STRIPE_FULL_WRITE) | + (1 << STRIPE_BIOFILL_RUN) | + (1 << STRIPE_COMPUTE_RUN) | + (1 << STRIPE_OPS_REQ_PENDING) | + (1 << STRIPE_DISCARD) | + (1 << STRIPE_BATCH_READY) | + (1 << STRIPE_BATCH_ERR) | + (1 << STRIPE_BITMAP_PENDING))); + WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | + (1 << STRIPE_REPLACED))); + + set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | + (1 << STRIPE_DEGRADED)), + head_sh->state & (1 << STRIPE_INSYNC)); + sh->check_state = head_sh->check_state; sh->reconstruct_state = head_sh->reconstruct_state; for (i = 0; i < sh->disks; i++) { @@ -3549,7 +3566,7 @@ unhash: spin_lock_irq(&sh->stripe_lock); sh->batch_head = NULL; spin_unlock_irq(&sh->stripe_lock); - if (sh->state & STRIPE_EXPAND_SYNC_FLAG) + if (sh->state & STRIPE_EXPAND_SYNC_FLAGS) set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); } @@ -3559,7 +3576,7 @@ unhash: spin_unlock_irq(&head_sh->stripe_lock); if (wakeup_nr) wake_up(&conf->wait_for_overlap); - if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG) + if (head_sh->state & STRIPE_EXPAND_SYNC_FLAGS) set_bit(STRIPE_HANDLE, &head_sh->state); } @@ -4246,11 +4263,27 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, list_del_init(&sh->batch_list); - set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG, - head_sh->state & ~((1 << STRIPE_ACTIVE) | - (1 << STRIPE_PREREAD_ACTIVE) | - (1 << STRIPE_DEGRADED) | - STRIPE_EXPAND_SYNC_FLAG)); + WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | + (1 << STRIPE_SYNCING) | + (1 << STRIPE_REPLACED) | + (1 << STRIPE_PREREAD_ACTIVE) | + (1 << STRIPE_DELAYED) | + (1 << STRIPE_BIT_DELAY) | + (1 << STRIPE_FULL_WRITE) | + (1 << STRIPE_BIOFILL_RUN) | + (1 << STRIPE_COMPUTE_RUN) | + (1 << STRIPE_OPS_REQ_PENDING) | + (1 << STRIPE_DISCARD) | + (1 << STRIPE_BATCH_READY) | + (1 << STRIPE_BATCH_ERR) | + (1 << STRIPE_BITMAP_PENDING))); + WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | + (1 << STRIPE_REPLACED))); + + set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | + (1 << STRIPE_DEGRADED)), + head_sh->state & (1 << STRIPE_INSYNC)); + sh->check_state = head_sh->check_state; sh->reconstruct_state = head_sh->reconstruct_state; for (i = 0; i < sh->disks; i++) { diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 01cdb9f3a0c4..896d603ad0da 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -342,7 +342,7 @@ enum { */ }; -#define STRIPE_EXPAND_SYNC_FLAG \ +#define STRIPE_EXPAND_SYNC_FLAGS \ ((1 << STRIPE_EXPAND_SOURCE) |\ (1 << STRIPE_EXPAND_READY) |\ (1 << STRIPE_EXPANDING) |\ From 787b76fa37159050f6d26aebfa6210009baed93b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 May 2015 12:56:41 +1000 Subject: [PATCH 353/481] md/raid5: call break_stripe_batch_list from handle_stripe_clean_event Now that the code in 
break_stripe_batch_list() is nearly identical to the end of handle_stripe_clean_event, replace the latter with a function call. The only remaining difference of any interest is the masking that is applied to dev[i].flags copied from head_sh. R5_WriteError certainly isn't wanted as it is set per-stripe, not per-batch. R5_Overlap isn't wanted as it is explicitly handled. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 61 +++------------------------------------------- 1 file changed, 4 insertions(+), 57 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1141b7f62e6e..3254504b1080 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3420,6 +3420,8 @@ static void handle_stripe_fill(struct stripe_head *sh, set_bit(STRIPE_HANDLE, &sh->state); } +static void break_stripe_batch_list(struct stripe_head *head_sh, + unsigned long handle_flags); /* handle_stripe_clean_event * any written block on an uptodate or failed drive can be returned. * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but @@ -3433,7 +3435,6 @@ static void handle_stripe_clean_event(struct r5conf *conf, int discard_pending = 0; struct stripe_head *head_sh = sh; bool do_endio = false; - int wakeup_nr = 0; for (i = disks; i--; ) if (sh->dev[i].written) { @@ -3522,62 +3523,8 @@ unhash: if (atomic_dec_and_test(&conf->pending_full_writes)) md_wakeup_thread(conf->mddev->thread); - if (!head_sh->batch_head || !do_endio) - return; - for (i = 0; i < head_sh->disks; i++) { - if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) - wakeup_nr++; - } - while (!list_empty(&head_sh->batch_list)) { - int i; - sh = list_first_entry(&head_sh->batch_list, - struct stripe_head, batch_list); - list_del_init(&sh->batch_list); - - WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | - (1 << STRIPE_SYNCING) | - (1 << STRIPE_REPLACED) | - (1 << STRIPE_PREREAD_ACTIVE) | - (1 << STRIPE_DELAYED) | - (1 << STRIPE_BIT_DELAY) | - (1 << STRIPE_FULL_WRITE) | - (1 << STRIPE_BIOFILL_RUN) | - (1 << STRIPE_COMPUTE_RUN) | - (1 << STRIPE_OPS_REQ_PENDING) | - (1 << STRIPE_DISCARD) | - (1 << STRIPE_BATCH_READY) | - (1 << STRIPE_BATCH_ERR) | - (1 << STRIPE_BITMAP_PENDING))); - WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | - (1 << STRIPE_REPLACED))); - - set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | - (1 << STRIPE_DEGRADED)), - head_sh->state & (1 << STRIPE_INSYNC)); - - sh->check_state = head_sh->check_state; - sh->reconstruct_state = head_sh->reconstruct_state; - for (i = 0; i < sh->disks; i++) { - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wakeup_nr++; - sh->dev[i].flags = head_sh->dev[i].flags; - } - - spin_lock_irq(&sh->stripe_lock); - sh->batch_head = NULL; - spin_unlock_irq(&sh->stripe_lock); - if (sh->state & STRIPE_EXPAND_SYNC_FLAGS) - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); - } - - spin_lock_irq(&head_sh->stripe_lock); - head_sh->batch_head = NULL; - spin_unlock_irq(&head_sh->stripe_lock); - if (wakeup_nr) - wake_up(&conf->wait_for_overlap); - if (head_sh->state & STRIPE_EXPAND_SYNC_FLAGS) - set_bit(STRIPE_HANDLE, &head_sh->state); + if (head_sh->batch_head && do_endio) + break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS); } static void handle_stripe_dirtying(struct r5conf *conf, From 626f2092c85ac847bb80b3257eb6a565dec32278 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 22 May 2015 14:03:10 +1000 Subject: [PATCH 354/481] md/raid5: break stripe-batches when the array has failed.
Once the array has too much failure, we need to break stripe-batches up so they can all be dealt with. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3254504b1080..553d54b87052 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4337,6 +4337,7 @@ static void handle_stripe(struct stripe_head *sh) if (s.failed > conf->max_degraded) { sh->check_state = 0; sh->reconstruct_state = 0; + break_stripe_batch_list(sh, 0); if (s.to_read+s.to_write+s.written) handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); if (s.syncing + s.replacing) From cd5279c194f89c9b97c294af4aaf4ea8c5e3c704 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 27 May 2015 07:16:43 -0700 Subject: [PATCH 355/481] ip_vti/ip6_vti: Do not touch skb->mark on xmit Instead of modifying skb->mark we can simply modify the flowi_mark that is generated as a result of the xfrm_decode_session. By doing this we don't need to actually touch the skb->mark and it can be preserved as it passes out through the tunnel. Signed-off-by: Alexander Duyck Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 5 +++-- net/ipv6/ip6_vti.c | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 9f7269f3c54a..4c318e1c13c8 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -216,8 +216,6 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl, 0, sizeof(fl)); - skb->mark = be32_to_cpu(tunnel->parms.o_key); - switch (skb->protocol) { case htons(ETH_P_IP): xfrm_decode_session(skb, &fl, AF_INET); @@ -233,6 +231,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + /* override mark with tunnel output key */ + fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key); + return vti_xmit(skb, dev, &fl); } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ed9d681207fa..104de4da3ff3 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -495,7 +495,6 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) int ret; memset(&fl, 0, sizeof(fl)); - skb->mark = be32_to_cpu(t->parms.o_key); switch (skb->protocol) { case htons(ETH_P_IPV6): @@ -516,6 +515,9 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; } + /* override mark with tunnel output key */ + fl.flowi_mark = be32_to_cpu(t->parms.o_key); + ret = vti6_xmit(skb, dev, &fl); if (ret < 0) goto tx_err; From 049f8e2e28d9c3dac0744cc2f19d3157c7fb5646 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 27 May 2015 07:16:49 -0700 Subject: [PATCH 356/481] xfrm: Override skb->mark with tunnel->parm.i_key in xfrm_input This change makes it so that if a tunnel is defined we just use the mark from the tunnel instead of the mark from the skb header. By doing this we can avoid the need to set skb->mark inside of the tunnel receive functions. 
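A condensed sketch of the lookup-mark override described above (IPv4 branch; the IPv6 case is analogous, and the identifiers come from the diff that follows):

    u32 mark = skb->mark;

    if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)        /* a vti/vti6 tunnel was recorded on receive */
            mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);

    x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);

skb->mark itself is no longer written in the receive path.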
Signed-off-by: Alexander Duyck Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 526c4feb3b50..b58286ecd156 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include static struct kmem_cache *secpath_cachep __read_mostly; @@ -186,6 +188,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct xfrm_state *x = NULL; xfrm_address_t *daddr; struct xfrm_mode *inner_mode; + u32 mark = skb->mark; unsigned int family; int decaps = 0; int async = 0; @@ -203,6 +206,18 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_SPI_SKB_CB(skb)->daddroff); family = XFRM_SPI_SKB_CB(skb)->family; + /* if tunnel is present override skb->mark value with tunnel i_key */ + if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) { + switch (family) { + case AF_INET: + mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key); + break; + case AF_INET6: + mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key); + break; + } + } + /* Allocate new secpath or COW existing one. */ if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { struct sec_path *sp; @@ -229,7 +244,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - x = xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family); + x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family); if (x == NULL) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); From d55c670cbc54b2270a465cdc382ce71adae45785 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 27 May 2015 07:16:54 -0700 Subject: [PATCH 357/481] ip_vti/ip6_vti: Preserve skb->mark after rcv_cb call The vti6_rcv_cb and vti_rcv_cb calls were leaving the skb->mark modified after completing the function. This resulted in the original skb->mark value being lost. Since we only need skb->mark to be set for xfrm_policy_check we can pull the assignment into the rcv_cb calls and then just restore the original mark after xfrm_policy_check has been completed. 
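The resulting save/override/restore pattern in the rcv_cb handlers is small enough to show in full (IPv4 case, taken from the diff below; the IPv6 handler does the same with t->parms.i_key):

    u32 orig_mark = skb->mark;
    int ret;

    skb->mark = be32_to_cpu(tunnel->parms.i_key);   /* mark is only needed for the policy check */
    ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
    skb->mark = orig_mark;                          /* hand the caller's mark back untouched */

    if (!ret)
            return -EPERM;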
Signed-off-by: Alexander Duyck Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 9 +++++++-- net/ipv6/ip6_vti.c | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 4c318e1c13c8..0c152087ca15 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -65,7 +65,6 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, goto drop; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; - skb->mark = be32_to_cpu(tunnel->parms.i_key); return xfrm_input(skb, nexthdr, spi, encap_type); } @@ -91,6 +90,8 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) struct pcpu_sw_netstats *tstats; struct xfrm_state *x; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; + u32 orig_mark = skb->mark; + int ret; if (!tunnel) return 1; @@ -107,7 +108,11 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); family = x->inner_mode->afinfo->family; - if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + skb->mark = be32_to_cpu(tunnel->parms.i_key); + ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); + skb->mark = orig_mark; + + if (!ret) return -EPERM; skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev))); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 104de4da3ff3..ff3bd863fa03 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -322,7 +322,6 @@ static int vti6_rcv(struct sk_buff *skb) } XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; - skb->mark = be32_to_cpu(t->parms.i_key); rcu_read_unlock(); @@ -342,6 +341,8 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) struct pcpu_sw_netstats *tstats; struct xfrm_state *x; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; + u32 orig_mark = skb->mark; + int ret; if (!t) return 1; @@ -358,7 +359,11 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); family = x->inner_mode->afinfo->family; - if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + skb->mark = be32_to_cpu(t->parms.i_key); + ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); + skb->mark = orig_mark; + + if (!ret) return -EPERM; skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev))); From 56ccc1125bc141cf63927eda7febff4216dea2d3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 28 May 2015 17:53:29 +1000 Subject: [PATCH 358/481] md: fix race when unfreezing sync_action A recent change removed the need for locking around writing to "sync_action" (and various other places), but introduced a subtle race. When e.g. setting 'reshape' on a 'frozen' array, the 'frozen' flag is cleared before 'reshape' is set, so the md thread can get in and start trying recovery - which isn't wanted. So instead of clearing MD_RECOVERY_FROZEN for any command except 'frozen', only clear it when each specific command is parsed. This allows the handling of 'reshape' to clear the bit while a lock is held. Also remove some places where we set MD_RECOVERY_NEEDED, as it is always set on non-error exit of the function. 
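In outline, the fix moves the clear_bit() into each command branch so it happens only once the command is known (and, for "reshape", under mddev_lock()); a trimmed sketch of the shape of action_store() after the patch:

    if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
            if (cmd_match(page, "frozen"))
                    set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
            else
                    clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
            ...
    } else if (cmd_match(page, "reshape")) {
            err = mddev_lock(mddev);
            if (!err) {
                    clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                    err = mddev->pers->start_reshape(mddev);
                    mddev_unlock(mddev);
            }
    }

The remaining branches follow the same pattern; see the diff below.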
Signed-off-by: NeilBrown Fixes: 6791875e2e53 ("md: make reconfig_mutex optional for writes to md sysfs files.") --- drivers/md/md.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index d4f31e195e26..8f10f4ea70ea 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4211,12 +4211,12 @@ action_store(struct mddev *mddev, const char *page, size_t len) if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (cmd_match(page, "frozen")) - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - else - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { + if (cmd_match(page, "frozen")) + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + else + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); @@ -4229,16 +4229,17 @@ action_store(struct mddev *mddev, const char *page, size_t len) test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return -EBUSY; else if (cmd_match(page, "resync")) - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); else if (cmd_match(page, "recover")) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } else if (cmd_match(page, "reshape")) { int err; if (mddev->pers->start_reshape == NULL) return -EINVAL; err = mddev_lock(mddev); if (!err) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); err = mddev->pers->start_reshape(mddev); mddev_unlock(mddev); } @@ -4250,6 +4251,7 @@ action_store(struct mddev *mddev, const char *page, size_t len) set_bit(MD_RECOVERY_CHECK, &mddev->recovery); else if (!cmd_match(page, "repair")) return -EINVAL; + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); } From b40eda6408e94ee286cb5720cd3f409f70e01778 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Thu, 28 May 2015 09:40:23 +0200 Subject: [PATCH 359/481] ALSA: hda - Disable Headphone Mic boost for ALC662 When headphone mic boost is above zero, some 10 - 20 second delay might occur before the headphone mic is operational. Therefore disable the headphone mic boost control (recording gain is sufficient even without it). (Note: this patch is not about the headset mic, it's about the less common mic-in only mode.) BugLink: https://bugs.launchpad.net/bugs/1454235 Suggested-by: Kailang Yang Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e3bf72bb278c..ea3af38eee58 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4228,6 +4228,11 @@ static void alc_fixup_headset_mode_alc662(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; spec->gen.hp_mic = 1; /* Mic-in is same pin as headphone */ + + /* Disable boost for mic-in permanently. (This code is only called + from quirks that guarantee that the headphone is at NID 0x1b.) 
*/ + snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000); + snd_hda_override_wcaps(codec, 0x1b, get_wcaps(codec, 0x1b) & ~AC_WCAP_IN_AMP); } else alc_fixup_headset_mode(codec, fix, action); } From 1e6277de3a23373b89e0affc3d179f2173b857a4 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 28 May 2015 09:29:27 +0100 Subject: [PATCH 360/481] x86/mm: Mark arch_ioremap_p{m,u}d_supported() __init ... as their only caller is. Signed-off-by: Jan Beulich Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5566EE07020000780007E683@mail.emea.novell.com Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 82d63ed70045..b0da3588b452 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -365,7 +365,7 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); -int arch_ioremap_pud_supported(void) +int __init arch_ioremap_pud_supported(void) { #ifdef CONFIG_X86_64 return cpu_has_gbpages; @@ -374,7 +374,7 @@ int arch_ioremap_pud_supported(void) #endif } -int arch_ioremap_pmd_supported(void) +int __init arch_ioremap_pmd_supported(void) { return cpu_has_pse; } From fc8a350d0b8df744fd6d3c55907b3886979d2638 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 13 May 2015 14:34:07 +0300 Subject: [PATCH 361/481] iwlwifi: pcie: fix tracking of cmd_in_flight The cmd_in_flight tracking was introduced to workaround faulty power management hardware, by having the driver keep the NIC awake as long as there are commands in flight. However, some of the code handling this workaround was unconditionally executed, which resulted with an inconsistent state where the driver assumed that the NIC was awake although it wasn't. Fix this by renaming 'cmd_in_flight' to 'cmd_hold_nic_awake' and handling the NIC requested awake state only for hardwares for which the workaround is needed. Signed-off-by: Ilan Peer Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/internal.h | 6 ++--- drivers/net/wireless/iwlwifi/pcie/trans.c | 4 ++-- drivers/net/wireless/iwlwifi/pcie/tx.c | 23 ++++++++------------ 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/internal.h b/drivers/net/wireless/iwlwifi/pcie/internal.h index 01996c9d98a7..376b84e54ad7 100644 --- a/drivers/net/wireless/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/iwlwifi/pcie/internal.h @@ -1,7 +1,7 @@ /****************************************************************************** * - * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2003 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. 
@@ -320,7 +320,7 @@ struct iwl_trans_pcie { /*protect hw register */ spinlock_t reg_lock; - bool cmd_in_flight; + bool cmd_hold_nic_awake; bool ref_cmd_in_flight; /* protect ref counter */ diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index d6f6515fe663..dc179094e6a0 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1372,7 +1372,7 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent, spin_lock_irqsave(&trans_pcie->reg_lock, *flags); - if (trans_pcie->cmd_in_flight) + if (trans_pcie->cmd_hold_nic_awake) goto out; /* this bit wakes up the NIC */ @@ -1438,7 +1438,7 @@ static void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans, */ __acquire(&trans_pcie->reg_lock); - if (trans_pcie->cmd_in_flight) + if (trans_pcie->cmd_hold_nic_awake) goto out; __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index 06952aadfd7b..5ef8044c2ea3 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -1039,18 +1039,14 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans, iwl_trans_pcie_ref(trans); } - if (trans_pcie->cmd_in_flight) - return 0; - - trans_pcie->cmd_in_flight = true; - /* * wake up the NIC to make sure that the firmware will see the host * command - we will let the NIC sleep once all the host commands * returned. This needs to be done only on NICs that have * apmg_wake_up_wa set. */ - if (trans->cfg->base_params->apmg_wake_up_wa) { + if (trans->cfg->base_params->apmg_wake_up_wa && + !trans_pcie->cmd_hold_nic_awake) { __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) @@ -1064,10 +1060,10 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans, if (ret < 0) { __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); - trans_pcie->cmd_in_flight = false; IWL_ERR(trans, "Failed to wake NIC for hcmd\n"); return -EIO; } + trans_pcie->cmd_hold_nic_awake = true; } return 0; @@ -1085,15 +1081,14 @@ static int iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans) iwl_trans_pcie_unref(trans); } - if (WARN_ON(!trans_pcie->cmd_in_flight)) - return 0; + if (trans->cfg->base_params->apmg_wake_up_wa) { + if (WARN_ON(!trans_pcie->cmd_hold_nic_awake)) + return 0; - trans_pcie->cmd_in_flight = false; - - if (trans->cfg->base_params->apmg_wake_up_wa) + trans_pcie->cmd_hold_nic_awake = false; __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, - CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); - + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + } return 0; } From f115fdfd61bd627e99d636bb61a3d3ff93397048 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 19 May 2015 14:20:25 +0300 Subject: [PATCH 362/481] iwlwifi: nvm: fix otp parsing in 8000 hw family The radio cfg DWORD was taken from the wrong place in the 8000 HW family, after a line in the code was wrongly changed by mistake. This broke several 8260 devices. 
Fixes: 5dd9c68a854a ("iwlwifi: drop support for early versions of 8000") Signed-off-by: Liad Kaufman Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/iwl-nvm-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 75e96db6626b..8e604a3931ca 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -471,7 +471,7 @@ static int iwl_get_radio_cfg(const struct iwl_cfg *cfg, const __le16 *nvm_sw, if (cfg->device_family != IWL_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + RADIO_CFG); - return le32_to_cpup((__le32 *)(nvm_sw + RADIO_CFG_FAMILY_8000)); + return le32_to_cpup((__le32 *)(phy_sku + RADIO_CFG_FAMILY_8000)); } From 3a7af58faa7829faa26026c245d2a8a44e9605c5 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 13 Apr 2015 18:27:35 +0200 Subject: [PATCH 363/481] mac80211: Fix mac80211.h docbook comments A couple of enums in mac80211.h became structures recently, but the comments didn't follow suit, leading to errors like: Error(.//include/net/mac80211.h:367): Cannot parse enum! Documentation/DocBook/Makefile:93: recipe for target 'Documentation/DocBook/80211.xml' failed make[1]: *** [Documentation/DocBook/80211.xml] Error 1 Makefile:1361: recipe for target 'mandocs' failed make: *** [mandocs] Error 2 Fix the comments accordingly. Added a couple of other small comment fixes while I was there to silence other recently-added docbook warnings. Reported-by: Jim Davis Signed-off-by: Jonathan Corbet Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8e3668b44c29..fc57f6b82fc5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -354,7 +354,7 @@ enum ieee80211_rssi_event_data { }; /** - * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT + * struct ieee80211_rssi_event - data attached to an %RSSI_EVENT * @data: See &enum ieee80211_rssi_event_data */ struct ieee80211_rssi_event { @@ -388,7 +388,7 @@ enum ieee80211_mlme_event_status { }; /** - * enum ieee80211_mlme_event - data attached to an %MLME_EVENT + * struct ieee80211_mlme_event - data attached to an %MLME_EVENT * @data: See &enum ieee80211_mlme_event_data * @status: See &enum ieee80211_mlme_event_status * @reason: the reason code if applicable @@ -401,9 +401,10 @@ struct ieee80211_mlme_event { /** * struct ieee80211_event - event to be sent to the driver - * @type The event itself. See &enum ieee80211_event_type.
* @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT + * @u: union holding the above two fields */ struct ieee80211_event { enum ieee80211_event_type type; From 7d072b404c5d8f1e0b62b6bc488dfeaa61bd2d8d Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Tue, 26 May 2015 13:19:46 +0200 Subject: [PATCH 364/481] brcmfmac: avoid null pointer access when brcmf_msgbuf_get_pktid() fails The function brcmf_msgbuf_get_pktid() may return a NULL pointer so the callers should check the return pointer before accessing it to avoid the crash below (see [1]): brcmfmac: brcmf_msgbuf_get_pktid: Invalid packet id 273 (not in use) BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 IP: [] skb_pull+0x5/0x50 PGD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: pci_stub vboxpci(O) vboxnetflt(O) vboxnetadp(O) vboxdrv(O) snd_hda_codec_hdmi bnep mousedev hid_generic ushwmon msr ext4 crc16 mbcache jbd2 sd_mod uas usb_storage ahci libahci libata scsi_mod xhci_pci xhci_hcd usbcore usb_common CPU: 0 PID: 1661 Comm: irq/61-brcmf_pc Tainted: G O 4.0.1-MacbookPro-ARCH #1 Hardware name: Apple Inc. MacBookPro12,1/Mac-E43C1C25D4880AD6, BIOS MBP121.88Z.0167.B02.1503241251 03/24/2015 task: ffff880264203cc0 ti: ffff88025ffe4000 task.ti: ffff88025ffe4000 RIP: 0010:[] [] skb_pull+0x5/0x50 RSP: 0018:ffff88025ffe7d40 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff88008a33c000 RCX: 0000000000000044 RDX: 0000000000000000 RSI: 000000000000004a RDI: 0000000000000000 RBP: ffff88025ffe7da8 R08: 0000000000000096 R09: 000000000000004a R10: 0000000000000000 R11: 000000000000048e R12: ffff88025ff14f00 R13: 0000000000000000 R14: ffff880263b48200 R15: ffff88008a33c000 FS: 0000000000000000(0000) GS:ffff88026ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000080 CR3: 000000000180b000 CR4: 00000000003407f0 Stack: ffffffffa06aed74 ffff88025ffe7dc8 ffff880263b48270 ffff880263b48278 05ea88020000004a 0002ffff81014635 000000001720b2f6 ffff88026ec116c0 ffff880263b48200 0000000000010000 ffff880263b4ae00 ffff880264203cc0 Call Trace: [] ? brcmf_msgbuf_process_rx+0x404/0x480 [brcmfmac] [] ? irq_finalize_oneshot.part.30+0xf0/0xf0 [] brcmf_proto_msgbuf_rx_trigger+0x35/0xf0 [brcmfmac] [] brcmf_pcie_isr_thread_v2+0x8a/0x130 [brcmfmac] [] irq_thread_fn+0x20/0x50 [] irq_thread+0x13f/0x170 [] ? wake_threads_waitq+0x30/0x30 [] ? irq_thread_dtor+0xb0/0xb0 [] kthread+0xd8/0xf0 [] ? kthread_create_on_node+0x1c0/0x1c0 [] ret_from_fork+0x58/0x90 [] ? 
kthread_create_on_node+0x1c0/0x1c0 Code: 01 83 e2 f7 88 50 01 48 83 c4 08 5b 5d f3 c3 0f 1f 80 00 00 00 00 83 e2 f7 88 50 01 c3 66 0f 1f 84 00 00 00 00 00 0f 1f RIP [] skb_pull+0x5/0x50 RSP CR2: 0000000000000080 ---[ end trace b074c0f90e7c997d ]--- [1] http://mid.gmane.org/20150430193259.GA5630@googlemail.com Cc: # v3.18, v3.19, v4.0, v4.1 Reported-by: Michael Hornung Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c index 4ec9811f49c8..65efb1468988 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c @@ -511,11 +511,9 @@ static int brcmf_msgbuf_query_dcmd(struct brcmf_pub *drvr, int ifidx, msgbuf->rx_pktids, msgbuf->ioctl_resp_pktid); if (msgbuf->ioctl_resp_ret_len != 0) { - if (!skb) { - brcmf_err("Invalid packet id idx recv'd %d\n", - msgbuf->ioctl_resp_pktid); + if (!skb) return -EBADF; - } + memcpy(buf, skb->data, (len < msgbuf->ioctl_resp_ret_len) ? len : msgbuf->ioctl_resp_ret_len); } @@ -874,10 +872,8 @@ brcmf_msgbuf_process_txstatus(struct brcmf_msgbuf *msgbuf, void *buf) flowid -= BRCMF_NROF_H2D_COMMON_MSGRINGS; skb = brcmf_msgbuf_get_pktid(msgbuf->drvr->bus_if->dev, msgbuf->tx_pktids, idx); - if (!skb) { - brcmf_err("Invalid packet id idx recv'd %d\n", idx); + if (!skb) return; - } set_bit(flowid, msgbuf->txstatus_done_map); commonring = msgbuf->flowrings[flowid]; @@ -1156,6 +1152,8 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf) skb = brcmf_msgbuf_get_pktid(msgbuf->drvr->bus_if->dev, msgbuf->rx_pktids, idx); + if (!skb) + return; if (data_offset) skb_pull(skb, data_offset); From 7c0411d2fabc2e2702c9871ffb603e251158b317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 28 May 2015 15:51:59 +0200 Subject: [PATCH 365/481] drm/radeon: partially revert "fix VM_CONTEXT*_PAGE_TABLE_END_ADDR handling" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have that bug for years and some users report side effects when fixing it on older hardware. So revert it for VM_CONTEXT0_PAGE_TABLE_END_ADDR, but keep it for VM 1-15. 
Signed-off-by: Christian König CC: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 2 +- drivers/gpu/drm/radeon/evergreen.c | 2 +- drivers/gpu/drm/radeon/ni.c | 2 +- drivers/gpu/drm/radeon/r600.c | 2 +- drivers/gpu/drm/radeon/rv770.c | 2 +- drivers/gpu/drm/radeon/si.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a0c35bbc8546..ba50f3c1c2e0 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5822,7 +5822,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) L2_CACHE_BIGK_FRAGMENT_SIZE(4)); /* setup context0 */ WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(rdev->dummy_page.addr >> 12)); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 05e6d6ef5963..f848acfd3fc8 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2485,7 +2485,7 @@ static int evergreen_pcie_gart_enable(struct radeon_device *rdev) WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index aba2f428c0a8..64d3a771920d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1282,7 +1282,7 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) L2_CACHE_BIGK_FRAGMENT_SIZE(6)); /* setup context0 */ WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(rdev->dummy_page.addr >> 12)); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 25b4ac967742..8f6d862a1882 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1112,7 +1112,7 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev) WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index c54d6313a46d..01ee96acb398 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -921,7 +921,7 @@ static int rv770_pcie_gart_enable(struct radeon_device *rdev) 
WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 5326f753e107..4c679b802bc8 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -4303,7 +4303,7 @@ static int si_pcie_gart_enable(struct radeon_device *rdev) L2_CACHE_BIGK_FRAGMENT_SIZE(4)); /* setup context0 */ WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(rdev->dummy_page.addr >> 12)); From 74f9ce1cf2830b94e189f4e99678dbf19aa3bc90 Mon Sep 17 00:00:00 2001 From: George Wang Date: Fri, 29 May 2015 07:39:34 +1000 Subject: [PATCH 366/481] xfs: use percpu_counter_read_positive for mp->m_icount Function percpu_counter_read just return the current counter, which can be negative. This will cause the checking of "allocated inode counts <= m_maxicount" false positive. Use percpu_counter_read_positive can solve this problem, and be consistent with the purpose to introduce percpu mechanism to xfs. Signed-off-by: George Wang Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_ialloc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 07349a183a11..1c9e75521250 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc( */ newlen = args.mp->m_ialloc_inos; if (args.mp->m_maxicount && - percpu_counter_read(&args.mp->m_icount) + newlen > + percpu_counter_read_positive(&args.mp->m_icount) + newlen > args.mp->m_maxicount) return -ENOSPC; args.minlen = args.maxlen = args.mp->m_ialloc_blks; @@ -1339,10 +1339,13 @@ xfs_dialloc( * If we have already hit the ceiling of inode blocks then clear * okalloc so we scan all available agi structures for a free * inode. + * + * Read rough value of mp->m_icount by percpu_counter_read_positive, + * which will sacrifice the preciseness but improve the performance. */ if (mp->m_maxicount && - percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos > - mp->m_maxicount) { + percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos + > mp->m_maxicount) { noroom = 1; okalloc = 0; } From 80188b0d77d7426b494af739ac129e0e684acb84 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 29 May 2015 07:39:34 +1000 Subject: [PATCH 367/481] percpu_counter: batch size aware __percpu_counter_compare() XFS uses non-stanard batch sizes for avoiding frequent global counter updates on it's allocated inode counters, as they increment or decrement in batches of 64 inodes. Hence the standard percpu counter batch of 32 means that the counter is effectively a global counter. Currently Xfs uses a batch size of 128 so that it doesn't take the global lock on every single modification. 
However, Xfs also needs to compare accurately against zero, which means we need to use percpu_counter_compare(), and that has a hard-coded batch size of 32, and hence will spuriously fail to detect when it is supposed to use precise comparisons and hence the accounting goes wrong. Add __percpu_counter_compare() to take a custom batch size so we can use it sanely in XFS and factor percpu_counter_compare() to use it. Signed-off-by: Dave Chinner Acked-by: Tejun Heo Signed-off-by: Dave Chinner --- include/linux/percpu_counter.h | 13 ++++++++++++- lib/percpu_counter.c | 6 +++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 50e50095c8d1..84a109449610 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs); +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); + +static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); +} static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) return 0; } +static inline int +__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) +{ + return percpu_counter_compare(fbc, rhs); +} + static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 48144cdae819..f051d69f0910 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, * Compare counter against given value. * Return 1 if greater, 0 if equal and -1 if less */ -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ - if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else @@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) else return 0; } -EXPORT_SYMBOL(percpu_counter_compare); +EXPORT_SYMBOL(__percpu_counter_compare); static int __init percpu_counter_startup(void) { From 8c1903d3081ab7c416f1bbc7905f37a265d5e2e9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 29 May 2015 07:39:34 +1000 Subject: [PATCH 368/481] xfs: inode and free block counters need to use __percpu_counter_compare Because the counters use a custom batch size, the comparison functions need to be aware of that batch size otherwise the comparison does not work correctly. This leads to ASSERT failures on generic/027 like this: XFS: Assertion failed: 0, file: fs/xfs/xfs_mount.c, line: 1099 ------------[ cut here ]------------ .... 
Call Trace: [] xfs_mod_icount+0x99/0xc0 [] xfs_trans_unreserve_and_mod_sb+0x28b/0x5b0 [] xfs_log_commit_cil+0x321/0x580 [] xfs_trans_commit+0xb7/0x260 [] xfs_bmap_finish+0xcd/0x1b0 [] xfs_inactive_ifree+0x1e1/0x250 [] xfs_inactive+0x130/0x200 [] xfs_fs_evict_inode+0x91/0xf0 [] evict+0xb8/0x190 [] iput+0x18b/0x1f0 [] do_unlinkat+0x1f3/0x320 [] ? filp_close+0x5a/0x80 [] SyS_unlinkat+0x1b/0x40 [] system_call_fastpath+0x12/0x71 This is a regression introduced by commit 501ab32 ("xfs: use generic percpu counters for inode counter"). This patch fixes the same problem for both the inode counter and the free block counter in the superblocks. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_mount.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2ce7ee3b4ec1..6f23fbdfb365 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp) return xfs_sync_sb(mp, true); } +/* + * Deltas for the inode count are +/-64, hence we use a large batch size + * of 128 so we don't need to take the counter lock on every update. + */ +#define XFS_ICOUNT_BATCH 128 int xfs_mod_icount( struct xfs_mount *mp, int64_t delta) { - /* deltas are +/-64, hence the large batch size of 128. */ - __percpu_counter_add(&mp->m_icount, delta, 128); - if (percpu_counter_compare(&mp->m_icount, 0) < 0) { + __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH); + if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) { ASSERT(0); percpu_counter_add(&mp->m_icount, -delta); return -EINVAL; @@ -1113,6 +1117,14 @@ xfs_mod_ifree( return 0; } +/* + * Deltas for the block count can vary from 1 to very large, but lock contention + * only occurs on frequent small block count updates such as in the delayed + * allocation path for buffered writes (page a time updates). Hence we set + * a large batch count (1024) to minimise global counter updates except when + * we get near to ENOSPC and we have to be very accurate with our updates. + */ +#define XFS_FDBLOCKS_BATCH 1024 int xfs_mod_fdblocks( struct xfs_mount *mp, @@ -1151,25 +1163,19 @@ xfs_mod_fdblocks( * Taking blocks away, need to be more accurate the closer we * are to zero. * - * batch size is set to a maximum of 1024 blocks - if we are - * allocating of freeing extents larger than this then we aren't - * going to be hammering the counter lock so a lock per update - * is not a problem. - * * If the counter has a value of less than 2 * max batch size, * then make everything serialise as we are real close to * ENOSPC. */ -#define __BATCH 1024 - if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0) + if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH, + XFS_FDBLOCKS_BATCH) < 0) batch = 1; else - batch = __BATCH; -#undef __BATCH + batch = XFS_FDBLOCKS_BATCH; __percpu_counter_add(&mp->m_fdblocks, delta, batch); - if (percpu_counter_compare(&mp->m_fdblocks, - XFS_ALLOC_SET_ASIDE(mp)) >= 0) { + if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp), + XFS_FDBLOCKS_BATCH) >= 0) { /* we had space! */ return 0; } From 6dea405eee9e5e73eb54a1dbcc5b65047113cd92 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 29 May 2015 07:40:06 +1000 Subject: [PATCH 369/481] xfs: extent size hints can round up extents past MAXEXTLEN This results in BMBT corruption, as seen by this test: # mkfs.xfs -f -d size=40051712b,agcount=4 /dev/vdc .... 
# mount /dev/vdc /mnt/scratch # xfs_io -ft -c "extsize 16m" -c "falloc 0 30g" -c "bmap -vp" /mnt/scratch/foo which results in this failure on a debug kernel: XFS: Assertion failed: (blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0, file: fs/xfs/libxfs/xfs_bmap_btree.c, line: 211 .... Call Trace: [] xfs_bmbt_set_allf+0x8f/0x100 [] xfs_bmbt_set_all+0x1d/0x20 [] xfs_iext_insert+0x9e/0x120 [] ? xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [] xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [] xfs_bmapi_write+0x72b/0xed0 [] ? kmem_cache_alloc+0x15c/0x170 [] xfs_alloc_file_space+0x160/0x400 [] ? down_write+0x29/0x60 [] xfs_file_fallocate+0x29b/0x310 [] ? __sb_start_write+0x58/0x120 [] ? do_vfs_ioctl+0x318/0x570 [] vfs_fallocate+0x140/0x260 [] SyS_fallocate+0x48/0x80 [] system_call_fastpath+0x12/0x17 The tracepoint that indicates the extent that triggered the assert failure is: xfs_iext_insert: idx 0 offset 0 block 16777224 count 2097152 flag 1 Clearly indicating that the extent length is greater than MAXEXTLEN, which is 2097151. A prior trace point shows the allocation was an exact size match and that a length greater than MAXEXTLEN was asked for: xfs_alloc_size_done: agno 1 agbno 8 minlen 2097152 maxlen 2097152 ^^^^^^^ ^^^^^^^ We don't see this problem with extent size hints through the IO path because we can't do single IOs large enough to trigger MAXEXTLEN allocation. fallocate(), OTOH, is not limited in it's allocation sizes and so needs help here. The issue is that the extent size hint alignment is rounding up the extent size past MAXEXTLEN, because xfs_bmapi_write() is not taking into account extent size hints when calculating the maximum extent length to allocate. xfs_bmapi_reserve_delalloc() is already doing this, but direct extent allocation is not. Unfortunately, the calculation in xfs_bmapi_reserve_delalloc() is wrong, and it works only because delayed allocation extents are not limited in size to MAXEXTLEN in the in-core extent tree. hence this calculation does not work for direct allocation, and the delalloc code needs fixing. This may, in fact be the underlying bug that occassionally causes transaction overruns in delayed allocation extent conversion, so now we know it's wrong we should fix it, too. Many thanks to Brian Foster for finding this problem during review of this patch. Hence the fix, after much code reading, is to allow xfs_bmap_extsize_align() to align partial extents when full alignment would extend the alignment past MAXEXTLEN. We can safely do this because all callers have higher layer allocation loops that already handle short allocations, and so will simply run another allocation to cover the remainder of the requested allocation range that we ignored during alignment. The advantage of this approach is that it also removes the need for callers to do anything other than limit their requests to MAXEXTLEN - they don't really need to be aware of extent size hints at all. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index aeffeaaac0ec..f1026e86dabc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align( align_alen += temp; align_off -= temp; } - /* - * Same adjustment for the end of the requested area. 
- */ - if ((temp = (align_alen % extsz))) { + + /* Same adjustment for the end of the requested area. */ + temp = (align_alen % extsz); + if (temp) align_alen += extsz - temp; - } + + /* + * For large extent hint sizes, the aligned extent might be larger than + * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls + * the length back under MAXEXTLEN. The outer allocation loops handle + * short allocation just fine, so it is safe to do this. We only want to + * do it when we are forced to, though, because it means more allocation + * operations are required. + */ + while (align_alen > MAXEXTLEN) + align_alen -= extsz; + ASSERT(align_alen <= MAXEXTLEN); + /* * If the previous block overlaps with this proposed allocation * then move the start forward without adjusting the length. @@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align( return -EINVAL; } else { ASSERT(orig_off >= align_off); - ASSERT(orig_end <= align_off + align_alen); + /* see MAXEXTLEN handling above */ + ASSERT(orig_end <= align_off + align_alen || + align_alen + extsz > MAXEXTLEN); } #ifdef DEBUG @@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc( /* Figure out the extent size, adjust alen */ extsz = xfs_get_extsz_hint(ip); if (extsz) { - /* - * Make sure we don't exceed a single extent length when we - * align the extent by reducing length we are going to - * allocate by the maximum amount extent size aligment may - * require. - */ - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1)); error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, 1, 0, &aoff, &alen); ASSERT(!error); From 6dfe5a049f2d48582050339d2a6b6fda36dfd14c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 29 May 2015 07:40:08 +1000 Subject: [PATCH 370/481] xfs: xfs_attr_inactive leaves inconsistent attr fork state behind xfs_attr_inactive() is supposed to clean up the attribute fork when the inode is being freed. While it removes attribute fork extents, it completely ignores attributes in local format, which means that there can still be active attributes on the inode after xfs_attr_inactive() has run. This leads to problems with concurrent inode writeback - the in-core inode attribute fork is removed without locking on the assumption that nothing will be attempting to access the attribute fork after a call to xfs_attr_inactive() because it isn't supposed to exist on disk any more. To fix this, make xfs_attr_inactive() completely remove all traces of the attribute fork from the inode, regardless of it's state. Further, also remove the in-core attribute fork structure safely so that there is nothing further that needs to be done by callers to clean up the attribute fork. This means we can remove the in-core and on-disk attribute forks atomically. Also, on error simply remove the in-memory attribute fork. There's nothing that can be done with it once we have failed to remove the on-disk attribute fork, so we may as well just blow it away here anyway. 
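One detail worth noting (it also appears as a comment in the diff below): the reworked function keys off XFS_IFORK_Q(dp), which tests whether an attribute fork exists at all, rather than xfs_inode_hasattr(), which only tests whether the fork currently holds attributes:

    if (!XFS_IFORK_Q(dp))           /* no on-disk attr fork - skip the transaction, just clean up in-core state */
            goto out_destroy_fork;
    /* an empty but configured fork still has to be removed, hence not xfs_inode_hasattr() */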
cc: # 3.12 to 4.0 Reported-by: Waiman Long Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_attr_leaf.c | 8 ++-- fs/xfs/libxfs/xfs_attr_leaf.h | 2 +- fs/xfs/xfs_attr_inactive.c | 85 ++++++++++++++++++++--------------- fs/xfs/xfs_inode.c | 12 ++--- 4 files changed, 59 insertions(+), 48 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 04e79d57bca6..e9d401ce93bb 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) * After the last attribute is removed revert to original inode format, * making all literal area available to the data fork once more. */ -STATIC void -xfs_attr_fork_reset( +void +xfs_attr_fork_remove( struct xfs_inode *ip, struct xfs_trans *tp) { @@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) (mp->m_flags & XFS_MOUNT_ATTR2) && (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && !(args->op_flags & XFS_DA_OP_ADDNAME)) { - xfs_attr_fork_reset(dp, args->trans); + xfs_attr_fork_remove(dp, args->trans); } else { xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); @@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform( if (forkoff == -1) { ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); - xfs_attr_fork_reset(dp, args->trans); + xfs_attr_fork_remove(dp, args->trans); goto out; } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 025c4b820c03..882c8d338891 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -53,7 +53,7 @@ int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_list(struct xfs_attr_list_context *context); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); - +void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* * Internal routines when attribute fork size == XFS_LBSIZE(mp). diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index f9c1c64782d3..3fbf167cfb4c 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -380,23 +380,31 @@ xfs_attr3_root_inactive( return error; } +/* + * xfs_attr_inactive kills all traces of an attribute fork on an inode. It + * removes both the on-disk and in-memory inode fork. Note that this also has to + * handle the condition of inodes without attributes but with an attribute fork + * configured, so we can't use xfs_inode_hasattr() here. + * + * The in-memory attribute fork is removed even on error. + */ int -xfs_attr_inactive(xfs_inode_t *dp) +xfs_attr_inactive( + struct xfs_inode *dp) { - xfs_trans_t *trans; - xfs_mount_t *mp; - int error; + struct xfs_trans *trans; + struct xfs_mount *mp; + int cancel_flags = 0; + int lock_mode = XFS_ILOCK_SHARED; + int error = 0; mp = dp->i_mount; ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); - xfs_ilock(dp, XFS_ILOCK_SHARED); - if (!xfs_inode_hasattr(dp) || - dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - xfs_iunlock(dp, XFS_ILOCK_SHARED); - return 0; - } - xfs_iunlock(dp, XFS_ILOCK_SHARED); + xfs_ilock(dp, lock_mode); + if (!XFS_IFORK_Q(dp)) + goto out_destroy_fork; + xfs_iunlock(dp, lock_mode); /* * Start our first transaction of the day. @@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp) * the inode in every transaction to let it float upward through * the log. 
*/ + lock_mode = 0; trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL); error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); - if (error) { - xfs_trans_cancel(trans, 0); - return error; - } - xfs_ilock(dp, XFS_ILOCK_EXCL); + if (error) + goto out_cancel; + + lock_mode = XFS_ILOCK_EXCL; + cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT; + xfs_ilock(dp, lock_mode); + + if (!XFS_IFORK_Q(dp)) + goto out_cancel; /* * No need to make quota reservations here. We expect to release some @@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp) */ xfs_trans_ijoin(trans, dp, 0); - /* - * Decide on what work routines to call based on the inode size. - */ - if (!xfs_inode_hasattr(dp) || - dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - error = 0; - goto out; - } - error = xfs_attr3_root_inactive(&trans, dp); - if (error) - goto out; + /* invalidate and truncate the attribute fork extents */ + if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { + error = xfs_attr3_root_inactive(&trans, dp); + if (error) + goto out_cancel; - error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); - if (error) - goto out; + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); + if (error) + goto out_cancel; + } + + /* Reset the attribute fork - this also destroys the in-core fork */ + xfs_attr_fork_remove(dp, trans); error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - + xfs_iunlock(dp, lock_mode); return error; -out: - xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - xfs_iunlock(dp, XFS_ILOCK_EXCL); +out_cancel: + xfs_trans_cancel(trans, cancel_flags); +out_destroy_fork: + /* kill the in-core attr fork before we drop the inode lock */ + if (dp->i_afp) + xfs_idestroy_fork(dp, XFS_ATTR_FORK); + if (lock_mode) + xfs_iunlock(dp, lock_mode); return error; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d6ebc85192b7..1117dd3ba123 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1946,21 +1946,17 @@ xfs_inactive( /* * If there are attributes associated with the file then blow them away * now. The code calls a routine that recursively deconstructs the - * attribute fork. We need to just commit the current transaction - * because we can't use it for xfs_attr_inactive(). + * attribute fork. If also blows away the in-core attribute fork. */ - if (ip->i_d.di_anextents > 0) { - ASSERT(ip->i_d.di_forkoff != 0); - + if (XFS_IFORK_Q(ip)) { error = xfs_attr_inactive(ip); if (error) return; } - if (ip->i_afp) - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - + ASSERT(!ip->i_afp); ASSERT(ip->i_d.di_anextents == 0); + ASSERT(ip->i_d.di_forkoff == 0); /* * Free the inode. From cddc116228cb9d51d3224d23ba3e61fbbc3ec3d2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 29 May 2015 07:40:32 +1000 Subject: [PATCH 371/481] xfs: xfs_iozero can return positive errno It was missed when we converted everything in XFs to use negative error numbers, so fix it now. Bug introduced in 3.17 by commit 2451337 ("xfs: global error sign conversion"), and should go back to stable kernels. Thanks to Brian Foster for noticing it. 
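The one-character fix hides why it matters, so here is a minimal standalone sketch of the sign convention involved; the function names are invented for illustration and are not the real XFS helpers:

        #include <errno.h>

        /* stand-in for the per-chunk zeroing helper: returns 0 or a negative errno */
        static int zero_one_chunk(void)
        {
                return -EIO;
        }

        static int do_zero_range(void)
        {
                int status = zero_one_chunk();

                if (status)
                        return status;  /* correct: the caller sees -EIO */
                /*
                 * 'return -status;' would flip -EIO into +5, which callers no
                 * longer recognise as a failure.
                 */
                return 0;
        }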
cc: # 3.17, 3.18, 3.19, 4.0 Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8121e75352ee..3b7591224f4a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -124,7 +124,7 @@ xfs_iozero( status = 0; } while (count); - return (-status); + return status; } int From 22419ac9fe5e79483596cebdbd1d1209c18bac1a Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 29 May 2015 08:14:55 +1000 Subject: [PATCH 372/481] xfs: fix broken i_nlink accounting for whiteout tmpfile inode XFS uses the internal tmpfile() infrastructure for the whiteout inode used for RENAME_WHITEOUT operations. For tmpfile inodes, XFS allocates the inode, drops di_nlink, adds the inode to the agi unlinked list, calls d_tmpfile() which correspondingly drops i_nlink of the vfs inode, and then finishes the common inode setup (e.g., clear I_NEW and unlock). The d_tmpfile() call was originally made inxfs_create_tmpfile(), but was pulled up out of that function as part of the following commit to resolve a deadlock issue: 330033d6 xfs: fix tmpfile/selinux deadlock and initialize security As a result, callers of xfs_create_tmpfile() are responsible for either calling d_tmpfile() or fixing up i_nlink appropriately. The whiteout tmpfile allocation helper does neither. As a result, the vfs ->i_nlink becomes inconsistent with the on-disk ->di_nlink once xfs_rename() links it back into the source dentry and calls xfs_bumplink(). Update the assert in xfs_rename() to help detect this problem in the future and update xfs_rename_alloc_whiteout() to decrement the link count as part of the manual tmpfile inode setup. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1117dd3ba123..539a85fddbc2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2879,7 +2879,13 @@ xfs_rename_alloc_whiteout( if (error) return error; - /* Satisfy xfs_bumplink that this is a real tmpfile */ + /* + * Prepare the tmpfile inode as if it were created through the VFS. + * Otherwise, the link increment paths will complain about nlink 0->1. + * Drop the link count as done by d_tmpfile(), complete the inode setup + * and flag it as linkable. + */ + drop_nlink(VFS_I(tmpfile)); xfs_finish_inode_setup(tmpfile); VFS_I(tmpfile)->i_state |= I_LINKABLE; @@ -3147,7 +3153,7 @@ xfs_rename( * intermediate state on disk. */ if (wip) { - ASSERT(wip->i_d.di_nlink == 0); + ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0); error = xfs_bumplink(tp, wip); if (error) goto out_trans_abort; From 9ee971a0b8efbae472882b1d3e8e736f4d6e3da9 Mon Sep 17 00:00:00 2001 From: Lars Seipel Date: Thu, 21 May 2015 03:04:10 +0200 Subject: [PATCH 373/481] drm/nouveau/gr/gf100-: fix wrong constant definition Commit 3740c82590d8 ("drm/nouveau/gr/gf100-: add symbolic names for classes") introduced a wrong macro definition causing acceleration setup to fail. Fix it. 
Signed-off-by: Lars Seipel Fixes: 3740c82590d8 ("drm/nouveau/gr/gf100-: add symbolic names for classes") Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/class.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 0b5af0fe8659..64f8b2f687d2 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -14,7 +14,7 @@ #define FERMI_TWOD_A 0x0000902d -#define FERMI_MEMORY_TO_MEMORY_FORMAT_A 0x0000903d +#define FERMI_MEMORY_TO_MEMORY_FORMAT_A 0x00009039 #define KEPLER_INLINE_TO_MEMORY_A 0x0000a040 #define KEPLER_INLINE_TO_MEMORY_B 0x0000a140 From c9ab50d21032ca8c5013f4d57e9aa866da78d7c7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 21 May 2015 15:44:15 +1000 Subject: [PATCH 374/481] drm/nouveau/devinit/gf100: make the force-post condition more obvious And also more generic, so it can be used on newer chipsets. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c index e8778c67578e..9f09037731fc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c @@ -95,7 +95,9 @@ gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine, struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { + struct nvkm_devinit_impl *impl = (void *)oclass; struct nv50_devinit_priv *priv; + u64 disable; int ret; ret = nvkm_devinit_create(parent, engine, oclass, &priv); @@ -103,7 +105,8 @@ gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine, if (ret) return ret; - if (nv_rd32(priv, 0x022500) & 0x00000001) + disable = impl->disable(&priv->base); + if (disable & (1ULL << NVDEV_ENGINE_DISP)) priv->base.post = true; return 0; From 4d4d6f7520b6fce065a8b8303ace9ec48b9e96e9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 21 May 2015 15:47:16 +1000 Subject: [PATCH 375/481] drm/nouveau/devinit/gm100-: force devinit table execution on boards without PDISP Should fix fdo#89558 Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h | 3 +++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c index 9f09037731fc..c61102f70805 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c @@ -90,7 +90,7 @@ gf100_devinit_disable(struct nvkm_devinit *devinit) return disable; } -static int +int gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine, struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c index b345a53e881d..87ca0ece37b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c @@ -48,7 +48,7 @@ struct nvkm_oclass * gm107_devinit_oclass = &(struct nvkm_devinit_impl) { .base.handle = NV_SUBDEV(DEVINIT, 0x07), 
.base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv50_devinit_ctor, + .ctor = gf100_devinit_ctor, .dtor = _nvkm_devinit_dtor, .init = nv50_devinit_init, .fini = _nvkm_devinit_fini, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c index 535172c5f1ad..1076fcf0d716 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c @@ -161,7 +161,7 @@ struct nvkm_oclass * gm204_devinit_oclass = &(struct nvkm_devinit_impl) { .base.handle = NV_SUBDEV(DEVINIT, 0x07), .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv50_devinit_ctor, + .ctor = gf100_devinit_ctor, .dtor = _nvkm_devinit_dtor, .init = nv50_devinit_init, .fini = _nvkm_devinit_fini, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h index b882b65ff3cd..9243521c80ac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h @@ -15,6 +15,9 @@ int nv50_devinit_pll_set(struct nvkm_devinit *, u32, u32); int gt215_devinit_pll_set(struct nvkm_devinit *, u32, u32); +int gf100_devinit_ctor(struct nvkm_object *, struct nvkm_object *, + struct nvkm_oclass *, void *, u32, + struct nvkm_object **); int gf100_devinit_pll_set(struct nvkm_devinit *, u32, u32); u64 gm107_devinit_disable(struct nvkm_devinit *); From aaea3938b5637ffb1d77e8b7707c207b24e02937 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 4 May 2015 12:26:00 +1000 Subject: [PATCH 376/481] drm/nouveau/gr/gm204: remove a stray printk Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c index 2f5eadd12a9b..fdb1dcf16a59 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c @@ -329,7 +329,6 @@ gm204_gr_init(struct nvkm_object *object) nv_mask(priv, 0x419cc0, 0x00000008, 0x00000008); for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - printk(KERN_ERR "ppc %d %d\n", gpc, priv->ppc_nr[gpc]); for (ppc = 0; ppc < priv->ppc_nr[gpc]; ppc++) nv_wr32(priv, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000); nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000); From e5feb1ebaadfb1f5f70a3591e762d780232a4f5d Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Thu, 28 May 2015 15:44:16 -0700 Subject: [PATCH 377/481] tracing/mm: don't trace kmem_cache_free on offline cpus Since tracepoints use RCU for protection, they must not be called on offline cpus. trace_kmem_cache_free can be called on an offline cpu in this scenario caught by LOCKDEP: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc1+ #9 Not tainted ------------------------------- include/trace/events/kmem.h:148 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. 
stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.1.0-rc1+ #9 Call Trace: .dump_stack+0x98/0xd4 (unreliable) .lockdep_rcu_suspicious+0x108/0x170 .kmem_cache_free+0x344/0x4b0 .__mmdrop+0x4c/0x160 .idle_task_exit+0xf0/0x100 .pnv_smp_cpu_kill_self+0x58/0x2c0 .cpu_die+0x34/0x50 .arch_cpu_idle_dead+0x20/0x40 .cpu_startup_entry+0x708/0x7a0 .start_secondary+0x36c/0x3a0 start_secondary_prolog+0x10/0x14 Fix this by converting kmem_cache_free trace point into TRACE_EVENT_CONDITION where condition is cpu_online(smp_processor_id()) Signed-off-by: Shreyas B. Prabhu Reported-by: Aneesh Kumar K.V Reviewed-by: Preeti U Murthy Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 81ea59812117..78efa0a197a9 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -140,11 +140,22 @@ DEFINE_EVENT(kmem_free, kfree, TP_ARGS(call_site, ptr) ); -DEFINE_EVENT(kmem_free, kmem_cache_free, +DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr) + TP_ARGS(call_site, ptr), + + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())) ); TRACE_EVENT(mm_page_free, From 1f0c27b50f4f2567e649f49d77daee2bbf3f40e5 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Thu, 28 May 2015 15:44:19 -0700 Subject: [PATCH 378/481] tracing/mm: don't trace mm_page_free on offline cpus Since tracepoints use RCU for protection, they must not be called on offline cpus. trace_mm_page_free can be called on an offline cpu in this scenario caught by LOCKDEP: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc1+ #9 Not tainted ------------------------------- include/trace/events/kmem.h:170 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.1.0-rc1+ #9 Call Trace: .dump_stack+0x98/0xd4 (unreliable) .lockdep_rcu_suspicious+0x108/0x170 .free_pages_prepare+0x494/0x680 .free_hot_cold_page+0x50/0x280 .destroy_context+0x90/0xd0 .__mmdrop+0x58/0x160 .idle_task_exit+0xf0/0x100 .pnv_smp_cpu_kill_self+0x58/0x2c0 .cpu_die+0x34/0x50 .arch_cpu_idle_dead+0x20/0x40 .cpu_startup_entry+0x708/0x7a0 .start_secondary+0x36c/0x3a0 start_secondary_prolog+0x10/0x14 Fix this by converting mm_page_free trace point into TRACE_EVENT_CONDITION where condition is cpu_online(smp_processor_id()) Signed-off-by: Shreyas B. 
Prabhu Reviewed-by: Preeti U Murthy Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 78efa0a197a9..aa863549e77a 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -158,12 +158,24 @@ DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free, TP_CONDITION(cpu_online(raw_smp_processor_id())) ); -TRACE_EVENT(mm_page_free, +TRACE_EVENT_CONDITION(mm_page_free, TP_PROTO(struct page *page, unsigned int order), TP_ARGS(page, order), + + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())), + TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) From 649b8de2f75145bf14cb1783688e16d51ac9b89a Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Thu, 28 May 2015 15:44:22 -0700 Subject: [PATCH 379/481] tracing/mm: don't trace mm_page_pcpu_drain on offline cpus Since tracepoints use RCU for protection, they must not be called on offline cpus. trace_mm_page_pcpu_drain can be called on an offline cpu in this scenario caught by LOCKDEP: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc1+ #9 Not tainted ------------------------------- include/trace/events/kmem.h:265 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 1 lock held by swapper/5/0: #0: (&(&zone->lock)->rlock){..-...}, at: [] .free_pcppages_bulk+0x70/0x920 stack backtrace: CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.1.0-rc1+ #9 Call Trace: .dump_stack+0x98/0xd4 (unreliable) .lockdep_rcu_suspicious+0x108/0x170 .free_pcppages_bulk+0x60c/0x920 .free_hot_cold_page+0x208/0x280 .destroy_context+0x90/0xd0 .__mmdrop+0x58/0x160 .idle_task_exit+0xf0/0x100 .pnv_smp_cpu_kill_self+0x58/0x2c0 .cpu_die+0x34/0x50 .arch_cpu_idle_dead+0x20/0x40 .cpu_startup_entry+0x708/0x7a0 .start_secondary+0x36c/0x3a0 start_secondary_prolog+0x10/0x14 Fix this by converting mm_page_pcpu_drain trace point into TRACE_EVENT_CONDITION where condition is cpu_online(smp_processor_id()) Signed-off-by: Shreyas B. Prabhu Reviewed-by: Preeti U Murthy Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index aa863549e77a..f7554fd7fc62 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -276,12 +276,35 @@ DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, TP_ARGS(page, order, migratetype) ); -DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, +TRACE_EVENT_CONDITION(mm_page_pcpu_drain, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype), + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. 
+ * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())), + + TP_STRUCT__entry( + __field( unsigned long, pfn ) + __field( unsigned int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->pfn = page ? page_to_pfn(page) : -1UL; + __entry->order = order; + __entry->migratetype = migratetype; + ), + TP_printk("page=%p pfn=%lu order=%d migratetype=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order, __entry->migratetype) From 2b1d3ae940acd11be44c6eced5873d47c2e00ffa Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 28 May 2015 15:44:24 -0700 Subject: [PATCH 380/481] fs/binfmt_elf.c:load_elf_binary(): return -EINVAL on zero-length mappings load_elf_binary() returns `retval', not `error'. Fixes: a87938b2e246b81b4fb ("fs/binfmt_elf.c: fix bug in loading of PIE binaries") Reported-by: James Hogan Cc: Michael Davidson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 241ef68d2893..cd46e4158830 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -918,7 +918,7 @@ static int load_elf_binary(struct linux_binprm *bprm) total_size = total_mapping_size(elf_phdata, loc->elf_ex.e_phnum); if (!total_size) { - error = -EINVAL; + retval = -EINVAL; goto out_free_dentry; } } From cd4e6c91a114a23b0b8d61d460a2d1d14e3b45f4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 28 May 2015 15:44:27 -0700 Subject: [PATCH 381/481] MAINTAINERS: update CAPABILITIES pattern Commit 1ddd3b4e07a4 ("LSM: Remove unused capability.c") removed the file, remove the file pattern. Signed-off-by: Joe Perches Acked-by: Casey Schaufler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 474bcb6c0bac..af802b357b6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2427,7 +2427,6 @@ L: linux-security-module@vger.kernel.org S: Supported F: include/linux/capability.h F: include/uapi/linux/capability.h -F: security/capability.c F: security/commoncap.c F: kernel/capability.c From dcbff39da3d815f08750552fdd04f96b51751129 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 28 May 2015 15:44:29 -0700 Subject: [PATCH 382/481] fs, omfs: add NULL terminator in the end up the token list match_token() expects a NULL terminator at the end of the token list so that it would know where to stop. Not having one causes it to overrun to invalid memory. In practice, passing a mount option that omfs didn't recognize would sometimes panic the system. 
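For context, this is the convention the parser helpers expect; a minimal sketch (not the omfs table itself, the option names are illustrative):

        #include <linux/parser.h>

        enum { Opt_uid, Opt_gid, Opt_err };

        static const match_table_t tokens = {
                {Opt_uid, "uid=%u"},
                {Opt_gid, "gid=%u"},
                {Opt_err, NULL},        /* terminator: match_token() stops here */
        };

        /* in the option-parsing loop */
        substring_t args[MAX_OPT_ARGS];
        int token = match_token(p, tokens, args);   /* unknown options return Opt_err */

Without the {Opt_err, NULL} entry, match_token() keeps comparing against whatever memory follows the array, which is the overrun described above.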
Signed-off-by: Sasha Levin Signed-off-by: Bob Copeland Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/omfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 138321b0c6c2..70d4191cf33d 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -359,7 +359,7 @@ nomem: } enum { - Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask + Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err }; static const match_table_t tokens = { @@ -368,6 +368,7 @@ static const match_table_t tokens = { {Opt_umask, "umask=%o"}, {Opt_dmask, "dmask=%o"}, {Opt_fmask, "fmask=%o"}, + {Opt_err, NULL}, }; static int parse_options(char *options, struct omfs_sb_info *sbi) From 3a281f946640542a7a7372f436e101ee1fbc4c97 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 28 May 2015 15:44:32 -0700 Subject: [PATCH 383/481] omfs: set error return when d_make_root() fails A static checker found the following issue in the error path for omfs_fill_super: fs/omfs/inode.c:552 omfs_fill_super() warn: missing error code here? 'd_make_root()' failed. 'ret' = '0' Fix by returning -ENOMEM in this case. Signed-off-by: Bob Copeland Reported-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/omfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 70d4191cf33d..6ce542c11f98 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -549,8 +549,10 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) } sb->s_root = d_make_root(root); - if (!sb->s_root) + if (!sb->s_root) { + ret = -ENOMEM; goto out_brelse_bh2; + } printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name); ret = 0; From c0345ee57d461343586b5e1e2f9c3c3766d07fe6 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 28 May 2015 15:44:35 -0700 Subject: [PATCH 384/481] omfs: fix sign confusion for bitmap loop counter The count variable is used to iterate down to (below) zero from the size of the bitmap and handle the one-filling the remainder of the last partial bitmap block. The loop conditional expects count to be signed in order to detect when the final block is processed, after which count goes negative. Unfortunately, a recent change made this unsigned along with some other related fields. The result of is this is that during mount, omfs_get_imap will overrun the bitmap array and corrupt memory unless number of blocks happens to be a multiple of 8 * blocksize. Fix by changing count back to signed: it is guaranteed to fit in an s32 without overflow due to an enforced limit on the number of blocks in the filesystem. 
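A compressed illustration of the bug class, using generic names rather than the omfs ones, shows why the counter has to be signed:

        int count = bitmap_size;        /* bounded, so it fits in an s32 */

        for (i = 0; i < array_size; i++) {
                /* ... map one bitmap block ... */
                count -= bits_per_block;
                if (count < 0)          /* last, partial block: one-fill the tail */
                        break;
        }

        /*
         * With 'unsigned int count' the subtraction wraps to a huge value
         * instead of going negative, the test never fires, and the loop
         * walks off the end of the bitmap array.
         */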
Signed-off-by: Bob Copeland Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/omfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 6ce542c11f98..3d935c81789a 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -306,7 +306,8 @@ static const struct super_operations omfs_sops = { */ static int omfs_get_imap(struct super_block *sb) { - unsigned int bitmap_size, count, array_size; + unsigned int bitmap_size, array_size; + int count; struct omfs_sb_info *sbi = OMFS_SB(sb); struct buffer_head *bh; unsigned long **ptr; From 5a6b2b36a8249ec9dfb2a714acadad86d9cc0aee Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 28 May 2015 15:44:37 -0700 Subject: [PATCH 385/481] omfs: fix potential integer overflow in allocator Both 'i' and 'bits_per_entry' are signed integers but the result is a u64 block number. Cast i to u64 to avoid truncation on 32-bit targets. Found by Coverity (CID 200679). Signed-off-by: Bob Copeland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/omfs/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c index 082234581d05..83f4e76511c2 100644 --- a/fs/omfs/bitmap.c +++ b/fs/omfs/bitmap.c @@ -159,7 +159,7 @@ int omfs_allocate_range(struct super_block *sb, goto out; found: - *return_block = i * bits_per_entry + bit; + *return_block = (u64) i * bits_per_entry + bit; *return_size = run; ret = set_run(sb, i, bits_per_entry, bit, run, 1); From ca3f172c197b5fa6b2693f5f93d4928a1420fb07 Mon Sep 17 00:00:00 2001 From: Adrien Schildknecht Date: Thu, 28 May 2015 15:44:40 -0700 Subject: [PATCH 386/481] scripts/gdb: fix lx-lsmod refcnt Commit 2f35c41f58a9 ("module: Replace module_ref with atomic_t refcnt") changes the way refcnt is handled but did not update the gdb script to use the new variable. Since refcnt is not per-cpu anymore, we can directly read its value. Signed-off-by: Adrien Schildknecht Reviewed-by: Jan Kiszka Cc: Pantelis Koukousoulas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/gdb/linux/modules.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/scripts/gdb/linux/modules.py b/scripts/gdb/linux/modules.py index a1504c4f1900..25db8cff44a2 100644 --- a/scripts/gdb/linux/modules.py +++ b/scripts/gdb/linux/modules.py @@ -73,18 +73,11 @@ class LxLsmod(gdb.Command): " " if utils.get_long_type().sizeof == 8 else "")) for module in module_list(): - ref = 0 - module_refptr = module['refptr'] - for cpu in cpus.cpu_list("cpu_possible_mask"): - refptr = cpus.per_cpu(module_refptr, cpu) - ref += refptr['incs'] - ref -= refptr['decs'] - gdb.write("{address} {name:<19} {size:>8} {ref}".format( address=str(module['module_core']).split()[0], name=module['name'].string(), size=str(module['core_size']), - ref=str(ref))) + ref=str(module['refcnt']['counter']))) source_list = module['source_list'] t = self._module_use_type.get_type().pointer() From 210347e1846d391aea863fb69165a2d70581f838 Mon Sep 17 00:00:00 2001 From: Roger Luethi Date: Tue, 26 May 2015 19:12:54 +0200 Subject: [PATCH 387/481] via-rhine: Resigning as maintainer I don't have enough time to look after via-rhine anymore. Signed-off-by: Roger Luethi Signed-off-by: David S. 
Miller --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 474bcb6c0bac..1bbacdaf5cf3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10588,8 +10588,7 @@ F: drivers/virtio/virtio_input.c F: include/uapi/linux/virtio_input.h VIA RHINE NETWORK DRIVER -M: Roger Luethi -S: Maintained +S: Orphan F: drivers/net/ethernet/via/via-rhine.c VIA SD/MMC CARD CONTROLLER DRIVER From 2159184ea01e4ae7d15f2017e296d4bc82d5aeb0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 May 2015 23:09:19 -0400 Subject: [PATCH 388/481] d_walk() might skip too much when we find that a child has died while we'd been trying to ascend, we should go into the first live sibling itself, rather than its sibling. Off-by-one in question had been introduced in "deal with deadlock in d_walk()" and the fix needs to be backported to all branches this one has been backported to. Cc: stable@vger.kernel.org # 3.2 and later Signed-off-by: Al Viro --- fs/dcache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 656ce522a218..37b5afdaf698 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1239,13 +1239,13 @@ ascend: /* might go back up the wrong parent if we have had a rename. */ if (need_seqretry(&rename_lock, seq)) goto rename_retry; - next = child->d_child.next; - while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + /* go into the first sibling still alive */ + do { + next = child->d_child.next; if (next == &this_parent->d_subdirs) goto ascend; child = list_entry(next, struct dentry, d_child); - next = next->next; - } + } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); rcu_read_unlock(); goto resume; } From b47eee2e0a7de623c24dbcaf303c3bf2b5155455 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 May 2015 09:43:29 +0200 Subject: [PATCH 389/481] ALSA: hda - Fix lost sound due to stream_pm ops cleanup The commit [49fb18972581: ALSA: hda - Set stream_pm ops automatically by generic parser] resulted in regressions on some Realtek and VIA codecs because these drivers set patch_ops after calling the generic parser, thus stream_pm got cleared to NULL again. I haven't noticed since I tested with IDT codec. Restore (partial revert) the stream_pm ops for them to fix the regression. 
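The regression pattern here is a whole-struct assignment clobbering a member that a helper filled in earlier; a tiny self-contained C sketch (names are generic, not the HDA ones):

        struct ops {
                void (*stream_pm)(int on);
                void (*resume)(void);
        };

        static void gen_stream_pm(int on) { (void)on; }
        static void drv_resume(void) { }

        static const struct ops drv_ops = { .resume = drv_resume };
        static struct ops codec_ops;

        static void setup(void)
        {
                codec_ops.stream_pm = gen_stream_pm;    /* set by the generic parser  */
                codec_ops = drv_ops;                    /* struct copy NULLs it again */
                codec_ops.stream_pm = gen_stream_pm;    /* hence the re-assignment in
                                                           the patch below            */
        }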
Fixes: 49fb18972581 ('ALSA: hda - Set stream_pm ops automatically by generic parser') Reported-by: Jeremiah Mahler Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + sound/pci/hda/patch_via.c | 1 + 2 files changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ea3af38eee58..464168426465 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5729,6 +5729,7 @@ static int patch_alc269(struct hda_codec *codec) set_beep_amp(spec, spec->gen.mixer_nid, 0x04, HDA_INPUT); codec->patch_ops = alc_patch_ops; + codec->patch_ops.stream_pm = snd_hda_gen_stream_pm; #ifdef CONFIG_PM codec->patch_ops.suspend = alc269_suspend; codec->patch_ops.resume = alc269_resume; diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 742087ef378f..31a95cca015d 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -472,6 +472,7 @@ static const struct hda_codec_ops via_patch_ops = { .init = via_init, .free = via_free, .unsol_event = snd_hda_jack_unsol_event, + .stream_pm = snd_hda_gen_stream_pm, #ifdef CONFIG_PM .suspend = via_suspend, .check_power_status = via_check_power_status, From 5530c84f62b49a2e9f8c11a079d00f85ea20bb09 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 26 May 2015 12:58:37 +0200 Subject: [PATCH 390/481] ARM: multi_v7_defconfig: Replace CONFIG_USB_ISP1760_HCD by CONFIG_USB_ISP1760 Since commit 100832abf065bc18 ("usb: isp1760: Make HCD support optional"), CONFIG_USB_ISP1760_HCD is automatically selected when needed. Enabling that option in the defconfig is now a no-op, and no longer enables ISP1760 HCD support. Re-enable the ISP1760 driver in the defconfig by enabling USB_ISP1760_HOST_ROLE instead. Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- arch/arm/configs/multi_v7_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 0ca4a3eaf65d..fbbb1915c6a9 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -429,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_EHCI_TEGRA=y CONFIG_USB_EHCI_HCD_STI=y CONFIG_USB_EHCI_HCD_PLATFORM=y -CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_ISP1760=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_STI=y CONFIG_USB_OHCI_HCD_PLATFORM=y From e5d8de32cc02a259e1a237ab57cba00f2930fa6a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 28 May 2015 15:12:52 -0400 Subject: [PATCH 391/481] dm: fix false warning in free_rq_clone() for unmapped requests When stacking request-based dm device on non blk-mq device and device-mapper target could not map the request (error target is used, multipath target with all paths down, etc), the WARN_ON_ONCE() in free_rq_clone() will trigger when it shouldn't. The warning was added by commit aa6df8d ("dm: fix free_rq_clone() NULL pointer when requeueing unmapped request"). But free_rq_clone() with clone->q == NULL is valid usage for the case where dm_kill_unmapped_request() initiates request cleanup. Fix this false warning by just removing the WARN_ON -- it only generated false positives and was never useful in catching the intended case (completing clone request not being mapped e.g. clone->q being NULL). 
Fixes: aa6df8d ("dm: fix free_rq_clone() NULL pointer when requeueing unmapped request") Reported-by: Bart Van Assche Reported-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1badfb250a18..e24069aaeb18 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1082,13 +1082,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) dm_put(md); } -static void free_rq_clone(struct request *clone, bool must_be_mapped) +static void free_rq_clone(struct request *clone) { struct dm_rq_target_io *tio = clone->end_io_data; struct mapped_device *md = tio->md; - WARN_ON_ONCE(must_be_mapped && !clone->q); - blk_rq_unprep_clone(clone); if (md->type == DM_TYPE_MQ_REQUEST_BASED) @@ -1132,7 +1130,7 @@ static void dm_end_request(struct request *clone, int error) rq->sense_len = clone->sense_len; } - free_rq_clone(clone, true); + free_rq_clone(clone); if (!rq->q->mq_ops) blk_end_request_all(rq, error); else @@ -1151,7 +1149,7 @@ static void dm_unprep_request(struct request *rq) } if (clone) - free_rq_clone(clone, false); + free_rq_clone(clone); } /* From 15b94a690470038aa08247eedbebbe7e2218d5ee Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Fri, 29 May 2015 08:51:03 +0000 Subject: [PATCH 392/481] dm: fix reload failure of 0 path multipath mapping on blk-mq devices dm-multipath accepts 0 path mapping. # echo '0 2097152 multipath 0 0 0 0' | dmsetup create newdev Such a mapping can be used to release underlying devices while still holding requests in its queue until working paths come back. However, once the multipath device is created over blk-mq devices, it rejects reloading of 0 path mapping: # echo '0 2097152 multipath 0 0 1 1 queue-length 0 1 1 /dev/sda 1' \ | dmsetup create mpath1 # echo '0 2097152 multipath 0 0 0 0' | dmsetup load mpath1 device-mapper: reload ioctl on mpath1 failed: Invalid argument Command failed With following kernel message: device-mapper: ioctl: can't change device type after initial table load. DM tries to inherit the current table type using dm_table_set_type() but it doesn't work as expected because of unnecessary check about whether the target type is hybrid or not. Hybrid type is for targets that work as either request-based or bio-based and not required for blk-mq or non blk-mq checking. Fixes: 65803c205983 ("dm table: train hybrid target type detection to select blk-mq if appropriate") Signed-off-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d9b00b8565c6..16ba55ad7089 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -820,6 +820,12 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args) } EXPORT_SYMBOL(dm_consume_args); +static bool __table_type_request_based(unsigned table_type) +{ + return (table_type == DM_TYPE_REQUEST_BASED || + table_type == DM_TYPE_MQ_REQUEST_BASED); +} + static int dm_table_set_type(struct dm_table *t) { unsigned i; @@ -852,8 +858,7 @@ static int dm_table_set_type(struct dm_table *t) * Determine the type from the live device. * Default to bio-based if device is new. 
*/ - if (live_md_type == DM_TYPE_REQUEST_BASED || - live_md_type == DM_TYPE_MQ_REQUEST_BASED) + if (__table_type_request_based(live_md_type)) request_based = 1; else bio_based = 1; @@ -903,7 +908,7 @@ static int dm_table_set_type(struct dm_table *t) } t->type = DM_TYPE_MQ_REQUEST_BASED; - } else if (hybrid && list_empty(devices) && live_md_type != DM_TYPE_NONE) { + } else if (list_empty(devices) && __table_type_request_based(live_md_type)) { /* inherit live MD type */ t->type = live_md_type; @@ -925,10 +930,7 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t) bool dm_table_request_based(struct dm_table *t) { - unsigned table_type = dm_table_get_type(t); - - return (table_type == DM_TYPE_REQUEST_BASED || - table_type == DM_TYPE_MQ_REQUEST_BASED); + return __table_type_request_based(dm_table_get_type(t)); } bool dm_table_mq_request_based(struct dm_table *t) From 1c220c69ce0dcc0f234a9f263ad9c0864f971852 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 29 May 2015 14:52:51 +0100 Subject: [PATCH 393/481] dm: fix casting bug in dm_merge_bvec() dm_merge_bvec() was originally added in f6fccb ("dm: introduce merge_bvec_fn"). In that commit a value in sectors is converted to bytes using << 9, and then assigned to an int. This code made assumptions about the value of BIO_MAX_SECTORS. A later commit 148e51 ("dm: improve documentation and code clarity in dm_merge_bvec") was meant to have no functional change but it removed the use of BIO_MAX_SECTORS in favor of using queue_max_sectors(). At this point the cast from sector_t to int resulted in a zero value. The fallout being dm_merge_bvec() would only allow a single page to be added to a bio. This interim fix is minimal for the benefit of stable@ because the more comprehensive cleanup of passing a sector_t to all DM targets' merge function will impact quite a few DM targets. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.19+ --- drivers/md/dm.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e24069aaeb18..2caf492890d6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1723,8 +1723,7 @@ static int dm_merge_bvec(struct request_queue *q, struct mapped_device *md = q->queuedata; struct dm_table *map = dm_get_live_table_fast(md); struct dm_target *ti; - sector_t max_sectors; - int max_size = 0; + sector_t max_sectors, max_size = 0; if (unlikely(!map)) goto out; @@ -1739,8 +1738,16 @@ static int dm_merge_bvec(struct request_queue *q, max_sectors = min(max_io_len(bvm->bi_sector, ti), (sector_t) queue_max_sectors(q)); max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */ - max_size = 0; + + /* + * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t + * to the targets' merge function since it holds sectors not bytes). + * Just doing this as an interim fix for stable@ because the more + * comprehensive cleanup of switching to sector_t will impact every + * DM target that implements a ->merge hook. 
+ */ + if (max_size > INT_MAX) + max_size = INT_MAX; /* * merge_bvec_fn() returns number of bytes @@ -1748,7 +1755,7 @@ static int dm_merge_bvec(struct request_queue *q, * max is precomputed maximal io size */ if (max_size && ti->type->merge) - max_size = ti->type->merge(ti, bvm, biovec, max_size); + max_size = ti->type->merge(ti, bvm, biovec, (int) max_size); /* * If the target doesn't support merge method and some of the devices * provided their merge_bvec method (we know this by looking for the From 556b6629c10c65cee42fa893681ffdd653d097c4 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 27 May 2015 19:50:00 +0200 Subject: [PATCH 394/481] MIPS: ath79: fix build problem if CONFIG_BLK_DEV_INITRD is not set initrd_start is defined in init/do_mounts_initrd.c, which is only included in kernel if CONFIG_BLK_DEV_INITRD=y. Signed-off-by: Laurent Fasnacht Cc: linux-mips@linux-mips.org Cc: trivial@kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10198/ Signed-off-by: Ralf Baechle --- arch/mips/ath79/prom.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/ath79/prom.c b/arch/mips/ath79/prom.c index e1fe63051136..597899ad5438 100644 --- a/arch/mips/ath79/prom.c +++ b/arch/mips/ath79/prom.c @@ -1,6 +1,7 @@ /* * Atheros AR71XX/AR724X/AR913X specific prom routines * + * Copyright (C) 2015 Laurent Fasnacht * Copyright (C) 2008-2010 Gabor Juhos * Copyright (C) 2008 Imre Kaloz * @@ -25,12 +26,14 @@ void __init prom_init(void) { fw_init_cmdline(); +#ifdef CONFIG_BLK_DEV_INITRD /* Read the initrd address from the firmware environment */ initrd_start = fw_getenvl("initrd_start"); if (initrd_start) { initrd_start = KSEG0ADDR(initrd_start); initrd_end = initrd_start + fw_getenvl("initrd_size"); } +#endif } void __init prom_free_prom_memory(void) From 57b41758230b567218cb5bc3da9068aabc496fc9 Mon Sep 17 00:00:00 2001 From: Petri Gynther Date: Tue, 26 May 2015 23:25:08 -0700 Subject: [PATCH 395/481] MIPS: BMIPS: Fix bmips_wr_vec() bmips_wr_vec() copies exception vector code from start to dst. The call to dma_cache_wback() needs to flush (end-start) bytes, starting at dst, from write-back cache to memory. Signed-off-by: Petri Gynther Acked-by: Florian Fainelli Reviewed-by: Kevin Cernekee Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10193/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp-bmips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index fd528d7ea278..336708ae5c5b 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = { static void bmips_wr_vec(unsigned long dst, char *start, char *end) { memcpy((void *)dst, start, end - start); - dma_cache_wback((unsigned long)start, end - start); + dma_cache_wback(dst, end - start); local_flush_icache_range(dst, dst + (end - start)); instruction_hazard(); } From c4fca4fdea940bda2cff6b844cda6af8ef1c79cc Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Thu, 28 May 2015 17:46:49 +0100 Subject: [PATCH 396/481] MIPS: strnlen_user.S: Fix a CPU_DADDI_WORKAROUNDS regression Correct a regression introduced with 8453eebd [MIPS: Fix strnlen_user() return value in case of overlong strings.] 
causing assembler warnings and broken code generated in __strnlen_kernel_nocheck_asm: arch/mips/lib/strnlen_user.S: Assembler messages: arch/mips/lib/strnlen_user.S:64: Warning: Macro instruction expanded into multiple instructions in a branch delay slot with the CPU_DADDI_WORKAROUNDS option set, resulting in the function looping indefinitely upon mounting NFS root. Use conditional assembly to avoid a microMIPS code size regression. Using $at unconditionally would cause such a regression as there are no 16-bit instruction encodings available for ALU operations using this register. Using $v1 unconditionally would produce short microMIPS encodings, but would prevent this register from being used across calls to this function. The extra LI operation introduced is free, replacing a NOP originally scheduled into the delay slot of the branch that follows. Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10205/ Signed-off-by: Ralf Baechle --- arch/mips/lib/strnlen_user.S | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index 7d12c0dded3d..77e64942f004 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm) FEXPORT(__strnlen_\func\()_nocheck_asm) move v0, a0 PTR_ADDU a1, a0 # stop pointer -1: beq v0, a1, 1f # limit reached? +1: +#ifdef CONFIG_CPU_DADDI_WORKAROUNDS + .set noat + li AT, 1 +#endif + beq v0, a1, 1f # limit reached? .ifeqs "\func", "kernel" EX(lb, t0, (v0), .Lfault\@) .else @@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm) .endif .set noreorder bnez t0, 1b -1: PTR_ADDIU v0, 1 +1: +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS + PTR_ADDIU v0, 1 +#else + PTR_ADDU v0, AT + .set at +#endif .set reorder PTR_SUBU v0, a0 jr ra From 9aecac04d8d89e730ae362a748113b19c06a9d39 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 27 May 2015 16:11:48 -0700 Subject: [PATCH 397/481] hwmon: (tmp401) Do not auto-detect chip on I2C address 0x37 I2C address 0x37 may be used by EEPROMs, which can result in false positives. Do not attempt to detect a chip at this address. 
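As a rough sketch of why probing 0x37 is risky (the register and constant names below are invented for illustration and are not the tmp401 driver's): a heuristic detect routine only reads a couple of ID registers, and an EEPROM parked at the same address will answer those reads with whatever bytes it happens to store, occasionally producing "valid" looking IDs:

        static int sensor_detect(struct i2c_client *client)
        {
                int mfg = i2c_smbus_read_byte_data(client, REG_MFG_ID);   /* invented */
                int dev = i2c_smbus_read_byte_data(client, REG_DEV_ID);   /* invented */

                if (mfg != EXPECTED_MFG_ID || dev != EXPECTED_DEV_ID)
                        return -ENODEV;
                return 0;       /* an EEPROM can pass this check by coincidence */
        }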
Reviewed-by: Jean Delvare Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Guenter Roeck --- Documentation/hwmon/tmp401 | 2 +- drivers/hwmon/tmp401.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/hwmon/tmp401 b/Documentation/hwmon/tmp401 index 8eb88e974055..711f75e189eb 100644 --- a/Documentation/hwmon/tmp401 +++ b/Documentation/hwmon/tmp401 @@ -20,7 +20,7 @@ Supported chips: Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp432.html * Texas Instruments TMP435 Prefix: 'tmp435' - Addresses scanned: I2C 0x37, 0x48 - 0x4f + Addresses scanned: I2C 0x48 - 0x4f Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp435.html Authors: diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c index 99664ebc738d..ccf4cffe0ee1 100644 --- a/drivers/hwmon/tmp401.c +++ b/drivers/hwmon/tmp401.c @@ -44,7 +44,7 @@ #include /* Addresses to scan */ -static const unsigned short normal_i2c[] = { 0x37, 0x48, 0x49, 0x4a, 0x4c, 0x4d, +static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; enum chips { tmp401, tmp411, tmp431, tmp432, tmp435 }; From 1b63bf617206ff35b93c57c67bbe067ac735a85a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 28 May 2015 09:08:09 -0700 Subject: [PATCH 398/481] hwmon: (nct6775) Add missing sysfs attribute initialization The following error message is seen when loading the nct6775 driver with DEBUG_LOCK_ALLOC enabled. BUG: key ffff88040b2f0030 not in .data! ------------[ cut here ]------------ WARNING: CPU: 0 PID: 186 at kernel/locking/lockdep.c:2988 lockdep_init_map+0x469/0x630() DEBUG_LOCKS_WARN_ON(1) Caused by a missing call to sysfs_attr_init() when initializing sysfs attributes. Reported-by: Alexey Orishko Reviewed-by: Jean Delvare Cc: stable@vger.kernel.org # v3.12+ Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 4fcb48103299..bd1c99deac71 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -995,6 +995,7 @@ nct6775_create_attr_group(struct device *dev, struct sensor_template_group *tg, (*t)->dev_attr.attr.name, tg->base + i); if ((*t)->s2) { a2 = &su->u.a2; + sysfs_attr_init(&a2->dev_attr.attr); a2->dev_attr.attr.name = su->name; a2->nr = (*t)->u.s.nr + i; a2->index = (*t)->u.s.index; @@ -1005,6 +1006,7 @@ nct6775_create_attr_group(struct device *dev, struct sensor_template_group *tg, *attrs = &a2->dev_attr.attr; } else { a = &su->u.a1; + sysfs_attr_init(&a->dev_attr.attr); a->dev_attr.attr.name = su->name; a->index = (*t)->u.index + i; a->dev_attr.attr.mode = From c7bd6dc320b85445b6b36a0aff41f929210027c7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 28 May 2015 09:12:23 -0700 Subject: [PATCH 399/481] hwmon: (nct6683) Add missing sysfs attribute initialization The following error message is seen when loading the nct6683 driver with DEBUG_LOCK_ALLOC enabled. BUG: key ffff88040b2f0030 not in .data! ------------[ cut here ]------------ WARNING: CPU: 0 PID: 186 at kernel/locking/lockdep.c:2988 lockdep_init_map+0x469/0x630() DEBUG_LOCKS_WARN_ON(1) Caused by a missing call to sysfs_attr_init() when initializing sysfs attributes. 
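The fix follows the standard rule for dynamically allocated attributes; a hedged sketch of the general pattern (variable and callback names are illustrative, not taken from the nct drivers):

        /*
         * Attributes built at probe time live on the heap, not in .data, so
         * lockdep needs an explicit key before they are registered.
         */
        struct device_attribute *attr = &dyn->dev_attr;    /* kzalloc'd container */

        sysfs_attr_init(&attr->attr);   /* the call that was missing */
        attr->attr.name = name;
        attr->attr.mode = 0444;
        attr->show = my_show;

        error = device_create_file(dev, attr);

Statically declared attributes sit in .data, which lockdep accepts as-is; only the dynamically created ones trip the "key not in .data" warning.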
Reported-by: Alexey Orishko Reviewed-by: Jean Delvare Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6683.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/nct6683.c b/drivers/hwmon/nct6683.c index f3830db02d46..37f01702d081 100644 --- a/drivers/hwmon/nct6683.c +++ b/drivers/hwmon/nct6683.c @@ -439,6 +439,7 @@ nct6683_create_attr_group(struct device *dev, struct sensor_template_group *tg, (*t)->dev_attr.attr.name, tg->base + i); if ((*t)->s2) { a2 = &su->u.a2; + sysfs_attr_init(&a2->dev_attr.attr); a2->dev_attr.attr.name = su->name; a2->nr = (*t)->u.s.nr + i; a2->index = (*t)->u.s.index; @@ -449,6 +450,7 @@ nct6683_create_attr_group(struct device *dev, struct sensor_template_group *tg, *attrs = &a2->dev_attr.attr; } else { a = &su->u.a1; + sysfs_attr_init(&a->dev_attr.attr); a->dev_attr.attr.name = su->name; a->index = (*t)->u.index + i; a->dev_attr.attr.mode = From d588cf8f618d7b316743a0bc99fede20f7a01bb7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 May 2015 08:50:52 +0200 Subject: [PATCH 400/481] target: Fix se_tpg_tfo->tf_subsys regression + remove tf_subsystem There is just one configfs subsystem in the target code, so we might as well add two helpers to reference / unreference it from the core code instead of passing pointers to it around. This fixes a regression introduced for v4.1-rc1 with commit 9ac8928e6, where configfs_depend_item() callers using se_tpg_tfo->tf_subsys would fail, because the assignment from the original target_core_subsystem[] is no longer happening at target_register_template() time. (Fix target_core_exit_configfs pointer dereference - Sagi) Signed-off-by: Christoph Hellwig Reported-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 6 ++--- drivers/target/target_core_configfs.c | 30 ++++++++++++------------- drivers/target/target_core_internal.h | 3 --- drivers/target/target_core_pr.c | 32 ++++++--------------------- drivers/target/target_core_xcopy.c | 15 +++++-------- drivers/vhost/scsi.c | 6 ++--- include/target/target_core_configfs.h | 2 -- include/target/target_core_fabric.h | 4 +++- 8 files changed, 34 insertions(+), 64 deletions(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 68c2002e78bf..5c9e680aa375 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1020,8 +1020,7 @@ static void tcm_qla2xxx_depend_tpg(struct work_struct *work) struct se_portal_group *se_tpg = &base_tpg->se_tpg; struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; - if (!configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item)) { + if (!target_depend_item(&se_tpg->tpg_group.cg_item)) { atomic_set(&base_tpg->lport_tpg_enabled, 1); qlt_enable_vha(base_vha); } @@ -1037,8 +1036,7 @@ static void tcm_qla2xxx_undepend_tpg(struct work_struct *work) if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) { atomic_set(&base_tpg->lport_tpg_enabled, 0); - configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item); + target_undepend_item(&se_tpg->tpg_group.cg_item); } complete(&base_tpg->tpg_base_comp); } diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index ddaf76a4ac2a..1580077971f8 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -212,10 +212,6 @@ static struct config_group *target_core_register_fabric( pr_debug("Target_Core_ConfigFS: REGISTER -> 
Allocated Fabric:" " %s\n", tf->tf_group.cg_item.ci_name); - /* - * Setup tf_ops.tf_subsys pointer for usage with configfs_depend_item() - */ - tf->tf_ops.tf_subsys = tf->tf_subsys; tf->tf_fabric = &tf->tf_group.cg_item; pr_debug("Target_Core_ConfigFS: REGISTER -> Set tf->tf_fabric" " for %s\n", name); @@ -291,10 +287,17 @@ static struct configfs_subsystem target_core_fabrics = { }, }; -struct configfs_subsystem *target_core_subsystem[] = { - &target_core_fabrics, - NULL, -}; +int target_depend_item(struct config_item *item) +{ + return configfs_depend_item(&target_core_fabrics, item); +} +EXPORT_SYMBOL(target_depend_item); + +void target_undepend_item(struct config_item *item) +{ + return configfs_undepend_item(&target_core_fabrics, item); +} +EXPORT_SYMBOL(target_undepend_item); /*############################################################################## // Start functions called by external Target Fabrics Modules @@ -467,7 +470,6 @@ int target_register_template(const struct target_core_fabric_ops *fo) * struct target_fabric_configfs->tf_cit_tmpl */ tf->tf_module = fo->module; - tf->tf_subsys = target_core_subsystem[0]; snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name); tf->tf_ops = *fo; @@ -2870,7 +2872,7 @@ static int __init target_core_init_configfs(void) { struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL; struct config_group *lu_gp_cg = NULL; - struct configfs_subsystem *subsys; + struct configfs_subsystem *subsys = &target_core_fabrics; struct t10_alua_lu_gp *lu_gp; int ret; @@ -2878,7 +2880,6 @@ static int __init target_core_init_configfs(void) " Engine: %s on %s/%s on "UTS_RELEASE"\n", TARGET_CORE_VERSION, utsname()->sysname, utsname()->machine); - subsys = target_core_subsystem[0]; config_group_init(&subsys->su_group); mutex_init(&subsys->su_mutex); @@ -3008,13 +3009,10 @@ out_global: static void __exit target_core_exit_configfs(void) { - struct configfs_subsystem *subsys; struct config_group *hba_cg, *alua_cg, *lu_gp_cg; struct config_item *item; int i; - subsys = target_core_subsystem[0]; - lu_gp_cg = &alua_lu_gps_group; for (i = 0; lu_gp_cg->default_groups[i]; i++) { item = &lu_gp_cg->default_groups[i]->cg_item; @@ -3045,8 +3043,8 @@ static void __exit target_core_exit_configfs(void) * We expect subsys->su_group.default_groups to be released * by configfs subsystem provider logic.. 
*/ - configfs_unregister_subsystem(subsys); - kfree(subsys->su_group.default_groups); + configfs_unregister_subsystem(&target_core_fabrics); + kfree(target_core_fabrics.su_group.default_groups); core_alua_free_lu_gp(default_lu_gp); default_lu_gp = NULL; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 874a9bc988d8..68bd7f5d9f73 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -4,9 +4,6 @@ /* target_core_alua.c */ extern struct t10_alua_lu_gp *default_lu_gp; -/* target_core_configfs.c */ -extern struct configfs_subsystem *target_core_subsystem[]; - /* target_core_device.c */ extern struct mutex g_device_mutex; extern struct list_head g_device_list; diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index c1aa9655e96e..b7c81acf08d0 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1367,41 +1367,26 @@ void core_scsi3_free_all_registrations( static int core_scsi3_tpg_depend_item(struct se_portal_group *tpg) { - return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys, - &tpg->tpg_group.cg_item); + return target_depend_item(&tpg->tpg_group.cg_item); } static void core_scsi3_tpg_undepend_item(struct se_portal_group *tpg) { - configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, - &tpg->tpg_group.cg_item); - + target_undepend_item(&tpg->tpg_group.cg_item); atomic_dec_mb(&tpg->tpg_pr_ref_count); } static int core_scsi3_nodeacl_depend_item(struct se_node_acl *nacl) { - struct se_portal_group *tpg = nacl->se_tpg; - if (nacl->dynamic_node_acl) return 0; - - return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys, - &nacl->acl_group.cg_item); + return target_depend_item(&nacl->acl_group.cg_item); } static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl) { - struct se_portal_group *tpg = nacl->se_tpg; - - if (nacl->dynamic_node_acl) { - atomic_dec_mb(&nacl->acl_pr_ref_count); - return; - } - - configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, - &nacl->acl_group.cg_item); - + if (!nacl->dynamic_node_acl) + target_undepend_item(&nacl->acl_group.cg_item); atomic_dec_mb(&nacl->acl_pr_ref_count); } @@ -1419,8 +1404,7 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) nacl = lun_acl->se_lun_nacl; tpg = nacl->se_tpg; - return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys, - &lun_acl->se_lun_group.cg_item); + return target_depend_item(&lun_acl->se_lun_group.cg_item); } static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) @@ -1438,9 +1422,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) nacl = lun_acl->se_lun_nacl; tpg = nacl->se_tpg; - configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, - &lun_acl->se_lun_group.cg_item); - + target_undepend_item(&lun_acl->se_lun_group.cg_item); atomic_dec_mb(&se_deve->pr_ref_count); } diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index a600ff15dcfd..8fd680ac941b 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -58,7 +58,6 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op bool src) { struct se_device *se_dev; - struct configfs_subsystem *subsys = target_core_subsystem[0]; unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn; int rc; @@ -90,8 +89,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op " se_dev\n", xop->src_dev); } - rc = configfs_depend_item(subsys, - 
&se_dev->dev_group.cg_item); + rc = target_depend_item(&se_dev->dev_group.cg_item); if (rc != 0) { pr_err("configfs_depend_item attempt failed:" " %d for se_dev: %p\n", rc, se_dev); @@ -99,8 +97,8 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op return rc; } - pr_debug("Called configfs_depend_item for subsys: %p se_dev: %p" - " se_dev->se_dev_group: %p\n", subsys, se_dev, + pr_debug("Called configfs_depend_item for se_dev: %p" + " se_dev->se_dev_group: %p\n", se_dev, &se_dev->dev_group); mutex_unlock(&g_device_mutex); @@ -373,7 +371,6 @@ static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) { - struct configfs_subsystem *subsys = target_core_subsystem[0]; struct se_device *remote_dev; if (xop->op_origin == XCOL_SOURCE_RECV_OP) @@ -381,11 +378,11 @@ static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) else remote_dev = xop->src_dev; - pr_debug("Calling configfs_undepend_item for subsys: %p" + pr_debug("Calling configfs_undepend_item for" " remote_dev: %p remote_dev->dev_group: %p\n", - subsys, remote_dev, &remote_dev->dev_group.cg_item); + remote_dev, &remote_dev->dev_group.cg_item); - configfs_undepend_item(subsys, &remote_dev->dev_group.cg_item); + target_undepend_item(&remote_dev->dev_group.cg_item); } static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 5e19bb53b3a9..ea32b386797f 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1409,8 +1409,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, * dependency now. */ se_tpg = &tpg->se_tpg; - ret = configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item); + ret = target_depend_item(&se_tpg->tpg_group.cg_item); if (ret) { pr_warn("configfs_depend_item() failed: %d\n", ret); kfree(vs_tpg); @@ -1513,8 +1512,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur. 
*/ se_tpg = &tpg->se_tpg; - configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item); + target_undepend_item(&se_tpg->tpg_group.cg_item); } if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 25bb04c4209e..b99c01170392 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -40,8 +40,6 @@ struct target_fabric_configfs { struct config_item *tf_fabric; /* Passed from fabric modules */ struct config_item_type *tf_fabric_cit; - /* Pointer to target core subsystem */ - struct configfs_subsystem *tf_subsys; /* Pointer to fabric's struct module */ struct module *tf_module; struct target_core_fabric_ops tf_ops; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 17c7f5ac7ea0..0f4dc3768587 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -4,7 +4,6 @@ struct target_core_fabric_ops { struct module *module; const char *name; - struct configfs_subsystem *tf_subsys; char *(*get_fabric_name)(void); u8 (*get_fabric_proto_ident)(struct se_portal_group *); char *(*tpg_get_wwn)(struct se_portal_group *); @@ -109,6 +108,9 @@ struct target_core_fabric_ops { int target_register_template(const struct target_core_fabric_ops *fo); void target_unregister_template(const struct target_core_fabric_ops *fo); +int target_depend_item(struct config_item *item); +void target_undepend_item(struct config_item *item); + struct se_session *transport_init_session(enum target_prot_op); int transport_alloc_session_tags(struct se_session *, unsigned int, unsigned int); From 5a7125c64def3b21f8147eca8b54949a60963942 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Fri, 22 May 2015 14:07:44 -0700 Subject: [PATCH 401/481] target/pscsi: Don't leak scsi_host if hba is VIRTUAL_HOST See https://bugzilla.redhat.com/show_bug.cgi?id=1025672 We need to put() the reference to the scsi host that we got in pscsi_configure_device(). In VIRTUAL_HOST mode it is associated with the dev_virt, not the hba_virt. 
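The shape of the fix is the usual acquire/store/release symmetry: the reference taken while configuring the device is remembered on the per-device structure (the new pdv_lld_host field) so the free path can drop exactly that reference. A toy standalone sketch of the same pattern, using made-up get_host()/put_host() helpers rather than the real SCSI midlayer calls, would look roughly like this:

#include <stdio.h>

/* Toy stand-ins for the host get/put helpers; purely illustrative. */
struct host { int refcount; };

static struct host *get_host(struct host *h) { h->refcount++; return h; }
static void put_host(struct host *h)         { h->refcount--; }

/* Per-device state remembers exactly the reference configure took,
 * so teardown can balance it; this mirrors what pdv_lld_host does. */
struct dev_virt { struct host *lld_host; };

static int dev_configure(struct dev_virt *dv, struct host *h)
{
        dv->lld_host = get_host(h);     /* take and remember */
        return 0;
}

static void dev_free(struct dev_virt *dv)
{
        if (dv->lld_host) {             /* put only what we took */
                put_host(dv->lld_host);
                dv->lld_host = NULL;
        }
}

int main(void)
{
        struct host h = { .refcount = 1 };
        struct dev_virt dv = { 0 };

        dev_configure(&dv, &h);
        dev_free(&dv);
        printf("refcount back to %d\n", h.refcount);    /* prints 1 */
        return 0;
}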
Signed-off-by: Andy Grover Cc: stable@vger.kernel.org # 2.6.38+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_pscsi.c | 3 +++ drivers/target/target_core_pscsi.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index f6c954c4635f..4073869d2090 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -521,6 +521,7 @@ static int pscsi_configure_device(struct se_device *dev) " pdv_host_id: %d\n", pdv->pdv_host_id); return -EINVAL; } + pdv->pdv_lld_host = sh; } } else { if (phv->phv_mode == PHV_VIRTUAL_HOST_ID) { @@ -603,6 +604,8 @@ static void pscsi_free_device(struct se_device *dev) if ((phv->phv_mode == PHV_LLD_SCSI_HOST_NO) && (phv->phv_lld_host != NULL)) scsi_host_put(phv->phv_lld_host); + else if (pdv->pdv_lld_host) + scsi_host_put(pdv->pdv_lld_host); if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM)) scsi_device_put(sd); diff --git a/drivers/target/target_core_pscsi.h b/drivers/target/target_core_pscsi.h index 1bd757dff8ee..820d3052b775 100644 --- a/drivers/target/target_core_pscsi.h +++ b/drivers/target/target_core_pscsi.h @@ -45,6 +45,7 @@ struct pscsi_dev_virt { int pdv_lun_id; struct block_device *pdv_bd; struct scsi_device *pdv_sd; + struct Scsi_Host *pdv_lld_host; } ____cacheline_aligned; typedef enum phv_modes { From cf87edc6022d1fe7efa6b8ce1a99f2ef6a1b27f9 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 19 May 2015 14:44:38 -0700 Subject: [PATCH 402/481] target/user: Update example code for new ABI requirements We now require that the userspace handler set a bit if the command is not handled. Update calls to tcmu_hdr_get_op for v2. Signed-off-by: Andy Grover Reviewed-by: Ilias Tsitsimpis Signed-off-by: Nicholas Bellinger --- Documentation/target/tcmu-design.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt index c80bf1ed8981..b495108c433c 100644 --- a/Documentation/target/tcmu-design.txt +++ b/Documentation/target/tcmu-design.txt @@ -324,7 +324,7 @@ int handle_device_events(int fd, void *map) /* Process events from cmd ring until we catch up with cmd_head */ while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) { - if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) { + if (tcmu_hdr_get_op(ent->hdr.len_op) == TCMU_OP_CMD) { uint8_t *cdb = (void *)mb + ent->req.cdb_off; bool success = true; @@ -339,8 +339,12 @@ int handle_device_events(int fd, void *map) ent->rsp.scsi_status = SCSI_CHECK_CONDITION; } } + else if (tcmu_hdr_get_op(ent->hdr.len_op) != TCMU_OP_PAD) { + /* Tell the kernel we didn't handle unknown opcodes */ + ent->hdr.uflags |= TCMU_UFLAG_UNKNOWN_OP; + } else { - /* Do nothing for PAD entries */ + /* Do nothing for PAD entries except update cmd_tail */ } /* update cmd_tail */ From 9c1cd1b68cd15c81d12a0cf2402129475882b620 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 19 May 2015 14:44:39 -0700 Subject: [PATCH 403/481] target/user: Only support full command pass-through After much discussion, give up on only passing a subset of SCSI commands to userspace and pass them all. Based on what pscsi is doing, make sure to set SCF_SCSI_DATA_CDB for I/O ops, and define attributes identical to pscsi. Make hw_block_size configurable via dev param. Remove mention of command filtering from tcmu-design.txt. 
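The new hw_block_size option travels in the same comma-separated key=value control string as the existing dev_config and dev_size params, and the handler rejects a zero value. A rough userspace sketch of that parsing flow, written in plain C rather than the kernel's match_token()/match_strdup() helpers, might look like:

#include <stdio.h>
#include <string.h>

/* Pull "hw_block_size=<n>" out of a comma-separated key=value option
 * string.  Illustration only; the real code uses the kernel option
 * parser and falls back to the default when the option is absent. */
static int parse_hw_block_size(const char *opts, unsigned long *out)
{
        char buf[256], *tok;

        snprintf(buf, sizeof(buf), "%s", opts);
        for (tok = strtok(buf, ","); tok; tok = strtok(NULL, ",")) {
                unsigned long val;

                if (sscanf(tok, "hw_block_size=%lu", &val) == 1 && val) {
                        *out = val;             /* zero is rejected */
                        return 0;
                }
        }
        return -1;
}

int main(void)
{
        unsigned long bs;

        if (!parse_hw_block_size("dev_config=foo,hw_block_size=4096", &bs))
                printf("hw_block_size = %lu\n", bs);
        return 0;
}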
Signed-off-by: Andy Grover Reviewed-by: Ilias Tsitsimpis Signed-off-by: Nicholas Bellinger --- Documentation/target/tcmu-design.txt | 21 +---- drivers/target/target_core_user.c | 126 ++++++++++++++++----------- 2 files changed, 76 insertions(+), 71 deletions(-) diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt index b495108c433c..263b907517ac 100644 --- a/Documentation/target/tcmu-design.txt +++ b/Documentation/target/tcmu-design.txt @@ -15,8 +15,7 @@ Contents: a) Discovering and configuring TCMU uio devices b) Waiting for events on the device(s) c) Managing the command ring -3) Command filtering -4) A final note +3) A final note TCM Userspace Design @@ -364,24 +363,6 @@ int handle_device_events(int fd, void *map) } -Command filtering ------------------ - -Initial TCMU support is for a filtered commandset. Only IO-related -commands are presented to userspace, and the rest are handled by LIO's -in-kernel command emulation. The commands presented are all versions -of: - -READ -WRITE -WRITE_VERIFY -XDWRITEREAD -WRITE_SAME -COMPARE_AND_WRITE -SYNCHRONIZE_CACHE -UNMAP - - A final note ------------ diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 0e0feeaec39c..90e084ea7b92 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -938,12 +938,13 @@ static void tcmu_free_device(struct se_device *dev) } enum { - Opt_dev_config, Opt_dev_size, Opt_err, + Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err, }; static match_table_t tokens = { {Opt_dev_config, "dev_config=%s"}, {Opt_dev_size, "dev_size=%u"}, + {Opt_hw_block_size, "hw_block_size=%u"}, {Opt_err, NULL} }; @@ -954,6 +955,7 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, char *orig, *ptr, *opts, *arg_p; substring_t args[MAX_OPT_ARGS]; int ret = 0, token; + unsigned long tmp_ul; opts = kstrdup(page, GFP_KERNEL); if (!opts) @@ -986,6 +988,24 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoul() failed for dev_size=\n"); break; + case Opt_hw_block_size: + arg_p = match_strdup(&args[0]); + if (!arg_p) { + ret = -ENOMEM; + break; + } + ret = kstrtoul(arg_p, 0, &tmp_ul); + kfree(arg_p); + if (ret < 0) { + pr_err("kstrtoul() failed for hw_block_size=\n"); + break; + } + if (!tmp_ul) { + pr_err("hw_block_size must be nonzero\n"); + break; + } + dev->dev_attrib.hw_block_size = tmp_ul; + break; default: break; } @@ -1015,20 +1035,6 @@ static sector_t tcmu_get_blocks(struct se_device *dev) dev->dev_attrib.block_size); } -static sense_reason_t -tcmu_execute_rw(struct se_cmd *se_cmd, struct scatterlist *sgl, u32 sgl_nents, - enum dma_data_direction data_direction) -{ - int ret; - - ret = tcmu_queue_cmd(se_cmd); - - if (ret != 0) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - else - return TCM_NO_SENSE; -} - static sense_reason_t tcmu_pass_op(struct se_cmd *se_cmd) { @@ -1040,52 +1046,70 @@ tcmu_pass_op(struct se_cmd *se_cmd) return TCM_NO_SENSE; } -static struct sbc_ops tcmu_sbc_ops = { - .execute_rw = tcmu_execute_rw, - .execute_sync_cache = tcmu_pass_op, - .execute_write_same = tcmu_pass_op, - .execute_write_same_unmap = tcmu_pass_op, - .execute_unmap = tcmu_pass_op, -}; - static sense_reason_t tcmu_parse_cdb(struct se_cmd *cmd) { - return sbc_parse_cdb(cmd, &tcmu_sbc_ops); + unsigned char *cdb = cmd->t_task_cdb; + + /* + * For REPORT LUNS we always need to emulate the response, for everything + * else, pass it up. 
+ */ + if (cdb[0] == REPORT_LUNS) { + cmd->execute_cmd = spc_emulate_report_luns; + return TCM_NO_SENSE; + } + + /* Set DATA_CDB flag for ops that should have it */ + switch (cdb[0]) { + case READ_6: + case READ_10: + case READ_12: + case READ_16: + case WRITE_6: + case WRITE_10: + case WRITE_12: + case WRITE_16: + case WRITE_VERIFY: + case WRITE_VERIFY_12: + case 0x8e: /* WRITE_VERIFY_16 */ + case COMPARE_AND_WRITE: + case XDWRITEREAD_10: + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; + break; + case VARIABLE_LENGTH_CMD: + switch (get_unaligned_be16(&cdb[8])) { + case READ_32: + case WRITE_32: + case 0x0c: /* WRITE_VERIFY_32 */ + case XDWRITEREAD_32: + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; + break; + } + } + + cmd->execute_cmd = tcmu_pass_op; + + return TCM_NO_SENSE; } -DEF_TB_DEFAULT_ATTRIBS(tcmu); +DEF_TB_DEV_ATTRIB_RO(tcmu, hw_pi_prot_type); +TB_DEV_ATTR_RO(tcmu, hw_pi_prot_type); + +DEF_TB_DEV_ATTRIB_RO(tcmu, hw_block_size); +TB_DEV_ATTR_RO(tcmu, hw_block_size); + +DEF_TB_DEV_ATTRIB_RO(tcmu, hw_max_sectors); +TB_DEV_ATTR_RO(tcmu, hw_max_sectors); + +DEF_TB_DEV_ATTRIB_RO(tcmu, hw_queue_depth); +TB_DEV_ATTR_RO(tcmu, hw_queue_depth); static struct configfs_attribute *tcmu_backend_dev_attrs[] = { - &tcmu_dev_attrib_emulate_model_alias.attr, - &tcmu_dev_attrib_emulate_dpo.attr, - &tcmu_dev_attrib_emulate_fua_write.attr, - &tcmu_dev_attrib_emulate_fua_read.attr, - &tcmu_dev_attrib_emulate_write_cache.attr, - &tcmu_dev_attrib_emulate_ua_intlck_ctrl.attr, - &tcmu_dev_attrib_emulate_tas.attr, - &tcmu_dev_attrib_emulate_tpu.attr, - &tcmu_dev_attrib_emulate_tpws.attr, - &tcmu_dev_attrib_emulate_caw.attr, - &tcmu_dev_attrib_emulate_3pc.attr, - &tcmu_dev_attrib_pi_prot_type.attr, &tcmu_dev_attrib_hw_pi_prot_type.attr, - &tcmu_dev_attrib_pi_prot_format.attr, - &tcmu_dev_attrib_enforce_pr_isids.attr, - &tcmu_dev_attrib_is_nonrot.attr, - &tcmu_dev_attrib_emulate_rest_reord.attr, - &tcmu_dev_attrib_force_pr_aptpl.attr, &tcmu_dev_attrib_hw_block_size.attr, - &tcmu_dev_attrib_block_size.attr, &tcmu_dev_attrib_hw_max_sectors.attr, - &tcmu_dev_attrib_optimal_sectors.attr, &tcmu_dev_attrib_hw_queue_depth.attr, - &tcmu_dev_attrib_queue_depth.attr, - &tcmu_dev_attrib_max_unmap_lba_count.attr, - &tcmu_dev_attrib_max_unmap_block_desc_count.attr, - &tcmu_dev_attrib_unmap_granularity.attr, - &tcmu_dev_attrib_unmap_granularity_alignment.attr, - &tcmu_dev_attrib_max_write_same_len.attr, NULL, }; @@ -1094,7 +1118,7 @@ static struct se_subsystem_api tcmu_template = { .inquiry_prod = "USER", .inquiry_rev = TCMU_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV, + .transport_type = TRANSPORT_PLUGIN_PHBA_PDEV, .attach_hba = tcmu_attach_hba, .detach_hba = tcmu_detach_hba, .alloc_device = tcmu_alloc_device, From 7bfea53b5c936d706d0bf60ec218fa72cde77121 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 19 May 2015 14:44:40 -0700 Subject: [PATCH 404/481] target: Move passthrough CDB parsing into a common function Aside from whether they handle BIDI ops or not, parsing of the CDB by kernel and user SCSI passthrough modules should be identical. Move this into a new passthrough_parse_cdb() and call it from tcm-pscsi and tcm-user. 
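With the helper exported, a passthrough backend only has to supply its execute callback; the common code handles the legacy LUN-field cleanup, REPORT LUNS emulation, and the SCF_SCSI_DATA_CDB classification. The fragment below is an illustrative sketch of that wiring for a hypothetical backend ("myback" is invented for the example; pscsi and tcmu are the real users and wire themselves up the same way):

/* Illustrative sketch only: "myback" is a made-up backend, not part of
 * the patch. */
static sense_reason_t myback_execute_cmd(struct se_cmd *cmd)
{
        /* hand the raw CDB to whatever actually services it */
        return TCM_NO_SENSE;
}

static sense_reason_t myback_parse_cdb(struct se_cmd *cmd)
{
        /* passthrough_parse_cdb() clears the legacy LUN bits, emulates
         * REPORT LUNS and sets SCF_SCSI_DATA_CDB for I/O opcodes; the
         * backend only supplies the execute hook. */
        return passthrough_parse_cdb(cmd, myback_execute_cmd);
}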
Reported-by: Christoph Hellwig Reviewed-by: Ilias Tsitsimpis Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 74 ++++++++++++++++++++++++++++ drivers/target/target_core_pscsi.c | 53 +------------------- drivers/target/target_core_user.c | 43 +--------------- include/target/target_core_backend.h | 2 + 4 files changed, 78 insertions(+), 94 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 7faa6aef9a4d..56b20dc54087 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1707,3 +1708,76 @@ void core_dev_release_virtual_lun0(void) target_free_device(g_lun0_dev); core_delete_hba(hba); } + +/* + * Common CDB parsing for kernel and user passthrough. + */ +sense_reason_t +passthrough_parse_cdb(struct se_cmd *cmd, + sense_reason_t (*exec_cmd)(struct se_cmd *cmd)) +{ + unsigned char *cdb = cmd->t_task_cdb; + + /* + * Clear a lun set in the cdb if the initiator talking to use spoke + * and old standards version, as we can't assume the underlying device + * won't choke up on it. + */ + switch (cdb[0]) { + case READ_10: /* SBC - RDProtect */ + case READ_12: /* SBC - RDProtect */ + case READ_16: /* SBC - RDProtect */ + case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ + case VERIFY: /* SBC - VRProtect */ + case VERIFY_16: /* SBC - VRProtect */ + case WRITE_VERIFY: /* SBC - VRProtect */ + case WRITE_VERIFY_12: /* SBC - VRProtect */ + case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ + break; + default: + cdb[1] &= 0x1f; /* clear logical unit number */ + break; + } + + /* + * For REPORT LUNS we always need to emulate the response, for everything + * else, pass it up. + */ + if (cdb[0] == REPORT_LUNS) { + cmd->execute_cmd = spc_emulate_report_luns; + return TCM_NO_SENSE; + } + + /* Set DATA_CDB flag for ops that should have it */ + switch (cdb[0]) { + case READ_6: + case READ_10: + case READ_12: + case READ_16: + case WRITE_6: + case WRITE_10: + case WRITE_12: + case WRITE_16: + case WRITE_VERIFY: + case WRITE_VERIFY_12: + case 0x8e: /* WRITE_VERIFY_16 */ + case COMPARE_AND_WRITE: + case XDWRITEREAD_10: + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; + break; + case VARIABLE_LENGTH_CMD: + switch (get_unaligned_be16(&cdb[8])) { + case READ_32: + case WRITE_32: + case 0x0c: /* WRITE_VERIFY_32 */ + case XDWRITEREAD_32: + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; + break; + } + } + + cmd->execute_cmd = exec_cmd; + + return TCM_NO_SENSE; +} +EXPORT_SYMBOL(passthrough_parse_cdb); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 4073869d2090..53bd0eb57095 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -973,64 +973,13 @@ fail: return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } -/* - * Clear a lun set in the cdb if the initiator talking to use spoke - * and old standards version, as we can't assume the underlying device - * won't choke up on it. 
- */ -static inline void pscsi_clear_cdb_lun(unsigned char *cdb) -{ - switch (cdb[0]) { - case READ_10: /* SBC - RDProtect */ - case READ_12: /* SBC - RDProtect */ - case READ_16: /* SBC - RDProtect */ - case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ - case VERIFY: /* SBC - VRProtect */ - case VERIFY_16: /* SBC - VRProtect */ - case WRITE_VERIFY: /* SBC - VRProtect */ - case WRITE_VERIFY_12: /* SBC - VRProtect */ - case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ - break; - default: - cdb[1] &= 0x1f; /* clear logical unit number */ - break; - } -} - static sense_reason_t pscsi_parse_cdb(struct se_cmd *cmd) { - unsigned char *cdb = cmd->t_task_cdb; - if (cmd->se_cmd_flags & SCF_BIDI) return TCM_UNSUPPORTED_SCSI_OPCODE; - pscsi_clear_cdb_lun(cdb); - - /* - * For REPORT LUNS we always need to emulate the response, for everything - * else the default for pSCSI is to pass the command to the underlying - * LLD / physical hardware. - */ - switch (cdb[0]) { - case REPORT_LUNS: - cmd->execute_cmd = spc_emulate_report_luns; - return 0; - case READ_6: - case READ_10: - case READ_12: - case READ_16: - case WRITE_6: - case WRITE_10: - case WRITE_12: - case WRITE_16: - case WRITE_VERIFY: - cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - /* FALLTHROUGH*/ - default: - cmd->execute_cmd = pscsi_execute_cmd; - return 0; - } + return passthrough_parse_cdb(cmd, pscsi_execute_cmd); } static sense_reason_t diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 90e084ea7b92..2768ea2cfd7a 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1049,48 +1049,7 @@ tcmu_pass_op(struct se_cmd *se_cmd) static sense_reason_t tcmu_parse_cdb(struct se_cmd *cmd) { - unsigned char *cdb = cmd->t_task_cdb; - - /* - * For REPORT LUNS we always need to emulate the response, for everything - * else, pass it up. 
- */ - if (cdb[0] == REPORT_LUNS) { - cmd->execute_cmd = spc_emulate_report_luns; - return TCM_NO_SENSE; - } - - /* Set DATA_CDB flag for ops that should have it */ - switch (cdb[0]) { - case READ_6: - case READ_10: - case READ_12: - case READ_16: - case WRITE_6: - case WRITE_10: - case WRITE_12: - case WRITE_16: - case WRITE_VERIFY: - case WRITE_VERIFY_12: - case 0x8e: /* WRITE_VERIFY_16 */ - case COMPARE_AND_WRITE: - case XDWRITEREAD_10: - cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - break; - case VARIABLE_LENGTH_CMD: - switch (get_unaligned_be16(&cdb[8])) { - case READ_32: - case WRITE_32: - case 0x0c: /* WRITE_VERIFY_32 */ - case XDWRITEREAD_32: - cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - break; - } - } - - cmd->execute_cmd = tcmu_pass_op; - - return TCM_NO_SENSE; + return passthrough_parse_cdb(cmd, tcmu_pass_op); } DEF_TB_DEV_ATTRIB_RO(tcmu, hw_pi_prot_type); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index d61be7297b2c..563e11970152 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -138,5 +138,7 @@ int se_dev_set_queue_depth(struct se_device *, u32); int se_dev_set_max_sectors(struct se_device *, u32); int se_dev_set_optimal_sectors(struct se_device *, u32); int se_dev_set_block_size(struct se_device *, u32); +sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, + sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); #endif /* TARGET_CORE_BACKEND_H */ From a3541703ebbf99d499656b15987175f6579b42ac Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 19 May 2015 14:44:41 -0700 Subject: [PATCH 405/481] target: Use a PASSTHROUGH flag instead of transport_types It seems like we only care if a transport is passthrough or not. Convert transport_type to a flags field and replace TRANSPORT_PLUGIN_* with a flag, TRANSPORT_FLAG_PASSTHROUGH. 
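The practical effect is that callers test a capability bit instead of comparing against one specific plugin type, and backends that are not passthrough simply leave the field zero. A toy standalone model of that check, reusing only the flag name and value from the patch, could be:

#include <stdio.h>

#define TRANSPORT_FLAG_PASSTHROUGH 1    /* value defined by the patch */

struct backend { const char *name; unsigned char transport_flags; };

static int is_passthrough(const struct backend *b)
{
        /* bit test rather than an identity comparison on a type enum */
        return b->transport_flags & TRANSPORT_FLAG_PASSTHROUGH;
}

int main(void)
{
        struct backend pscsi  = { "pscsi",  TRANSPORT_FLAG_PASSTHROUGH };
        struct backend iblock = { "iblock", 0 };  /* virtual backends leave it 0 */

        printf("%s passthrough: %d\n", pscsi.name,  is_passthrough(&pscsi));
        printf("%s passthrough: %d\n", iblock.name, is_passthrough(&iblock));
        return 0;
}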
Signed-off-by: Andy Grover Reviewed-by: Ilias Tsitsimpis Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 4 ++-- drivers/target/target_core_configfs.c | 10 +++++----- drivers/target/target_core_device.c | 4 ++-- drivers/target/target_core_file.c | 1 - drivers/target/target_core_iblock.c | 1 - drivers/target/target_core_pr.c | 2 +- drivers/target/target_core_pscsi.c | 2 +- drivers/target/target_core_rd.c | 1 - drivers/target/target_core_transport.c | 6 +++--- drivers/target/target_core_user.c | 2 +- include/target/target_core_backend.h | 6 ++---- 11 files changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 75cbde1f7c5b..4f8d4d459aa4 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -704,7 +704,7 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (!port) @@ -2377,7 +2377,7 @@ ssize_t core_alua_store_secondary_write_metadata( int core_setup_alua(struct se_device *dev) { - if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV && + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { struct t10_alua_lu_gp_member *lu_gp_mem; diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 1580077971f8..e7b0430a0575 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -811,7 +811,7 @@ static ssize_t target_core_dev_pr_show_attr_res_holder(struct se_device *dev, { int ret; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return sprintf(page, "Passthrough\n"); spin_lock(&dev->dev_reservation_lock); @@ -962,7 +962,7 @@ SE_DEV_PR_ATTR_RO(res_pr_type); static ssize_t target_core_dev_pr_show_attr_res_type( struct se_device *dev, char *page) { - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return sprintf(page, "SPC_PASSTHROUGH\n"); else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return sprintf(page, "SPC2_RESERVATIONS\n"); @@ -975,7 +975,7 @@ SE_DEV_PR_ATTR_RO(res_type); static ssize_t target_core_dev_pr_show_attr_res_aptpl_active( struct se_device *dev, char *page) { - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; return sprintf(page, "APTPL Bit Status: %s\n", @@ -990,7 +990,7 @@ SE_DEV_PR_ATTR_RO(res_aptpl_active); static ssize_t target_core_dev_pr_show_attr_res_aptpl_metadata( struct se_device *dev, char *page) { - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; return sprintf(page, "Ready to process PR APTPL metadata..\n"); @@ -1037,7 +1037,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( u16 port_rpti = 0, tpgt = 0; u8 type = 0, scope; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return 0; diff --git a/drivers/target/target_core_device.c 
b/drivers/target/target_core_device.c index 56b20dc54087..ce5f768181ff 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -528,7 +528,7 @@ static void core_export_port( list_add_tail(&port->sep_list, &dev->dev_sep_list); spin_unlock(&dev->se_port_lock); - if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV && + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { tg_pt_gp_mem = core_alua_allocate_tg_pt_gp_mem(port); if (IS_ERR(tg_pt_gp_mem) || !tg_pt_gp_mem) { @@ -1604,7 +1604,7 @@ int target_configure_device(struct se_device *dev) * anything virtual (IBLOCK, FILEIO, RAMDISK), but not for TCM/pSCSI * passthrough because this is being provided by the backend LLD. */ - if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV) { + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)) { strncpy(&dev->t10_wwn.vendor[0], "LIO-ORG", 8); strncpy(&dev->t10_wwn.model[0], dev->transport->inquiry_prod, 16); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index f7e6e51aed36..3f27bfd816d8 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -958,7 +958,6 @@ static struct se_subsystem_api fileio_template = { .inquiry_prod = "FILEIO", .inquiry_rev = FD_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV, .attach_hba = fd_attach_hba, .detach_hba = fd_detach_hba, .alloc_device = fd_alloc_device, diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 1b7947c2510f..8c965683789f 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -904,7 +904,6 @@ static struct se_subsystem_api iblock_template = { .inquiry_prod = "IBLOCK", .inquiry_rev = IBLOCK_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV, .attach_hba = iblock_attach_hba, .detach_hba = iblock_detach_hba, .alloc_device = iblock_alloc_device, diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index b7c81acf08d0..a15411c79ae9 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -4093,7 +4093,7 @@ target_check_reservation(struct se_cmd *cmd) return 0; if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; spin_lock(&dev->dev_reservation_lock); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 53bd0eb57095..ecc5eaef13d6 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1141,7 +1141,7 @@ static struct configfs_attribute *pscsi_backend_dev_attrs[] = { static struct se_subsystem_api pscsi_template = { .name = "pscsi", .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_PHBA_PDEV, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, .attach_hba = pscsi_attach_hba, .detach_hba = pscsi_detach_hba, .pmode_enable_hba = pscsi_pmode_enable_hba, diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index a263bf5fab8d..d16489b6a1a4 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -733,7 +733,6 @@ static struct se_subsystem_api rd_mcp_template = { .name = "rd_mcp", .inquiry_prod = "RAMDISK-MCP", .inquiry_rev = RD_MCP_VERSION, - .transport_type = 
TRANSPORT_PLUGIN_VHBA_VDEV, .attach_hba = rd_attach_hba, .detach_hba = rd_detach_hba, .alloc_device = rd_alloc_device, diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 90c92f04a586..675f2d9d1f14 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1196,7 +1196,7 @@ transport_check_alloc_task_attr(struct se_cmd *cmd) * Check if SAM Task Attribute emulation is enabled for this * struct se_device storage object */ - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (cmd->sam_task_attr == TCM_ACA_TAG) { @@ -1787,7 +1787,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return false; /* @@ -1912,7 +1912,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return; if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 2768ea2cfd7a..07d2996d8c1f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1077,7 +1077,7 @@ static struct se_subsystem_api tcmu_template = { .inquiry_prod = "USER", .inquiry_rev = TCMU_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_PHBA_PDEV, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, .attach_hba = tcmu_attach_hba, .detach_hba = tcmu_detach_hba, .alloc_device = tcmu_alloc_device, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 563e11970152..5f1225706993 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -1,9 +1,7 @@ #ifndef TARGET_CORE_BACKEND_H #define TARGET_CORE_BACKEND_H -#define TRANSPORT_PLUGIN_PHBA_PDEV 1 -#define TRANSPORT_PLUGIN_VHBA_PDEV 2 -#define TRANSPORT_PLUGIN_VHBA_VDEV 3 +#define TRANSPORT_FLAG_PASSTHROUGH 1 struct target_backend_cits { struct config_item_type tb_dev_cit; @@ -22,7 +20,7 @@ struct se_subsystem_api { char inquiry_rev[4]; struct module *owner; - u8 transport_type; + u8 transport_flags; int (*attach_hba)(struct se_hba *, u32); void (*detach_hba)(struct se_hba *); From b2feda4feb1b0ea74c0916b4a35b038bbfef9c82 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 29 May 2015 23:12:10 -0700 Subject: [PATCH 406/481] iser-target: Fix error path in isert_create_pi_ctx() We don't assign pi_ctx to desc->pi_ctx until we're certain to succeed in the function. That means the cleanup path should use the local pi_ctx variable, not desc->pi_ctx. This was detected by Coverity (CID 1260062). 
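The underlying rule is that an error path may only unwind state the function has actually established; because desc->pi_ctx is assigned only after every allocation has succeeded, cleanup has to go through the local pointer. A minimal userspace sketch of the same publish-late, unwind-local pattern (generic names, not the isert code) is:

#include <stdio.h>
#include <stdlib.h>

struct ctx  { void *a; void *b; };
struct desc { struct ctx *ctx; };

/* Build a ctx in a local variable and publish it to desc->ctx only once
 * every step has succeeded; the error path therefore unwinds through the
 * local pointer, never through the still-unset desc field. */
static int create_ctx(struct desc *d)
{
        struct ctx *c = calloc(1, sizeof(*c));

        if (!c)
                return -1;

        c->a = malloc(64);
        if (!c->a)
                goto err_free_ctx;

        c->b = malloc(64);
        if (!c->b)
                goto err_free_a;

        d->ctx = c;                     /* publish on full success only */
        return 0;

err_free_a:
        free(c->a);                     /* unwind via the local pointer */
err_free_ctx:
        free(c);                        /* d->ctx was never assigned */
        return -1;
}

int main(void)
{
        struct desc d = { 0 };

        printf("create_ctx: %d\n", create_ctx(&d));
        return 0;
}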
Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 327529ee85eb..3f40319a55da 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -547,11 +547,11 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc, return 0; err_prot_mr: - ib_dereg_mr(desc->pi_ctx->prot_mr); + ib_dereg_mr(pi_ctx->prot_mr); err_prot_frpl: - ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); + ib_free_fast_reg_page_list(pi_ctx->prot_frpl); err_pi_ctx: - kfree(desc->pi_ctx); + kfree(pi_ctx); return ret; } From 71d9f6149cac8fc6646adfb2a6f3b0de6ddd23f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 May 2015 04:42:54 -0700 Subject: [PATCH 407/481] bridge: fix br_multicast_query_expired() bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit br_multicast_query_expired() querier argument is a pointer to a struct bridge_mcast_querier : struct bridge_mcast_querier { struct br_ip addr; struct net_bridge_port __rcu *port; }; Intent of the code was to clear port field, not the pointer to querier. Fixes: 2cd4143192e8 ("bridge: memorize and export selected IGMP/MLD querier port") Signed-off-by: Eric Dumazet Acked-by: Thadeu Lima de Souza Cascardo Acked-by: Linus Lüssing Cc: Linus Lüssing Cc: Steinar H. Gunderson Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a3abe6ed111e..22fd0419b314 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1822,7 +1822,7 @@ static void br_multicast_query_expired(struct net_bridge *br, if (query->startup_sent < br->multicast_startup_query_count) query->startup_sent++; - RCU_INIT_POINTER(querier, NULL); + RCU_INIT_POINTER(querier->port, NULL); br_multicast_send_query(br, NULL, query); spin_unlock(&br->multicast_lock); } From e236b954232808001f522c4b79df97b8c9262a4a Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 28 May 2015 23:10:06 +0200 Subject: [PATCH 408/481] bna: fix firmware loading on big-endian machines Firmware required by bna is stored in appropriate files as sequence of LE32 integers. After loading by request_firmware() they need to be byte-swapped on big-endian arches. Without this conversion the NIC is unusable on big-endian machines. Cc: Rasesh Mody Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/cna_fwimg.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/brocade/bna/cna_fwimg.c b/drivers/net/ethernet/brocade/bna/cna_fwimg.c index ebf462d8082f..badea368bdc8 100644 --- a/drivers/net/ethernet/brocade/bna/cna_fwimg.c +++ b/drivers/net/ethernet/brocade/bna/cna_fwimg.c @@ -30,6 +30,7 @@ cna_read_firmware(struct pci_dev *pdev, u32 **bfi_image, u32 *bfi_image_size, char *fw_name) { const struct firmware *fw; + u32 n; if (request_firmware(&fw, fw_name, &pdev->dev)) { pr_alert("Can't locate firmware %s\n", fw_name); @@ -40,6 +41,12 @@ cna_read_firmware(struct pci_dev *pdev, u32 **bfi_image, *bfi_image_size = fw->size/sizeof(u32); bfi_fw = fw; + /* Convert loaded firmware to host order as it is stored in file + * as sequence of LE32 integers. 
+ */ + for (n = 0; n < *bfi_image_size; n++) + le32_to_cpus(*bfi_image + n); + return *bfi_image; error: return NULL; From 4918eb1e7cd3b8a41ebf56b5fabaa334139b919f Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 28 May 2015 23:10:07 +0200 Subject: [PATCH 409/481] bna: remove unreasonable iocpf timer start Driver starts iocpf timer prior bnad_ioceth_enable() call and this is unreasonable. This piece of code probably originates from Brocade/Qlogic out-of-box driver during initial import into upstream. This driver uses only one timer and queue to implement multiple timers and this timer is started at this place. The upstream driver uses multiple timers instead of this. Cc: Rasesh Mody Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 37072a83f9d6..caae6cb2bc1a 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3701,10 +3701,6 @@ bnad_pci_probe(struct pci_dev *pdev, setup_timer(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout, ((unsigned long)bnad)); - /* Now start the timer before calling IOC */ - mod_timer(&bnad->bna.ioceth.ioc.iocpf_timer, - jiffies + msecs_to_jiffies(BNA_IOC_TIMER_FREQ)); - /* * Start the chip * If the call back comes with error, we bail out. From 4818e856475b309667ee38d4d0f2e3c1b933feef Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 28 May 2015 23:10:08 +0200 Subject: [PATCH 410/481] bna: fix soft lock-up during firmware initialization failure Bug in the driver initialization causes soft-lockup if firmware initialization timeout is reached. Polling function bfa_ioc_poll_fwinit() incorrectly calls bfa_nw_iocpf_timeout() when the timeout is reached. The problem is that bfa_nw_iocpf_timeout() calls again bfa_ioc_poll_fwinit()... etc. The bfa_ioc_poll_fwinit() should directly send timeout event for iocpf and the same should be done if firmware download into HW fails. Cc: Rasesh Mody Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bfa_ioc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 594a2ab36d31..68f3c13c9ef6 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -2414,7 +2414,7 @@ bfa_ioc_boot(struct bfa_ioc *ioc, enum bfi_fwboot_type boot_type, if (status == BFA_STATUS_OK) bfa_ioc_lpu_start(ioc); else - bfa_nw_iocpf_timeout(ioc); + bfa_fsm_send_event(&ioc->iocpf, IOCPF_E_TIMEOUT); return status; } @@ -3029,7 +3029,7 @@ bfa_ioc_poll_fwinit(struct bfa_ioc *ioc) } if (ioc->iocpf.poll_time >= BFA_IOC_TOV) { - bfa_nw_iocpf_timeout(ioc); + bfa_fsm_send_event(&ioc->iocpf, IOCPF_E_TIMEOUT); } else { ioc->iocpf.poll_time += BFA_IOC_POLL_TOV; mod_timer(&ioc->iocpf_timer, jiffies + From 9eb0a5d1905235b968dce5c1fda294ac2663d840 Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Fri, 29 May 2015 12:25:54 +0100 Subject: [PATCH 411/481] sfc: free multiple Rx buffers when required When Rx packet data must be dropped, all the buffers associated with that Rx packet must be freed. Extend and rename efx_free_rx_buffer() to efx_free_rx_buffers() and loop through all the fragments. By doing so this patch fixes a possible memory leak. Signed-off-by: Shradha Shah Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/rx.c | 42 +++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index c0ad95d2f63d..809ea4610a77 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -224,12 +224,17 @@ static void efx_unmap_rx_buffer(struct efx_nic *efx, } } -static void efx_free_rx_buffer(struct efx_rx_buffer *rx_buf) +static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs) { - if (rx_buf->page) { - put_page(rx_buf->page); - rx_buf->page = NULL; - } + do { + if (rx_buf->page) { + put_page(rx_buf->page); + rx_buf->page = NULL; + } + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--num_bufs); } /* Attempt to recycle the page if there is an RX recycle ring; the page can @@ -278,7 +283,7 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, /* If this is the last buffer in a page, unmap and free it. */ if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { efx_unmap_rx_buffer(rx_queue->efx, rx_buf); - efx_free_rx_buffer(rx_buf); + efx_free_rx_buffers(rx_queue, rx_buf, 1); } rx_buf->page = NULL; } @@ -304,10 +309,7 @@ static void efx_discard_rx_packet(struct efx_channel *channel, efx_recycle_rx_pages(channel, rx_buf, n_frags); - do { - efx_free_rx_buffer(rx_buf); - rx_buf = efx_rx_buf_next(rx_queue, rx_buf); - } while (--n_frags); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); } /** @@ -431,11 +433,10 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, skb = napi_get_frags(napi); if (unlikely(!skb)) { - while (n_frags--) { - put_page(rx_buf->page); - rx_buf->page = NULL; - rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); - } + struct efx_rx_queue *rx_queue; + + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); return; } @@ -622,7 +623,10 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len); if (unlikely(skb == NULL)) { - efx_free_rx_buffer(rx_buf); + struct efx_rx_queue *rx_queue; + + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); return; } skb_record_rx_queue(skb, channel->rx_queue.core_index); @@ -661,8 +665,12 @@ void __efx_rx_packet(struct efx_channel *channel) * loopback layer, and free the rx_buf here */ if (unlikely(efx->loopback_selftest)) { + struct efx_rx_queue *rx_queue; + efx_loopback_rx_packet(efx, eh, rx_buf->len); - efx_free_rx_buffer(rx_buf); + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, + channel->rx_pkt_n_frags); goto out; } From c65b99f046843d2455aa231747b5a07a999a9f3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 31 May 2015 19:01:07 -0700 Subject: [PATCH 412/481] Linux 4.1-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 92a707859205..aee7e5cb4c15 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* From beb39db59d14990e401e235faf66a6b9b31240b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 30 May 2015 09:16:53 -0700 Subject: [PATCH 413/481] udp: fix behavior of wrong checksums We have two problems in UDP stack related to bogus checksums : 1) We return -EAGAIN to application even if receive queue is not empty. 
This breaks applications using edge trigger epoll() 2) Under UDP flood, we can loop forever without yielding to other processes, potentially hanging the host, especially on non SMP. This patch is an attempt to make things better. We might in the future add extra support for rt applications wanting to better control time spent doing a recv() in a hostile environment. For example we could validate checksums before queuing packets in socket receive queue. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 ++---- net/ipv6/udp.c | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d10b7e0112eb..1c92ea67baef 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1345,10 +1345,8 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (noblock) - return -EAGAIN; - - /* starting over for a new packet */ + /* starting over for a new packet, but check if we need to yield */ + cond_resched(); msg->msg_flags &= ~MSG_TRUNC; goto try_again; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c2ec41617a35..e51fc3eee6db 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -525,10 +525,8 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (noblock) - return -EAGAIN; - - /* starting over for a new packet */ + /* starting over for a new packet, but check if we need to yield */ + cond_resched(); msg->msg_flags &= ~MSG_TRUNC; goto try_again; } From 9f950415e4e28e7cfae2e416b43e862e8101d996 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 29 May 2015 13:47:07 -0400 Subject: [PATCH 414/481] tcp: fix child sockets to use system default congestion control if not set Linux 3.17 and earlier are explicitly engineered so that if the app doesn't specifically request a CC module on a listener before the SYN arrives, then the child gets the system default CC when the connection is established. See tcp_init_congestion_control() in 3.17 or earlier, which says "if no choice made yet assign the current value set as default". The change ("net: tcp: assign tcp cong_ops when tcp sk is created") altered these semantics, so that children got their parent listener's congestion control even if the system default had changed after the listener was created. This commit returns to those original semantics from 3.17 and earlier, since they are the original semantics from 2007 in 4d4d3d1e8 ("[TCP]: Congestion control initialization."), and some Linux congestion control workflows depend on that. In summary, if a listener socket specifically sets TCP_CONGESTION to "x", or the route locks the CC module to "x", then the child gets "x". Otherwise the child gets current system default from net.ipv4.tcp_congestion_control. That's the behavior in 3.17 and earlier, and this commit restores that. Fixes: 55d8694fa82c ("net: tcp: assign tcp cong_ops when tcp sk is created") Cc: Florian Westphal Cc: Daniel Borkmann Cc: Glenn Judd Cc: Stephen Hemminger Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 3 ++- net/ipv4/tcp_cong.c | 5 ++++- net/ipv4/tcp_minisocks.c | 5 ++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 497bc14cdb85..0320bbb7d7b5 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -98,7 +98,8 @@ struct inet_connection_sock { const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); - __u8 icsk_ca_state:7, + __u8 icsk_ca_state:6, + icsk_ca_setsockopt:1, icsk_ca_dst_locked:1; __u8 icsk_retransmits; __u8 icsk_pending; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7a5ae50c80c8..84be008c945c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -187,6 +187,7 @@ static void tcp_reinit_congestion_control(struct sock *sk, tcp_cleanup_congestion_control(sk); icsk->icsk_ca_ops = ca; + icsk->icsk_ca_setsockopt = 1; if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); @@ -335,8 +336,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) rcu_read_lock(); ca = __tcp_ca_find_autoload(name); /* No change asking for existing value */ - if (ca == icsk->icsk_ca_ops) + if (ca == icsk->icsk_ca_ops) { + icsk->icsk_ca_setsockopt = 1; goto out; + } if (!ca) err = -ENOENT; else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b5732a54f2ad..17e7339ee5ca 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -420,7 +420,10 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) rcu_read_unlock(); } - if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + /* If no valid choice made yet, assign current system default ca. */ + if (!ca_got_dst && + (!icsk->icsk_ca_setsockopt || + !try_module_get(icsk->icsk_ca_ops->owner))) tcp_assign_congestion_control(sk); tcp_set_ca_state(sk, TCP_CA_Open); From 24595346d79b6bd98a77d24c493e8490639788fc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 29 May 2015 10:29:46 -0700 Subject: [PATCH 415/481] net: dsa: Properly propagate errors from dsa_switch_setup_one While shuffling some code around, dsa_switch_setup_one() was introduced, and it was modified to return either an error code using ERR_PTR() or a NULL pointer when running out of memory or failing to setup a switch. This is a problem for its caler: dsa_switch_setup() which uses IS_ERR() and expects to find an error code, not a NULL pointer, so we still try to proceed with dsa_switch_setup() and operate on invalid memory addresses. This can be easily reproduced by having e.g: the bcm_sf2 driver built-in, but having no such switch, such that drv->setup will fail. Fix this by using PTR_ERR() consistently which is both more informative and avoids for the caller to use IS_ERR_OR_NULL(). Fixes: df197195a5248 ("net: dsa: split dsa_switch_setup into two functions") Reported-by: Andrew Lunn Signed-off-by: Florian Fainelli Tested-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e6f6cc3a1bcf..392e29a0227d 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -359,7 +359,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, */ ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); if (ds == NULL) - return NULL; + return ERR_PTR(-ENOMEM); ds->dst = dst; ds->index = index; @@ -370,7 +370,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ret = dsa_switch_setup_one(ds, parent); if (ret) - return NULL; + return ERR_PTR(ret); return ds; } From 8642ad1c7ba43b1c48eb39a863e975f3f8f96168 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 1 May 2015 14:05:39 +0200 Subject: [PATCH 416/481] sparc: kernel: GRPCI2: Remove a useless memset grpci2priv is allocated using kzalloc, so there is no need to memset it. Signed-off-by: Christophe Jaillet Signed-off-by: David S. Miller --- arch/sparc/kernel/leon_pci_grpci2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index 94e392bdee7d..814fb1729b12 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -723,7 +723,6 @@ static int grpci2_of_probe(struct platform_device *ofdev) err = -ENOMEM; goto err1; } - memset(grpci2priv, 0, sizeof(*grpci2priv)); priv->regs = regs; priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */ priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT; From f0c1a1173773a56d500f1814893e63f97580f76a Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Wed, 27 May 2015 11:59:19 -0400 Subject: [PATCH 417/481] sparc64: pci slots information is not populated in sysfs Add PCI slot numbers within sysfs for PCIe hardware. Larger PCIe systems with nested PCI bridges and slots further down on these bridges were not being populated within sysfs. This will add ACPI style PCI slot numbers for these systems since the OF 'slot-names' information is not available on all PCIe platforms. Signed-off-by: Eric Snowberg Reviewed-by: Bob Picco Signed-off-by: David S. 
Miller --- arch/sparc/kernel/pci.c | 59 +++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 6f7251fd2eab..c928bc64b4ba 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1002,6 +1002,38 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); #ifdef CONFIG_SYSFS + +#define SLOT_NAME_SIZE 11 /* Max decimal digits + null in u32 */ + +static void pcie_bus_slot_names(struct pci_bus *pbus) +{ + struct pci_dev *pdev; + struct pci_bus *bus; + + list_for_each_entry(pdev, &pbus->devices, bus_list) { + char name[SLOT_NAME_SIZE]; + struct pci_slot *pci_slot; + const u32 *slot_num; + int len; + + slot_num = of_get_property(pdev->dev.of_node, + "physical-slot#", &len); + + if (slot_num == NULL || len != 4) + continue; + + snprintf(name, sizeof(name), "%u", slot_num[0]); + pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL); + + if (IS_ERR(pci_slot)) + pr_err("PCI: pci_create_slot returned %ld.\n", + PTR_ERR(pci_slot)); + } + + list_for_each_entry(bus, &pbus->children, node) + pcie_bus_slot_names(bus); +} + static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus) { const struct pci_slot_names { @@ -1053,18 +1085,29 @@ static int __init of_pci_slot_init(void) while ((pbus = pci_find_next_bus(pbus)) != NULL) { struct device_node *node; + struct pci_dev *pdev; - if (pbus->self) { - /* PCI->PCI bridge */ - node = pbus->self->dev.of_node; + pdev = list_first_entry(&pbus->devices, struct pci_dev, + bus_list); + + if (pdev && pci_is_pcie(pdev)) { + pcie_bus_slot_names(pbus); } else { - struct pci_pbm_info *pbm = pbus->sysdata; - /* Host PCI controller */ - node = pbm->op->dev.of_node; + if (pbus->self) { + + /* PCI->PCI bridge */ + node = pbus->self->dev.of_node; + + } else { + struct pci_pbm_info *pbm = pbus->sysdata; + + /* Host PCI controller */ + node = pbm->op->dev.of_node; + } + + pci_bus_slot_names(node, pbus); } - - pci_bus_slot_names(node, pbus); } return 0; From 494e5b6faeda1d1e830a13e10b3c7bc323f35d97 Mon Sep 17 00:00:00 2001 From: Khalid Aziz Date: Wed, 27 May 2015 10:00:46 -0600 Subject: [PATCH 418/481] sparc: Resolve conflict between sparc v9 and M7 on usage of bit 9 of TTE sparc: Resolve conflict between sparc v9 and M7 on usage of bit 9 of TTE Bit 9 of TTE is CV (Cacheable in V-cache) on sparc v9 processor while the same bit 9 is MCDE (Memory Corruption Detection Enable) on M7 processor. This creates a conflicting usage of the same bit. Kernel sets TTE.cv bit on all pages for sun4v architecture which works well for sparc v9 but enables memory corruption detection on M7 processor which is not the intent. This patch adds code to determine if kernel is running on M7 processor and takes steps to not enable memory corruption detection in TTE erroneously. Signed-off-by: Khalid Aziz Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/pgtable_64.h | 22 ++++++++- arch/sparc/include/asm/trap_block.h | 2 + arch/sparc/kernel/entry.h | 2 + arch/sparc/kernel/setup_64.c | 21 ++++++++ arch/sparc/kernel/vmlinux.lds.S | 5 ++ arch/sparc/mm/init_64.c | 74 +++++++++++++++++++++-------- 6 files changed, 104 insertions(+), 22 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index dc165ebdf05a..2a52c91d2c8a 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -308,12 +308,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) " sllx %1, 32, %1\n" " or %0, %1, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sethi %%hi(%4), %0\n" + " .word 662b\n" + " or %1, %%ulo(%4), %1\n" + " or %0, %%lo(%4), %0\n" + " .word 663b\n" + " sllx %1, 32, %1\n" + " or %0, %1, %0\n" + " .previous\n" : "=r" (mask), "=r" (tmp) : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U), "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V), + "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | + _PAGE_CP_4V | _PAGE_E_4V | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V)); return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); @@ -342,9 +356,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) " andn %0, %4, %0\n" " or %0, %5, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " andn %0, %6, %0\n" + " or %0, %5, %0\n" + " .previous\n" : "=r" (val) : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U), - "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V)); + "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V), + "i" (_PAGE_CP_4V)); return __pgprot(val); } diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index 6fd4436d32f0..ec9c04de3664 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -79,6 +79,8 @@ struct sun4v_2insn_patch_entry { }; extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, __sun4v_2insn_patch_end; +extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch, + __sun_m7_2insn_patch_end; #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 07cc49e541f4..0f679421b468 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -69,6 +69,8 @@ void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, struct sun4v_1insn_patch_entry *); void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, struct sun4v_2insn_patch_entry *); +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *, + struct sun4v_2insn_patch_entry *); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index c38d19fc27ba..f7b261749383 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -255,6 +255,24 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start, } } +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start, + struct sun4v_2insn_patch_entry *end) +{ + while (start < end) { + unsigned long addr = start->addr; + + *(unsigned int *) (addr + 0) = start->insns[0]; + wmb(); + __asm__ 
__volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = start->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + start++; + } +} + static void __init sun4v_patch(void) { extern void sun4v_hvapi_init(void); @@ -267,6 +285,9 @@ static void __init sun4v_patch(void) sun4v_patch_2insn_range(&__sun4v_2insn_patch, &__sun4v_2insn_patch_end); + if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7) + sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, + &__sun_m7_2insn_patch_end); sun4v_hvapi_init(); } diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 09243057cb0b..f1a2f688b28a 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -138,6 +138,11 @@ SECTIONS *(.pause_3insn_patch) __pause_3insn_patch_end = .; } + .sun_m7_2insn_patch : { + __sun_m7_2insn_patch = .; + *(.sun_m7_2insn_patch) + __sun_m7_2insn_patch_end = .; + } PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 4ca0d6ba5ec8..559cb744112c 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -54,6 +54,7 @@ #include "init_64.h" unsigned long kern_linear_pte_xor[4] __read_mostly; +static unsigned long page_cache4v_flag; /* A bitmap, two bits for every 256MB of physical memory. These two * bits determine what page size we use for kernel linear @@ -1909,11 +1910,24 @@ static void __init sun4u_linear_pte_xor_finalize(void) static void __init sun4v_linear_pte_xor_finalize(void) { + unsigned long pagecv_flag; + + /* Bit 9 of TTE is no longer CV bit on M7 processor and it instead + * enables MCD error. Do not set bit 9 on M7 processor. + */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + pagecv_flag = 0x00; + break; + default: + pagecv_flag = _PAGE_CV_4V; + break; + } #ifndef CONFIG_DEBUG_PAGEALLOC if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; @@ -1922,7 +1936,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; @@ -1931,7 +1945,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; @@ -1958,6 +1972,13 @@ static phys_addr_t __init available_memory(void) return available; } +#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) +#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) +#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) +#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) +#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) +#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) + /* We need to exclude reserved regions. This exclusion will include * vmlinux and initrd. 
To be more precise the initrd size could be used to * compute a new lower limit because it is freed later during initialization. @@ -2034,6 +2055,25 @@ void __init paging_init(void) memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); #endif + /* TTE.cv bit on sparc v9 occupies the same position as TTE.mcde + * bit on M7 processor. This is a conflicting usage of the same + * bit. Enabling TTE.cv on M7 would turn on Memory Corruption + * Detection error on all pages and this will lead to problems + * later. Kernel does not run with MCD enabled and hence rest + * of the required steps to fully configure memory corruption + * detection are not taken. We need to ensure TTE.mcde is not + * set on M7 processor. Compute the value of cacheability + * flag for use later taking this into consideration. + */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + page_cache4v_flag = _PAGE_CP_4V; + break; + default: + page_cache4v_flag = _PAGE_CACHE_4V; + break; + } + if (tlb_type == hypervisor) sun4v_pgprot_init(); else @@ -2274,13 +2314,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) -#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) -#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) -#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) -#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) -#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) - pgprot_t PAGE_KERNEL __read_mostly; EXPORT_SYMBOL(PAGE_KERNEL); @@ -2312,8 +2345,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, _PAGE_P_4U | _PAGE_W_4U); if (tlb_type == hypervisor) pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V); pte_base |= _PAGE_PMD_HUGE; @@ -2450,14 +2482,14 @@ static void __init sun4v_pgprot_init(void) int i; PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | - _PAGE_CACHE_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | __ACCESS_BITS_4V | __DIRTY_BITS_4V | _PAGE_EXEC_4V); PAGE_KERNEL_LOCKED = PAGE_KERNEL; _PAGE_IE = _PAGE_IE_4V; _PAGE_E = _PAGE_E_4V; - _PAGE_CACHE = _PAGE_CACHE_4V; + _PAGE_CACHE = page_cache4v_flag; #ifdef CONFIG_DEBUG_PAGEALLOC kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; @@ -2465,8 +2497,8 @@ static void __init sun4v_pgprot_init(void) kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ PAGE_OFFSET; #endif - kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V | + _PAGE_W_4V); for (i = 1; i < 4; i++) kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; @@ -2479,12 +2511,12 @@ static void __init sun4v_pgprot_init(void) _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); - page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; - page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); - page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_EXEC_4V); - page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | 
__ACCESS_BITS_4V | _PAGE_EXEC_4V); page_exec_bit = _PAGE_EXEC_4V; @@ -2542,7 +2574,7 @@ static unsigned long kern_large_tte(unsigned long paddr) _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); if (tlb_type == hypervisor) val = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | _PAGE_EXEC_4V | _PAGE_W_4V); return val | paddr; From 8a7b19d8b542b87bccc3eaaf81dcc90a5ca48aea Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Sat, 30 May 2015 17:39:25 +0200 Subject: [PATCH 419/481] include/uapi/linux/virtio_balloon.h: include linux/virtio_types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation error: error: unknown type name ‘__virtio16’ __virtio16 tag; Signed-off-by: Mikko Rapeli Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_balloon.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 984169a819ee..d7f1cbc3766c 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -26,6 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include +#include #include #include From d26e2c9ffa385dd1b646f43c1397ba12af9ed431 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Thu, 28 May 2015 10:26:18 +0200 Subject: [PATCH 420/481] Revert "netfilter: ensure number of counters is >0 in do_replace()" This partially reverts commit 1086bbe97a07 ("netfilter: ensure number of counters is >0 in do_replace()") in net/bridge/netfilter/ebtables.c. Setting rules with ebtables does not work any more with 1086bbe97a07 in place. There is an error message and no rules set in the end. e.g. ~# ebtables -t nat -A POSTROUTING --src 12:34:56:78:9a:bc -j DROP Unable to update the kernel. Two possible causes: 1. Multiple ebtables programs were executing simultaneously. The ebtables userspace tool doesn't by default support multiple ebtables programs running Reverting the ebtables part of 1086bbe97a07 makes this work again.
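The reason the added test is too strict for ebtables is that num_counters describes the counter snapshot of the rule set being replaced, so the very first load into an empty table legitimately carries num_counters == 0. A minimal stand-alone C sketch of that validation logic follows; the struct and names are simplified illustrations, not the real ebtables ABI, and it only shows which kind of request the reverted check rejects.

/* sketch.c: simplified model of the do_replace() sanity checks (illustrative only) */
#include <limits.h>
#include <stdio.h>

struct fake_ebt_counter { unsigned long long pcnt, bcnt; };

struct fake_ebt_replace {
	unsigned int num_counters;	/* old rule count; 0 on a fresh table */
	unsigned int entries_size;
};

static int validate_replace(const struct fake_ebt_replace *tmp, int reject_zero_counters)
{
	if (tmp->entries_size == 0)
		return -1;				/* -EINVAL in the kernel */
	if (tmp->num_counters >= INT_MAX / sizeof(struct fake_ebt_counter))
		return -2;				/* -ENOMEM in the kernel */
	if (reject_zero_counters && tmp->num_counters == 0)
		return -1;				/* the check this revert removes */
	return 0;
}

int main(void)
{
	/* shape of the request ebtables sends when adding the first rule */
	struct fake_ebt_replace req = { .num_counters = 0, .entries_size = 128 };

	printf("with the zero-counter check:    %d\n", validate_replace(&req, 1));
	printf("without the zero-counter check: %d\n", validate_replace(&req, 0));
	return 0;
}

As the output shows, a request that carries no counters is refused only because of the extra check, which matches the failure seen with the ebtables command above.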
Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 24c7c96bf5f8..91180a7fc943 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1117,8 +1117,6 @@ static int do_replace(struct net *net, const void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; - if (tmp.num_counters == 0) - return -EINVAL; tmp.name[sizeof(tmp.name) - 1] = 0; @@ -2161,8 +2159,6 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; - if (tmp.num_counters == 0) - return -EINVAL; memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); From dc5e7a811d3e57f2b10a4c4c90b175ce498a097d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 1 Jun 2015 11:30:04 +0100 Subject: [PATCH 421/481] xen: netback: fix printf format string warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/xen-netback/netback.c: In function ‘xenvif_tx_build_gops’: drivers/net/xen-netback/netback.c:1253:8: warning: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘int’ [-Wformat=] (txreq.offset&~PAGE_MASK) + txreq.size); ^ PAGE_MASK's type can vary by arch, so a cast is needed. Signed-off-by: Ian Campbell ---- v2: Cast to unsigned long, since PAGE_MASK can vary by arch. Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 4de46aa61d95..0d2594395ffb 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1250,7 +1250,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, netdev_err(queue->vif->dev, "txreq.offset: %x, size: %u, end: %lu\n", txreq.offset, txreq.size, - (txreq.offset&~PAGE_MASK) + txreq.size); + (unsigned long)(txreq.offset&~PAGE_MASK) + txreq.size); xenvif_fatal_tx_err(queue->vif); break; } From 31a418986a5852034d520a5bab546821ff1ccf3d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 1 Jun 2015 11:30:24 +0100 Subject: [PATCH 422/481] xen: netback: read hotplug script once at start of day. When we come to tear things down in netback_remove() and generate the uevent it is possible that the xenstore directory has already been removed (details below). In such cases netback_uevent() won't be able to read the hotplug script and will write a xenstore error node. A recent change to the hypervisor exposed this race such that we now sometimes lose it (where apparently we didn't ever before). Instead read the hotplug script configuration during setup and use it for the lifetime of the backend device. The apparently more obvious fix of moving the transition to state=Closed in netback_remove() to after the uevent does not work because it is possible that we are already in state=Closed (in reaction to the guest having disconnected as it shutdown). Being already in Closed means the toolstack is at liberty to start tearing down the xenstore directories. In principal it might be possible to arrange to unregister the device sooner (e.g on transition to Closing) such that xenstore would still be there but this state machine is fragile and prone to anger... 
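The approach taken instead is the simple, robust one: read the configuration once while the device is being set up and keep an owned copy for its whole lifetime, so nothing on the teardown path has to consult a store that may already be gone. The fragment below is a minimal user-space sketch of that pattern only; struct backend, store_read() and the other names are illustrative stand-ins, not the xen-netback or xenbus API.

/* read-once.c: cache a config value at probe time, reuse it at teardown */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct backend {
	char *hotplug_script;	/* owned copy, valid for the device lifetime */
};

/* stand-in for a store read; may legitimately fail once teardown has begun */
static char *store_read(const char *node, const char *key)
{
	(void)node; (void)key;
	return strdup("/etc/xen/scripts/vif-bridge");
}

static int backend_probe(struct backend *be, const char *node)
{
	be->hotplug_script = store_read(node, "script");
	return be->hotplug_script ? 0 : -1;	/* fail the probe, not a later uevent */
}

static void backend_uevent(const struct backend *be)
{
	/* no store access here: the cached value works even after the store is gone */
	printf("script=%s\n", be->hotplug_script);
}

static void backend_remove(struct backend *be)
{
	backend_uevent(be);	/* generating a uevent during remove still succeeds */
	free(be->hotplug_script);
	be->hotplug_script = NULL;
}

int main(void)
{
	struct backend be = { 0 };

	if (backend_probe(&be, "backend/vif/1/0") == 0)
		backend_remove(&be);
	return 0;
}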
A modern Xen system only relies on the hotplug uevent for driver domains, when the backend is in the same domain as the toolstack it will run the necessary setup/teardown directly in the correct sequence wrt xenstore changes. Signed-off-by: Ian Campbell Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/xenbus.c | 33 ++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index fee02414529e..968787abf78d 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -34,6 +34,8 @@ struct backend_info { enum xenbus_state frontend_state; struct xenbus_watch hotplug_status_watch; u8 have_hotplug_status_watch:1; + + const char *hotplug_script; }; static int connect_rings(struct backend_info *be, struct xenvif_queue *queue); @@ -238,6 +240,7 @@ static int netback_remove(struct xenbus_device *dev) xenvif_free(be->vif); be->vif = NULL; } + kfree(be->hotplug_script); kfree(be); dev_set_drvdata(&dev->dev, NULL); return 0; @@ -255,6 +258,7 @@ static int netback_probe(struct xenbus_device *dev, struct xenbus_transaction xbt; int err; int sg; + const char *script; struct backend_info *be = kzalloc(sizeof(struct backend_info), GFP_KERNEL); if (!be) { @@ -347,6 +351,15 @@ static int netback_probe(struct xenbus_device *dev, if (err) pr_debug("Error writing multi-queue-max-queues\n"); + script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL); + if (IS_ERR(script)) { + err = PTR_ERR(script); + xenbus_dev_fatal(dev, err, "reading script"); + goto fail; + } + + be->hotplug_script = script; + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -379,22 +392,14 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) { struct backend_info *be = dev_get_drvdata(&xdev->dev); - char *val; - val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); - if (IS_ERR(val)) { - int err = PTR_ERR(val); - xenbus_dev_fatal(xdev, err, "reading script"); - return err; - } else { - if (add_uevent_var(env, "script=%s", val)) { - kfree(val); - return -ENOMEM; - } - kfree(val); - } + if (!be) + return 0; - if (!be || !be->vif) + if (add_uevent_var(env, "script=%s", be->hotplug_script)) + return -ENOMEM; + + if (!be->vif) return 0; return add_uevent_var(env, "vif=%s", be->vif->dev->name); From c6e36d8c1a76be7a7afa2669483857dadec1e99c Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 1 Jun 2015 15:08:18 +0300 Subject: [PATCH 423/481] bnx2x: Move statistics implementation into semaphores Commit dff173de84958 ("bnx2x: Fix statistics locking scheme") changed the bnx2x locking around statistics state into using a mutex - but the lock is being accessed via a timer which is forbidden. [If compiled with CONFIG_DEBUG_MUTEXES, logs show a warning about accessing the mutex in interrupt context] This moves the implementation into using a semaphore [with size '1'] instead. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 +- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 9 +++++---- .../net/ethernet/broadcom/bnx2x/bnx2x_stats.c | 20 +++++++++++++------ 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index a3b0f7a0c61e..1f82a04ce01a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1774,7 +1774,7 @@ struct bnx2x { int stats_state; /* used for synchronization of concurrent threads statistics handling */ - struct mutex stats_lock; + struct semaphore stats_lock; /* used by dmae command loader */ struct dmae_command stats_dmae; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index fd52ce95127e..33501bcddc48 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12054,7 +12054,7 @@ static int bnx2x_init_bp(struct bnx2x *bp) mutex_init(&bp->port.phy_mutex); mutex_init(&bp->fw_mb_mutex); mutex_init(&bp->drv_info_mutex); - mutex_init(&bp->stats_lock); + sema_init(&bp->stats_lock, 1); bp->drv_info_mng_owner = false; INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task); @@ -13690,9 +13690,10 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp) cancel_delayed_work_sync(&bp->sp_task); cancel_delayed_work_sync(&bp->period_task); - mutex_lock(&bp->stats_lock); - bp->stats_state = STATS_STATE_DISABLED; - mutex_unlock(&bp->stats_lock); + if (!down_timeout(&bp->stats_lock, HZ / 10)) { + bp->stats_state = STATS_STATE_DISABLED; + up(&bp->stats_lock); + } bnx2x_save_statistics(bp); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index 266b055c2360..69d699f0730a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -1372,19 +1372,23 @@ void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event) * that context in case someone is in the middle of a transition. * For other events, wait a bit until lock is taken. */ - if (!mutex_trylock(&bp->stats_lock)) { + if (down_trylock(&bp->stats_lock)) { if (event == STATS_EVENT_UPDATE) return; DP(BNX2X_MSG_STATS, "Unlikely stats' lock contention [event %d]\n", event); - mutex_lock(&bp->stats_lock); + if (unlikely(down_timeout(&bp->stats_lock, HZ / 10))) { + BNX2X_ERR("Failed to take stats lock [event %d]\n", + event); + return; + } } bnx2x_stats_stm[state][event].action(bp); bp->stats_state = bnx2x_stats_stm[state][event].next_state; - mutex_unlock(&bp->stats_lock); + up(&bp->stats_lock); if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp)) DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n", @@ -1970,7 +1974,11 @@ int bnx2x_stats_safe_exec(struct bnx2x *bp, /* Wait for statistics to end [while blocking further requests], * then run supplied function 'safely'. */ - mutex_lock(&bp->stats_lock); + rc = down_timeout(&bp->stats_lock, HZ / 10); + if (unlikely(rc)) { + BNX2X_ERR("Failed to take statistics lock for safe execution\n"); + goto out_no_lock; + } bnx2x_stats_comp(bp); while (bp->stats_pending && cnt--) @@ -1988,7 +1996,7 @@ out: /* No need to restart statistics - if they're enabled, the timer * will restart the statistics. 
*/ - mutex_unlock(&bp->stats_lock); - + up(&bp->stats_lock); +out_no_lock: return rc; } From 18ec898ee54e03a9aab8b54db50cb2b36209d313 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Jun 2015 14:43:50 -0700 Subject: [PATCH 424/481] Revert "net: core: 'ethtool' issue with querying phy settings" This reverts commit f96dee13b8e10f00840124255bed1d8b4c6afd6f. It isn't right, ethtool is meant to manage one PHY instance per netdevice at a time, and this is selected by the SET command. Therefore by definition the GET command must only return the settings for the configured and selected PHY. Reported-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1347e11f5cc9..1d00b8922902 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -359,15 +359,7 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) int err; struct ethtool_cmd cmd; - if (!dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - - cmd.cmd = ETHTOOL_GSET; - - err = dev->ethtool_ops->get_settings(dev, &cmd); + err = __ethtool_get_settings(dev, &cmd); if (err < 0) return err; From ccd740cbc6e01b2a08baa341867063ed2887f4b9 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 29 May 2015 11:28:26 -0700 Subject: [PATCH 425/481] vti6: Add pmtu handling to vti6_xmit. We currently rely on the PMTU discovery of xfrm. However, if a packet is sent locally, the PMTU mechanism of xfrm tries to do local socket notification, which might not work for applications like ping that don't check for this. So add pmtu handling to vti6_xmit to report MTU changes immediately. Signed-off-by: Steffen Klassert Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ff3bd863fa03..0224c032dca5 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -435,6 +435,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct net_device *tdev; struct xfrm_state *x; int err = -1; + int mtu; if (!dst) goto tx_err_link_failure; @@ -468,6 +469,19 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; + mtu = dst_mtu(dst); + if (!skb->ignore_df && skb->len > mtu) { + skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + + return -EMSGSIZE; + } + err = dst_output(skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); From 131484c8da97ed600c18dd9d03b661e8ae052df6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 May 2015 12:21:47 +0200 Subject: [PATCH 426/481] x86/debug: Remove perpetually broken, unmaintainable dwarf annotations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So the dwarf2 annotations in low level assembly code have become an increasing hindrance: unreadable, messy macros mixed into some of the most security sensitive code paths of the Linux kernel.
These debug info annotations don't even buy the upstream kernel anything: dwarf driven stack unwinding has caused problems in the past so it's out of tree, and the upstream kernel only uses the much more robust framepointers based stack unwinding method. In addition to that there's a steady, slow bitrot going on with these annotations, requiring frequent fixups. There's no tooling and no functionality upstream that keeps it correct. So burn down the sick forest, allowing new, healthier growth: 27 files changed, 350 insertions(+), 1101 deletions(-) Someone who has the willingness and time to do this properly can attempt to reintroduce dwarf debuginfo in x86 assembly code plus dwarf unwinding from first principles, with the following conditions: - it should be maximally readable, and maximally low-key to 'ordinary' code reading and maintenance. - find a build time method to insert dwarf annotations automatically in the most common cases, for pop/push instructions that manipulate the stack pointer. This could be done for example via a preprocessing step that just looks for common patterns - plus special annotations for the few cases where we want to depart from the default. We have hundreds of CFI annotations, so automating most of that makes sense. - it should come with build tooling checks that ensure that CFI annotations are sensible. We've seen such efforts from the framepointer side, and there's no reason it couldn't be done on the dwarf side. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frédéric Weisbecker Cc: Jan Beulich Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 10 +- arch/x86/ia32/ia32entry.S | 133 +++--------- arch/x86/include/asm/calling.h | 94 ++++----- arch/x86/include/asm/dwarf2.h | 170 --------------- arch/x86/include/asm/frame.h | 7 +- arch/x86/kernel/entry_32.S | 368 ++++++++++----------------------- arch/x86/kernel/entry_64.S | 288 +++++--------------------- arch/x86/lib/atomic64_386_32.S | 7 +- arch/x86/lib/atomic64_cx8_32.S | 61 ++---- arch/x86/lib/checksum_32.S | 52 ++--- arch/x86/lib/clear_page_64.S | 7 - arch/x86/lib/cmpxchg16b_emu.S | 12 +- arch/x86/lib/cmpxchg8b_emu.S | 11 +- arch/x86/lib/copy_page_64.S | 11 - arch/x86/lib/copy_user_64.S | 15 -- arch/x86/lib/csum-copy_64.S | 17 -- arch/x86/lib/getuser.S | 13 -- arch/x86/lib/iomap_copy_64.S | 3 - arch/x86/lib/memcpy_64.S | 3 - arch/x86/lib/memmove_64.S | 3 - arch/x86/lib/memset_64.S | 5 - arch/x86/lib/msr-reg.S | 44 ++-- arch/x86/lib/putuser.S | 8 +- arch/x86/lib/rwsem.S | 49 ++--- arch/x86/lib/thunk_32.S | 15 +- arch/x86/lib/thunk_64.S | 44 ++-- arch/x86/net/bpf_jit.S | 1 - 27 files changed, 350 insertions(+), 1101 deletions(-) delete mode 100644 arch/x86/include/asm/dwarf2.h diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 57996ee840dd..43e8328a23e4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -149,12 +149,6 @@ endif sp-$(CONFIG_X86_32) := esp sp-$(CONFIG_X86_64) := rsp -# do binutils support CFI? -cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) -# is .cfi_signal_frame supported too? -cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) -cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) - # does binutils support specific instructions? 
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) @@ -162,8 +156,8 @@ asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) -KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) -KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) +KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) +KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) LDFLAGS := -m elf_$(UTS_MACHINE) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 63450a596800..2be23c734db5 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -4,7 +4,6 @@ * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include #include #include #include @@ -60,17 +59,6 @@ movl %eax,%eax /* zero extension */ .endm - .macro CFI_STARTPROC32 simple - CFI_STARTPROC \simple - CFI_UNDEFINED r8 - CFI_UNDEFINED r9 - CFI_UNDEFINED r10 - CFI_UNDEFINED r11 - CFI_UNDEFINED r12 - CFI_UNDEFINED r13 - CFI_UNDEFINED r14 - CFI_UNDEFINED r15 - .endm #ifdef CONFIG_PARAVIRT ENTRY(native_usergs_sysret32) @@ -102,11 +90,6 @@ ENDPROC(native_usergs_sysret32) * with the int 0x80 path. */ ENTRY(ia32_sysenter_target) - CFI_STARTPROC32 simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,0 - CFI_REGISTER rsp,rbp - /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -121,25 +104,21 @@ ENTRY(ia32_sysenter_target) movl %eax, %eax movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d - CFI_REGISTER rip,r10 /* Construct struct pt_regs on stack */ - pushq_cfi $__USER32_DS /* pt_regs->ss */ - pushq_cfi %rbp /* pt_regs->sp */ - CFI_REL_OFFSET rsp,0 - pushfq_cfi /* pt_regs->flags */ - pushq_cfi $__USER32_CS /* pt_regs->cs */ - pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */ - CFI_REL_OFFSET rip,0 - pushq_cfi_reg rax /* pt_regs->orig_ax */ - pushq_cfi_reg rdi /* pt_regs->di */ - pushq_cfi_reg rsi /* pt_regs->si */ - pushq_cfi_reg rdx /* pt_regs->dx */ - pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi $-ENOSYS /* pt_regs->ax */ + pushq $__USER32_DS /* pt_regs->ss */ + pushq %rbp /* pt_regs->sp */ + pushfq /* pt_regs->flags */ + pushq $__USER32_CS /* pt_regs->cs */ + pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */ + pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ - CFI_ADJUST_CFA_OFFSET 10*8 /* * no need to do an access_ok check here because rbp has been @@ -161,8 +140,8 @@ sysenter_flags_fixed: orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - CFI_REMEMBER_STATE jnz sysenter_tracesys + sysenter_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -193,14 +172,12 @@ sysexit_from_sys_call: */ andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) movl RIP(%rsp),%ecx /* User %eip */ - CFI_REGISTER rip,rcx RESTORE_RSI_RDI xorl %edx,%edx /* avoid info leaks */ xorq %r8,%r8 xorq %r9,%r9 xorq %r10,%r10 movl EFLAGS(%rsp),%r11d /* User eflags */ - /*CFI_RESTORE rflags*/ 
TRACE_IRQS_ON /* @@ -231,8 +208,6 @@ sysexit_from_sys_call: */ USERGS_SYSRET32 - CFI_RESTORE_STATE - #ifdef CONFIG_AUDITSYSCALL .macro auditsys_entry_common movl %esi,%r8d /* 5th arg: 4th syscall arg */ @@ -282,8 +257,8 @@ sysexit_audit: #endif sysenter_fix_flags: - pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) - popfq_cfi + pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) + popfq jmp sysenter_flags_fixed sysenter_tracesys: @@ -298,7 +273,6 @@ sysenter_tracesys: LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS jmp sysenter_do_call - CFI_ENDPROC ENDPROC(ia32_sysenter_target) /* @@ -332,12 +306,6 @@ ENDPROC(ia32_sysenter_target) * with the int 0x80 path. */ ENTRY(ia32_cstar_target) - CFI_STARTPROC32 simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,0 - CFI_REGISTER rip,rcx - /*CFI_REGISTER rflags,r11*/ - /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -345,7 +313,6 @@ ENTRY(ia32_cstar_target) */ SWAPGS_UNSAFE_STACK movl %esp,%r8d - CFI_REGISTER rsp,r8 movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp ENABLE_INTERRUPTS(CLBR_NONE) @@ -353,22 +320,19 @@ ENTRY(ia32_cstar_target) movl %eax,%eax /* Construct struct pt_regs on stack */ - pushq_cfi $__USER32_DS /* pt_regs->ss */ - pushq_cfi %r8 /* pt_regs->sp */ - CFI_REL_OFFSET rsp,0 - pushq_cfi %r11 /* pt_regs->flags */ - pushq_cfi $__USER32_CS /* pt_regs->cs */ - pushq_cfi %rcx /* pt_regs->ip */ - CFI_REL_OFFSET rip,0 - pushq_cfi_reg rax /* pt_regs->orig_ax */ - pushq_cfi_reg rdi /* pt_regs->di */ - pushq_cfi_reg rsi /* pt_regs->si */ - pushq_cfi_reg rdx /* pt_regs->dx */ - pushq_cfi_reg rbp /* pt_regs->cx */ + pushq $__USER32_DS /* pt_regs->ss */ + pushq %r8 /* pt_regs->sp */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER32_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ + pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rbp /* pt_regs->cx */ movl %ebp,%ecx - pushq_cfi $-ENOSYS /* pt_regs->ax */ + pushq $-ENOSYS /* pt_regs->ax */ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ - CFI_ADJUST_CFA_OFFSET 10*8 /* * no need to do an access_ok check here because r8 has been @@ -380,8 +344,8 @@ ENTRY(ia32_cstar_target) ASM_CLAC orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - CFI_REMEMBER_STATE jnz cstar_tracesys + cstar_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -403,15 +367,12 @@ sysretl_from_sys_call: andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) RESTORE_RSI_RDI_RDX movl RIP(%rsp),%ecx - CFI_REGISTER rip,rcx movl EFLAGS(%rsp),%r11d - /*CFI_REGISTER rflags,r11*/ xorq %r10,%r10 xorq %r9,%r9 xorq %r8,%r8 TRACE_IRQS_ON movl RSP(%rsp),%esp - CFI_RESTORE rsp /* * 64bit->32bit SYSRET restores eip from ecx, * eflags from r11 (but RF and VM bits are forced to 0), @@ -430,7 +391,6 @@ sysretl_from_sys_call: #ifdef CONFIG_AUDITSYSCALL cstar_auditsys: - CFI_RESTORE_STATE movl %r9d,R9(%rsp) /* register to be clobbered by call */ auditsys_entry_common movl R9(%rsp),%r9d /* reload 6th syscall arg */ @@ -460,7 +420,6 @@ ia32_badarg: ASM_CLAC movq $-EFAULT,%rax jmp ia32_sysret - CFI_ENDPROC /* * Emulated IA32 system calls via int 0x80. 
@@ -484,15 +443,6 @@ ia32_badarg: */ ENTRY(ia32_syscall) - CFI_STARTPROC32 simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,5*8 - /*CFI_REL_OFFSET ss,4*8 */ - CFI_REL_OFFSET rsp,3*8 - /*CFI_REL_OFFSET rflags,2*8 */ - /*CFI_REL_OFFSET cs,1*8 */ - CFI_REL_OFFSET rip,0*8 - /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -506,15 +456,14 @@ ENTRY(ia32_syscall) movl %eax,%eax /* Construct struct pt_regs on stack (iret frame is already on stack) */ - pushq_cfi_reg rax /* pt_regs->orig_ax */ - pushq_cfi_reg rdi /* pt_regs->di */ - pushq_cfi_reg rsi /* pt_regs->si */ - pushq_cfi_reg rdx /* pt_regs->dx */ - pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi $-ENOSYS /* pt_regs->ax */ + pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ - CFI_ADJUST_CFA_OFFSET 10*8 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -544,7 +493,6 @@ ia32_tracesys: LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS jmp ia32_do_call - CFI_ENDPROC END(ia32_syscall) .macro PTREGSCALL label, func @@ -554,8 +502,6 @@ GLOBAL(\label) jmp ia32_ptregs_common .endm - CFI_STARTPROC32 - PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn PTREGSCALL stub32_sigreturn, sys32_sigreturn PTREGSCALL stub32_fork, sys_fork @@ -569,23 +515,8 @@ GLOBAL(stub32_clone) ALIGN ia32_ptregs_common: - CFI_ENDPROC - CFI_STARTPROC32 simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SIZEOF_PTREGS - CFI_REL_OFFSET rax,RAX - CFI_REL_OFFSET rcx,RCX - CFI_REL_OFFSET rdx,RDX - CFI_REL_OFFSET rsi,RSI - CFI_REL_OFFSET rdi,RDI - CFI_REL_OFFSET rip,RIP -/* CFI_REL_OFFSET cs,CS*/ -/* CFI_REL_OFFSET rflags,EFLAGS*/ - CFI_REL_OFFSET rsp,RSP -/* CFI_REL_OFFSET ss,SS*/ SAVE_EXTRA_REGS 8 call *%rax RESTORE_EXTRA_REGS 8 ret - CFI_ENDPROC END(ia32_ptregs_common) diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 1c8b50edb2db..0d76accde45b 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -46,8 +46,6 @@ For 32-bit we have the following conventions - kernel is built with */ -#include - #ifdef CONFIG_X86_64 /* @@ -92,27 +90,26 @@ For 32-bit we have the following conventions - kernel is built with .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 subq $15*8+\addskip, %rsp - CFI_ADJUST_CFA_OFFSET 15*8+\addskip .endm .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 .if \r11 - movq_cfi r11, 6*8+\offset + movq %r11, 6*8+\offset(%rsp) .endif .if \r8910 - movq_cfi r10, 7*8+\offset - movq_cfi r9, 8*8+\offset - movq_cfi r8, 9*8+\offset + movq %r10, 7*8+\offset(%rsp) + movq %r9, 8*8+\offset(%rsp) + movq %r8, 9*8+\offset(%rsp) .endif .if \rax - movq_cfi rax, 10*8+\offset + movq %rax, 10*8+\offset(%rsp) .endif .if \rcx - movq_cfi rcx, 11*8+\offset + movq %rcx, 11*8+\offset(%rsp) .endif - movq_cfi rdx, 12*8+\offset - movq_cfi rsi, 13*8+\offset - movq_cfi rdi, 14*8+\offset + movq %rdx, 12*8+\offset(%rsp) + movq %rsi, 13*8+\offset(%rsp) + movq %rdi, 14*8+\offset(%rsp) .endm .macro SAVE_C_REGS offset=0 SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 @@ -131,24 +128,24 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro SAVE_EXTRA_REGS offset=0 - movq_cfi r15, 0*8+\offset - movq_cfi r14, 1*8+\offset - movq_cfi r13, 2*8+\offset 
- movq_cfi r12, 3*8+\offset - movq_cfi rbp, 4*8+\offset - movq_cfi rbx, 5*8+\offset + movq %r15, 0*8+\offset(%rsp) + movq %r14, 1*8+\offset(%rsp) + movq %r13, 2*8+\offset(%rsp) + movq %r12, 3*8+\offset(%rsp) + movq %rbp, 4*8+\offset(%rsp) + movq %rbx, 5*8+\offset(%rsp) .endm .macro SAVE_EXTRA_REGS_RBP offset=0 - movq_cfi rbp, 4*8+\offset + movq %rbp, 4*8+\offset(%rsp) .endm .macro RESTORE_EXTRA_REGS offset=0 - movq_cfi_restore 0*8+\offset, r15 - movq_cfi_restore 1*8+\offset, r14 - movq_cfi_restore 2*8+\offset, r13 - movq_cfi_restore 3*8+\offset, r12 - movq_cfi_restore 4*8+\offset, rbp - movq_cfi_restore 5*8+\offset, rbx + movq 0*8+\offset(%rsp), %r15 + movq 1*8+\offset(%rsp), %r14 + movq 2*8+\offset(%rsp), %r13 + movq 3*8+\offset(%rsp), %r12 + movq 4*8+\offset(%rsp), %rbp + movq 5*8+\offset(%rsp), %rbx .endm .macro ZERO_EXTRA_REGS @@ -162,24 +159,24 @@ For 32-bit we have the following conventions - kernel is built with .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 .if \rstor_r11 - movq_cfi_restore 6*8, r11 + movq 6*8(%rsp), %r11 .endif .if \rstor_r8910 - movq_cfi_restore 7*8, r10 - movq_cfi_restore 8*8, r9 - movq_cfi_restore 9*8, r8 + movq 7*8(%rsp), %r10 + movq 8*8(%rsp), %r9 + movq 9*8(%rsp), %r8 .endif .if \rstor_rax - movq_cfi_restore 10*8, rax + movq 10*8(%rsp), %rax .endif .if \rstor_rcx - movq_cfi_restore 11*8, rcx + movq 11*8(%rsp), %rcx .endif .if \rstor_rdx - movq_cfi_restore 12*8, rdx + movq 12*8(%rsp), %rdx .endif - movq_cfi_restore 13*8, rsi - movq_cfi_restore 14*8, rdi + movq 13*8(%rsp), %rsi + movq 14*8(%rsp), %rdi .endm .macro RESTORE_C_REGS RESTORE_C_REGS_HELPER 1,1,1,1,1 @@ -205,7 +202,6 @@ For 32-bit we have the following conventions - kernel is built with .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 addq $15*8+\addskip, %rsp - CFI_ADJUST_CFA_OFFSET -(15*8+\addskip) .endm .macro icebp @@ -224,23 +220,23 @@ For 32-bit we have the following conventions - kernel is built with */ .macro SAVE_ALL - pushl_cfi_reg eax - pushl_cfi_reg ebp - pushl_cfi_reg edi - pushl_cfi_reg esi - pushl_cfi_reg edx - pushl_cfi_reg ecx - pushl_cfi_reg ebx + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx .endm .macro RESTORE_ALL - popl_cfi_reg ebx - popl_cfi_reg ecx - popl_cfi_reg edx - popl_cfi_reg esi - popl_cfi_reg edi - popl_cfi_reg ebp - popl_cfi_reg eax + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax .endm #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h deleted file mode 100644 index de1cdaf4d743..000000000000 --- a/arch/x86/include/asm/dwarf2.h +++ /dev/null @@ -1,170 +0,0 @@ -#ifndef _ASM_X86_DWARF2_H -#define _ASM_X86_DWARF2_H - -#ifndef __ASSEMBLY__ -#warning "asm/dwarf2.h should be only included in pure assembly files" -#endif - -/* - * Macros for dwarf2 CFI unwind table entries. - * See "as.info" for details on these pseudo ops. Unfortunately - * they are only supported in very new binutils, so define them - * away for older version. 
- */ - -#ifdef CONFIG_AS_CFI - -#define CFI_STARTPROC .cfi_startproc -#define CFI_ENDPROC .cfi_endproc -#define CFI_DEF_CFA .cfi_def_cfa -#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register -#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset -#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset -#define CFI_OFFSET .cfi_offset -#define CFI_REL_OFFSET .cfi_rel_offset -#define CFI_REGISTER .cfi_register -#define CFI_RESTORE .cfi_restore -#define CFI_REMEMBER_STATE .cfi_remember_state -#define CFI_RESTORE_STATE .cfi_restore_state -#define CFI_UNDEFINED .cfi_undefined -#define CFI_ESCAPE .cfi_escape - -#ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame -#else -#define CFI_SIGNAL_FRAME -#endif - -#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) - /* - * Emit CFI data in .debug_frame sections, not .eh_frame sections. - * The latter we currently just discard since we don't do DWARF - * unwinding at runtime. So only the offline DWARF information is - * useful to anyone. Note we should not use this directive if this - * file is used in the vDSO assembly, or if vmlinux.lds.S gets - * changed so it doesn't discard .eh_frame. - */ - .cfi_sections .debug_frame -#endif - -#else - -/* - * Due to the structure of pre-exisiting code, don't use assembler line - * comment character # to ignore the arguments. Instead, use a dummy macro. - */ -.macro cfi_ignore a=0, b=0, c=0, d=0 -.endm - -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore -#define CFI_DEF_CFA_REGISTER cfi_ignore -#define CFI_DEF_CFA_OFFSET cfi_ignore -#define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_ESCAPE cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore - -#endif - -/* - * An attempt to make CFI annotations more or less - * correct and shorter. It is implied that you know - * what you're doing if you use them. 
- */ -#ifdef __ASSEMBLY__ -#ifdef CONFIG_X86_64 - .macro pushq_cfi reg - pushq \reg - CFI_ADJUST_CFA_OFFSET 8 - .endm - - .macro pushq_cfi_reg reg - pushq %\reg - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET \reg, 0 - .endm - - .macro popq_cfi reg - popq \reg - CFI_ADJUST_CFA_OFFSET -8 - .endm - - .macro popq_cfi_reg reg - popq %\reg - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE \reg - .endm - - .macro pushfq_cfi - pushfq - CFI_ADJUST_CFA_OFFSET 8 - .endm - - .macro popfq_cfi - popfq - CFI_ADJUST_CFA_OFFSET -8 - .endm - - .macro movq_cfi reg offset=0 - movq %\reg, \offset(%rsp) - CFI_REL_OFFSET \reg, \offset - .endm - - .macro movq_cfi_restore offset reg - movq \offset(%rsp), %\reg - CFI_RESTORE \reg - .endm -#else /*!CONFIG_X86_64*/ - .macro pushl_cfi reg - pushl \reg - CFI_ADJUST_CFA_OFFSET 4 - .endm - - .macro pushl_cfi_reg reg - pushl %\reg - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET \reg, 0 - .endm - - .macro popl_cfi reg - popl \reg - CFI_ADJUST_CFA_OFFSET -4 - .endm - - .macro popl_cfi_reg reg - popl %\reg - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE \reg - .endm - - .macro pushfl_cfi - pushfl - CFI_ADJUST_CFA_OFFSET 4 - .endm - - .macro popfl_cfi - popfl - CFI_ADJUST_CFA_OFFSET -4 - .endm - - .macro movl_cfi reg offset=0 - movl %\reg, \offset(%esp) - CFI_REL_OFFSET \reg, \offset - .endm - - .macro movl_cfi_restore offset reg - movl \offset(%esp), %\reg - CFI_RESTORE \reg - .endm -#endif /*!CONFIG_X86_64*/ -#endif /*__ASSEMBLY__*/ - -#endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 3b629f47eb65..793179cf8e21 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -1,20 +1,17 @@ #ifdef __ASSEMBLY__ #include -#include /* The annotation hides the frame from the unwinder and makes it look like a ordinary ebp save/restore. 
This avoids some special cases for frame pointer later */ #ifdef CONFIG_FRAME_POINTER .macro FRAME - __ASM_SIZE(push,_cfi) %__ASM_REG(bp) - CFI_REL_OFFSET __ASM_REG(bp), 0 + __ASM_SIZE(push,) %__ASM_REG(bp) __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp) .endm .macro ENDFRAME - __ASM_SIZE(pop,_cfi) %__ASM_REG(bp) - CFI_RESTORE __ASM_REG(bp) + __ASM_SIZE(pop,) %__ASM_REG(bp) .endm #else .macro FRAME diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 1c309763e321..0ac73de925d1 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -113,11 +112,10 @@ /* unfortunately push/pop can't be no-op */ .macro PUSH_GS - pushl_cfi $0 + pushl $0 .endm .macro POP_GS pop=0 addl $(4 + \pop), %esp - CFI_ADJUST_CFA_OFFSET -(4 + \pop) .endm .macro POP_GS_EX .endm @@ -137,16 +135,13 @@ #else /* CONFIG_X86_32_LAZY_GS */ .macro PUSH_GS - pushl_cfi %gs - /*CFI_REL_OFFSET gs, 0*/ + pushl %gs .endm .macro POP_GS pop=0 -98: popl_cfi %gs - /*CFI_RESTORE gs*/ +98: popl %gs .if \pop <> 0 add $\pop, %esp - CFI_ADJUST_CFA_OFFSET -\pop .endif .endm .macro POP_GS_EX @@ -170,11 +165,9 @@ .macro GS_TO_REG reg movl %gs, \reg - /*CFI_REGISTER gs, \reg*/ .endm .macro REG_TO_PTGS reg movl \reg, PT_GS(%esp) - /*CFI_REL_OFFSET gs, PT_GS*/ .endm .macro SET_KERNEL_GS reg movl $(__KERNEL_STACK_CANARY), \reg @@ -186,26 +179,16 @@ .macro SAVE_ALL cld PUSH_GS - pushl_cfi %fs - /*CFI_REL_OFFSET fs, 0;*/ - pushl_cfi %es - /*CFI_REL_OFFSET es, 0;*/ - pushl_cfi %ds - /*CFI_REL_OFFSET ds, 0;*/ - pushl_cfi %eax - CFI_REL_OFFSET eax, 0 - pushl_cfi %ebp - CFI_REL_OFFSET ebp, 0 - pushl_cfi %edi - CFI_REL_OFFSET edi, 0 - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 - pushl_cfi %edx - CFI_REL_OFFSET edx, 0 - pushl_cfi %ecx - CFI_REL_OFFSET ecx, 0 - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es @@ -215,30 +198,20 @@ .endm .macro RESTORE_INT_REGS - popl_cfi %ebx - CFI_RESTORE ebx - popl_cfi %ecx - CFI_RESTORE ecx - popl_cfi %edx - CFI_RESTORE edx - popl_cfi %esi - CFI_RESTORE esi - popl_cfi %edi - CFI_RESTORE edi - popl_cfi %ebp - CFI_RESTORE ebp - popl_cfi %eax - CFI_RESTORE eax + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax .endm .macro RESTORE_REGS pop=0 RESTORE_INT_REGS -1: popl_cfi %ds - /*CFI_RESTORE ds;*/ -2: popl_cfi %es - /*CFI_RESTORE es;*/ -3: popl_cfi %fs - /*CFI_RESTORE fs;*/ +1: popl %ds +2: popl %es +3: popl %fs POP_GS \pop .pushsection .fixup, "ax" 4: movl $0, (%esp) @@ -254,64 +227,27 @@ POP_GS_EX .endm -.macro RING0_INT_FRAME - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 3*4 - /*CFI_OFFSET cs, -2*4;*/ - CFI_OFFSET eip, -3*4 -.endm - -.macro RING0_EC_FRAME - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 4*4 - /*CFI_OFFSET cs, -2*4;*/ - CFI_OFFSET eip, -3*4 -.endm - -.macro RING0_PTREGS_FRAME - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, PT_OLDESP-PT_EBX - /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ - CFI_OFFSET eip, PT_EIP-PT_OLDESP - /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ - /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ - CFI_OFFSET eax, PT_EAX-PT_OLDESP - CFI_OFFSET ebp, PT_EBP-PT_OLDESP - CFI_OFFSET edi, PT_EDI-PT_OLDESP - CFI_OFFSET esi, PT_ESI-PT_OLDESP - CFI_OFFSET edx, PT_EDX-PT_OLDESP - CFI_OFFSET ecx, PT_ECX-PT_OLDESP - CFI_OFFSET ebx, PT_EBX-PT_OLDESP -.endm - 
ENTRY(ret_from_fork) - CFI_STARTPROC - pushl_cfi %eax + pushl %eax call schedule_tail GET_THREAD_INFO(%ebp) - popl_cfi %eax - pushl_cfi $0x0202 # Reset kernel eflags - popfl_cfi + popl %eax + pushl $0x0202 # Reset kernel eflags + popfl jmp syscall_exit - CFI_ENDPROC END(ret_from_fork) ENTRY(ret_from_kernel_thread) - CFI_STARTPROC - pushl_cfi %eax + pushl %eax call schedule_tail GET_THREAD_INFO(%ebp) - popl_cfi %eax - pushl_cfi $0x0202 # Reset kernel eflags - popfl_cfi + popl %eax + pushl $0x0202 # Reset kernel eflags + popfl movl PT_EBP(%esp),%eax call *PT_EBX(%esp) movl $0,PT_EAX(%esp) jmp syscall_exit - CFI_ENDPROC ENDPROC(ret_from_kernel_thread) /* @@ -323,7 +259,6 @@ ENDPROC(ret_from_kernel_thread) # userspace resumption stub bypassing syscall exit tracing ALIGN - RING0_PTREGS_FRAME ret_from_exception: preempt_stop(CLBR_ANY) ret_from_intr: @@ -367,17 +302,12 @@ need_resched: jmp need_resched END(resume_kernel) #endif - CFI_ENDPROC /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ # sysenter call handler stub ENTRY(ia32_sysenter_target) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 0 - CFI_REGISTER esp, ebp movl TSS_sysenter_sp0(%esp),%esp sysenter_past_esp: /* @@ -385,14 +315,11 @@ sysenter_past_esp: * enough kernel state to call TRACE_IRQS_OFF can be called - but * we immediately enable interrupts at that point anyway. */ - pushl_cfi $__USER_DS - /*CFI_REL_OFFSET ss, 0*/ - pushl_cfi %ebp - CFI_REL_OFFSET esp, 0 - pushfl_cfi + pushl $__USER_DS + pushl %ebp + pushfl orl $X86_EFLAGS_IF, (%esp) - pushl_cfi $__USER_CS - /*CFI_REL_OFFSET cs, 0*/ + pushl $__USER_CS /* * Push current_thread_info()->sysenter_return to the stack. * A tiny bit of offset fixup is necessary: TI_sysenter_return @@ -401,10 +328,9 @@ sysenter_past_esp: * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack; * and THREAD_SIZE takes us to the bottom. 
*/ - pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) - CFI_REL_OFFSET eip, 0 + pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) - pushl_cfi %eax + pushl %eax SAVE_ALL ENABLE_INTERRUPTS(CLBR_NONE) @@ -453,11 +379,11 @@ sysenter_audit: /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */ movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */ /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */ - pushl_cfi PT_ESI(%esp) /* a3: 5th arg */ - pushl_cfi PT_EDX+4(%esp) /* a2: 4th arg */ + pushl PT_ESI(%esp) /* a3: 5th arg */ + pushl PT_EDX+4(%esp) /* a2: 4th arg */ call __audit_syscall_entry - popl_cfi %ecx /* get that remapped edx off the stack */ - popl_cfi %ecx /* get that remapped esi off the stack */ + popl %ecx /* get that remapped edx off the stack */ + popl %ecx /* get that remapped esi off the stack */ movl PT_EAX(%esp),%eax /* reload syscall number */ jmp sysenter_do_call @@ -480,7 +406,6 @@ sysexit_audit: jmp sysenter_exit #endif - CFI_ENDPROC .pushsection .fixup,"ax" 2: movl $0,PT_FS(%esp) jmp 1b @@ -491,9 +416,8 @@ ENDPROC(ia32_sysenter_target) # system call handler stub ENTRY(system_call) - RING0_INT_FRAME # can't unwind into user space anyway ASM_CLAC - pushl_cfi %eax # save orig_eax + pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation @@ -527,7 +451,6 @@ restore_all_notrace: movb PT_CS(%esp), %al andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax - CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS #endif restore_nocheck: @@ -543,7 +466,6 @@ ENTRY(iret_exc) _ASM_EXTABLE(irq_return,iret_exc) #ifdef CONFIG_X86_ESPFIX32 - CFI_RESTORE_STATE ldt_ss: #ifdef CONFIG_PARAVIRT /* @@ -577,22 +499,19 @@ ldt_ss: shr $16, %edx mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ - pushl_cfi $__ESPFIX_SS - pushl_cfi %eax /* new kernel esp */ + pushl $__ESPFIX_SS + pushl %eax /* new kernel esp */ /* Disable interrupts, but do not irqtrace this section: we * will soon execute iret and the tracer was already set to * the irqstate after the iret */ DISABLE_INTERRUPTS(CLBR_EAX) lss (%esp), %esp /* switch to espfix segment */ - CFI_ADJUST_CFA_OFFSET -8 jmp restore_nocheck #endif - CFI_ENDPROC ENDPROC(system_call) # perform work that needs to be done immediately before resumption ALIGN - RING0_PTREGS_FRAME # can't unwind into user space anyway work_pending: testb $_TIF_NEED_RESCHED, %cl jz work_notifysig @@ -634,9 +553,9 @@ work_notifysig: # deal with pending signals and #ifdef CONFIG_VM86 ALIGN work_notifysig_v86: - pushl_cfi %ecx # save ti_flags for do_notify_resume + pushl %ecx # save ti_flags for do_notify_resume call save_v86_state # %eax contains pt_regs pointer - popl_cfi %ecx + popl %ecx movl %eax, %esp jmp 1b #endif @@ -666,9 +585,7 @@ syscall_exit_work: call syscall_trace_leave jmp resume_userspace END(syscall_exit_work) - CFI_ENDPROC - RING0_INT_FRAME # can't unwind into user space anyway syscall_fault: ASM_CLAC GET_THREAD_INFO(%ebp) @@ -685,7 +602,6 @@ sysenter_badsys: movl $-ENOSYS,%eax jmp sysenter_after_call END(sysenter_badsys) - CFI_ENDPROC .macro FIXUP_ESPFIX_STACK /* @@ -701,10 +617,9 @@ END(sysenter_badsys) mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ shl $16, %eax addl %esp, %eax /* the adjusted stack pointer */ - pushl_cfi $__KERNEL_DS - pushl_cfi %eax + pushl $__KERNEL_DS + pushl %eax lss (%esp), %esp /* 
switch to the normal stack segment */ - CFI_ADJUST_CFA_OFFSET -8 #endif .endm .macro UNWIND_ESPFIX_STACK @@ -728,13 +643,11 @@ END(sysenter_badsys) */ .align 8 ENTRY(irq_entries_start) - RING0_INT_FRAME vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) - pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ + pushl $(~vector+0x80) /* Note: always in signed byte range */ vector=vector+1 jmp common_interrupt - CFI_ADJUST_CFA_OFFSET -4 .align 8 .endr END(irq_entries_start) @@ -753,19 +666,16 @@ common_interrupt: call do_IRQ jmp ret_from_intr ENDPROC(common_interrupt) - CFI_ENDPROC #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ - RING0_INT_FRAME; \ ASM_CLAC; \ - pushl_cfi $~(nr); \ + pushl $~(nr); \ SAVE_ALL; \ TRACE_IRQS_OFF \ movl %esp,%eax; \ call fn; \ jmp ret_from_intr; \ - CFI_ENDPROC; \ ENDPROC(name) @@ -784,37 +694,31 @@ ENDPROC(name) #include ENTRY(coprocessor_error) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_coprocessor_error + pushl $0 + pushl $do_coprocessor_error jmp error_code - CFI_ENDPROC END(coprocessor_error) ENTRY(simd_coprocessor_error) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 + pushl $0 #ifdef CONFIG_X86_INVD_BUG /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ - ALTERNATIVE "pushl_cfi $do_general_protection", \ + ALTERNATIVE "pushl $do_general_protection", \ "pushl $do_simd_coprocessor_error", \ X86_FEATURE_XMM #else - pushl_cfi $do_simd_coprocessor_error + pushl $do_simd_coprocessor_error #endif jmp error_code - CFI_ENDPROC END(simd_coprocessor_error) ENTRY(device_not_available) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $-1 # mark this as an int - pushl_cfi $do_device_not_available + pushl $-1 # mark this as an int + pushl $do_device_not_available jmp error_code - CFI_ENDPROC END(device_not_available) #ifdef CONFIG_PARAVIRT @@ -830,115 +734,89 @@ END(native_irq_enable_sysexit) #endif ENTRY(overflow) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_overflow + pushl $0 + pushl $do_overflow jmp error_code - CFI_ENDPROC END(overflow) ENTRY(bounds) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_bounds + pushl $0 + pushl $do_bounds jmp error_code - CFI_ENDPROC END(bounds) ENTRY(invalid_op) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_invalid_op + pushl $0 + pushl $do_invalid_op jmp error_code - CFI_ENDPROC END(invalid_op) ENTRY(coprocessor_segment_overrun) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_coprocessor_segment_overrun + pushl $0 + pushl $do_coprocessor_segment_overrun jmp error_code - CFI_ENDPROC END(coprocessor_segment_overrun) ENTRY(invalid_TSS) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_invalid_TSS + pushl $do_invalid_TSS jmp error_code - CFI_ENDPROC END(invalid_TSS) ENTRY(segment_not_present) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_segment_not_present + pushl $do_segment_not_present jmp error_code - CFI_ENDPROC END(segment_not_present) ENTRY(stack_segment) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_stack_segment + pushl $do_stack_segment jmp error_code - CFI_ENDPROC END(stack_segment) ENTRY(alignment_check) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_alignment_check + pushl $do_alignment_check jmp error_code - CFI_ENDPROC END(alignment_check) ENTRY(divide_error) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 # no error code - pushl_cfi $do_divide_error + pushl $0 # no error code + pushl $do_divide_error jmp error_code - CFI_ENDPROC END(divide_error) #ifdef CONFIG_X86_MCE ENTRY(machine_check) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - 
pushl_cfi machine_check_vector + pushl $0 + pushl machine_check_vector jmp error_code - CFI_ENDPROC END(machine_check) #endif ENTRY(spurious_interrupt_bug) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_spurious_interrupt_bug + pushl $0 + pushl $do_spurious_interrupt_bug jmp error_code - CFI_ENDPROC END(spurious_interrupt_bug) #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter entrypoint expects, so fix it up before using the normal path. */ ENTRY(xen_sysenter_target) - RING0_INT_FRAME addl $5*4, %esp /* remove xen-provided frame */ - CFI_ADJUST_CFA_OFFSET -5*4 jmp sysenter_past_esp - CFI_ENDPROC ENTRY(xen_hypervisor_callback) - CFI_STARTPROC - pushl_cfi $-1 /* orig_ax = -1 => not a system call */ + pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL TRACE_IRQS_OFF @@ -962,7 +840,6 @@ ENTRY(xen_do_upcall) call xen_maybe_preempt_hcall #endif jmp ret_from_intr - CFI_ENDPROC ENDPROC(xen_hypervisor_callback) # Hypervisor uses this for application faults while it executes. @@ -976,8 +853,7 @@ ENDPROC(xen_hypervisor_callback) # to pop the stack frame we end up in an infinite loop of failsafe callbacks. # We distinguish between categories by maintaining a status value in EAX. ENTRY(xen_failsafe_callback) - CFI_STARTPROC - pushl_cfi %eax + pushl %eax movl $1,%eax 1: mov 4(%esp),%ds 2: mov 8(%esp),%es @@ -986,15 +862,13 @@ ENTRY(xen_failsafe_callback) /* EAX == 0 => Category 1 (Bad segment) EAX != 0 => Category 2 (Bad IRET) */ testl %eax,%eax - popl_cfi %eax + popl %eax lea 16(%esp),%esp - CFI_ADJUST_CFA_OFFSET -16 jz 5f jmp iret_exc -5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ +5: pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp ret_from_exception - CFI_ENDPROC .section .fixup,"ax" 6: xorl %eax,%eax @@ -1195,34 +1069,28 @@ return_to_handler: #ifdef CONFIG_TRACING ENTRY(trace_page_fault) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $trace_do_page_fault + pushl $trace_do_page_fault jmp error_code - CFI_ENDPROC END(trace_page_fault) #endif ENTRY(page_fault) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_page_fault + pushl $do_page_fault ALIGN error_code: /* the function address is in %gs's slot on the stack */ - pushl_cfi %fs - /*CFI_REL_OFFSET fs, 0*/ - pushl_cfi %es - /*CFI_REL_OFFSET es, 0*/ - pushl_cfi %ds - /*CFI_REL_OFFSET ds, 0*/ - pushl_cfi_reg eax - pushl_cfi_reg ebp - pushl_cfi_reg edi - pushl_cfi_reg esi - pushl_cfi_reg edx - pushl_cfi_reg ecx - pushl_cfi_reg ebx + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx cld movl $(__KERNEL_PERCPU), %ecx movl %ecx, %fs @@ -1240,7 +1108,6 @@ error_code: movl %esp,%eax # pt_regs pointer call *%edi jmp ret_from_exception - CFI_ENDPROC END(page_fault) /* @@ -1261,29 +1128,24 @@ END(page_fault) jne \ok \label: movl TSS_sysenter_sp0 + \offset(%esp), %esp - CFI_DEF_CFA esp, 0 - CFI_UNDEFINED eip - pushfl_cfi - pushl_cfi $__KERNEL_CS - pushl_cfi $sysenter_past_esp - CFI_REL_OFFSET eip, 0 + pushfl + pushl $__KERNEL_CS + pushl $sysenter_past_esp .endm ENTRY(debug) - RING0_INT_FRAME ASM_CLAC cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn debug_stack_correct: - pushl_cfi $-1 # mark this as an int + pushl $-1 # mark this as an int SAVE_ALL TRACE_IRQS_OFF xorl %edx,%edx # error code 0 movl %esp,%eax # pt_regs pointer call do_debug jmp ret_from_exception - CFI_ENDPROC END(debug) /* @@ -1295,45 +1157,40 @@ END(debug) * fault happened on the sysenter path. 
*/ ENTRY(nmi) - RING0_INT_FRAME ASM_CLAC #ifdef CONFIG_X86_ESPFIX32 - pushl_cfi %eax + pushl %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax - popl_cfi %eax + popl %eax je nmi_espfix_stack #endif cmpl $ia32_sysenter_target,(%esp) je nmi_stack_fixup - pushl_cfi %eax + pushl %eax movl %esp,%eax /* Do not access memory above the end of our stack page, * it might not exist. */ andl $(THREAD_SIZE-1),%eax cmpl $(THREAD_SIZE-20),%eax - popl_cfi %eax + popl %eax jae nmi_stack_correct cmpl $ia32_sysenter_target,12(%esp) je nmi_debug_stack_check nmi_stack_correct: - /* We have a RING0_INT_FRAME here */ - pushl_cfi %eax + pushl %eax SAVE_ALL xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi jmp restore_all_notrace - CFI_ENDPROC nmi_stack_fixup: - RING0_INT_FRAME FIX_STACK 12, nmi_stack_correct, 1 jmp nmi_stack_correct nmi_debug_stack_check: - /* We have a RING0_INT_FRAME here */ cmpw $__KERNEL_CS,16(%esp) jne nmi_stack_correct cmpl $debug,(%esp) @@ -1345,57 +1202,48 @@ nmi_debug_stack_check: #ifdef CONFIG_X86_ESPFIX32 nmi_espfix_stack: - /* We have a RING0_INT_FRAME here. - * + /* * create the pointer to lss back */ - pushl_cfi %ss - pushl_cfi %esp + pushl %ss + pushl %esp addl $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 - pushl_cfi 16(%esp) + pushl 16(%esp) .endr - pushl_cfi %eax + pushl %eax SAVE_ALL FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi RESTORE_REGS lss 12+4(%esp), %esp # back to espfix stack - CFI_ADJUST_CFA_OFFSET -24 jmp irq_return #endif - CFI_ENDPROC END(nmi) ENTRY(int3) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $-1 # mark this as an int + pushl $-1 # mark this as an int SAVE_ALL TRACE_IRQS_OFF xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 jmp ret_from_exception - CFI_ENDPROC END(int3) ENTRY(general_protection) - RING0_EC_FRAME - pushl_cfi $do_general_protection + pushl $do_general_protection jmp error_code - CFI_ENDPROC END(general_protection) #ifdef CONFIG_KVM_GUEST ENTRY(async_page_fault) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_async_page_fault + pushl $do_async_page_fault jmp error_code - CFI_ENDPROC END(async_page_fault) #endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 47b95813dc37..b84cec50c8cf 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -19,8 +19,6 @@ * at the top of the kernel process stack. * * Some macro usage: - * - CFI macros are used to generate dwarf2 unwind information for better - * backtraces. They don't change any code. * - ENTRY/END Define functions in the symbol table. * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. * - idtentry - Define exception entry points. 
@@ -30,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -112,61 +109,6 @@ ENDPROC(native_usergs_sysret64) # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ #endif -/* - * empty frame - */ - .macro EMPTY_FRAME start=1 offset=0 - .if \start - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,8+\offset - .else - CFI_DEF_CFA_OFFSET 8+\offset - .endif - .endm - -/* - * initial frame state for interrupts (and exceptions without error code) - */ - .macro INTR_FRAME start=1 offset=0 - EMPTY_FRAME \start, 5*8+\offset - /*CFI_REL_OFFSET ss, 4*8+\offset*/ - CFI_REL_OFFSET rsp, 3*8+\offset - /*CFI_REL_OFFSET rflags, 2*8+\offset*/ - /*CFI_REL_OFFSET cs, 1*8+\offset*/ - CFI_REL_OFFSET rip, 0*8+\offset - .endm - -/* - * initial frame state for exceptions with error code (and interrupts - * with vector already pushed) - */ - .macro XCPT_FRAME start=1 offset=0 - INTR_FRAME \start, 1*8+\offset - .endm - -/* - * frame that enables passing a complete pt_regs to a C function. - */ - .macro DEFAULT_FRAME start=1 offset=0 - XCPT_FRAME \start, ORIG_RAX+\offset - CFI_REL_OFFSET rdi, RDI+\offset - CFI_REL_OFFSET rsi, RSI+\offset - CFI_REL_OFFSET rdx, RDX+\offset - CFI_REL_OFFSET rcx, RCX+\offset - CFI_REL_OFFSET rax, RAX+\offset - CFI_REL_OFFSET r8, R8+\offset - CFI_REL_OFFSET r9, R9+\offset - CFI_REL_OFFSET r10, R10+\offset - CFI_REL_OFFSET r11, R11+\offset - CFI_REL_OFFSET rbx, RBX+\offset - CFI_REL_OFFSET rbp, RBP+\offset - CFI_REL_OFFSET r12, R12+\offset - CFI_REL_OFFSET r13, R13+\offset - CFI_REL_OFFSET r14, R14+\offset - CFI_REL_OFFSET r15, R15+\offset - .endm - /* * 64bit SYSCALL instruction entry. Up to 6 arguments in registers. * @@ -196,12 +138,6 @@ ENDPROC(native_usergs_sysret64) */ ENTRY(system_call) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,0 - CFI_REGISTER rip,rcx - /*CFI_REGISTER rflags,r11*/ - /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -219,8 +155,8 @@ GLOBAL(system_call_after_swapgs) movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp /* Construct struct pt_regs on stack */ - pushq_cfi $__USER_DS /* pt_regs->ss */ - pushq_cfi PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ + pushq $__USER_DS /* pt_regs->ss */ + pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ /* * Re-enable interrupts. 
* We use 'rsp_scratch' as a scratch space, hence irq-off block above @@ -229,22 +165,20 @@ GLOBAL(system_call_after_swapgs) * with using rsp_scratch: */ ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %r11 /* pt_regs->flags */ - pushq_cfi $__USER_CS /* pt_regs->cs */ - pushq_cfi %rcx /* pt_regs->ip */ - CFI_REL_OFFSET rip,0 - pushq_cfi_reg rax /* pt_regs->orig_ax */ - pushq_cfi_reg rdi /* pt_regs->di */ - pushq_cfi_reg rsi /* pt_regs->si */ - pushq_cfi_reg rdx /* pt_regs->dx */ - pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi $-ENOSYS /* pt_regs->ax */ - pushq_cfi_reg r8 /* pt_regs->r8 */ - pushq_cfi_reg r9 /* pt_regs->r9 */ - pushq_cfi_reg r10 /* pt_regs->r10 */ - pushq_cfi_reg r11 /* pt_regs->r11 */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ + pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq $-ENOSYS /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */ - CFI_ADJUST_CFA_OFFSET 6*8 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz tracesys @@ -282,13 +216,9 @@ system_call_fastpath: testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ - CFI_REMEMBER_STATE - RESTORE_C_REGS_EXCEPT_RCX_R11 movq RIP(%rsp),%rcx - CFI_REGISTER rip,rcx movq EFLAGS(%rsp),%r11 - /*CFI_REGISTER rflags,r11*/ movq RSP(%rsp),%rsp /* * 64bit SYSRET restores rip from rcx, @@ -307,8 +237,6 @@ system_call_fastpath: */ USERGS_SYSRET64 - CFI_RESTORE_STATE - /* Do syscall entry tracing */ tracesys: movq %rsp, %rdi @@ -374,9 +302,9 @@ int_careful: jnc int_very_careful TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %rdi + pushq %rdi SCHEDULE_USER - popq_cfi %rdi + popq %rdi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF jmp int_with_check @@ -389,10 +317,10 @@ int_very_careful: /* Check for syscall exit trace */ testl $_TIF_WORK_SYSCALL_EXIT,%edx jz int_signal - pushq_cfi %rdi + pushq %rdi leaq 8(%rsp),%rdi # &ptregs -> arg1 call syscall_trace_leave - popq_cfi %rdi + popq %rdi andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi jmp int_restore_rest @@ -475,27 +403,21 @@ syscall_return: * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: - CFI_REMEMBER_STATE /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp),%rsp USERGS_SYSRET64 - CFI_RESTORE_STATE opportunistic_sysret_failed: SWAPGS jmp restore_c_regs_and_iret - CFI_ENDPROC END(system_call) .macro FORK_LIKE func ENTRY(stub_\func) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 /* offset 8: return address */ SAVE_EXTRA_REGS 8 jmp sys_\func - CFI_ENDPROC END(stub_\func) .endm @@ -504,8 +426,6 @@ END(stub_\func) FORK_LIKE vfork ENTRY(stub_execve) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 call sys_execve return_from_execve: testl %eax, %eax @@ -515,11 +435,9 @@ return_from_execve: 1: /* must use IRET code path (pt_regs->cs may have changed) */ addq $8, %rsp - CFI_ADJUST_CFA_OFFSET -8 ZERO_EXTRA_REGS movq %rax,RAX(%rsp) jmp int_ret_from_sys_call - CFI_ENDPROC END(stub_execve) /* * Remaining execve stubs are only 7 bytes long. 
@@ -527,32 +445,23 @@ END(stub_execve) */ .align 8 GLOBAL(stub_execveat) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 call sys_execveat jmp return_from_execve - CFI_ENDPROC END(stub_execveat) #if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) .align 8 GLOBAL(stub_x32_execve) GLOBAL(stub32_execve) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 call compat_sys_execve jmp return_from_execve - CFI_ENDPROC END(stub32_execve) END(stub_x32_execve) .align 8 GLOBAL(stub_x32_execveat) GLOBAL(stub32_execveat) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 call compat_sys_execveat jmp return_from_execve - CFI_ENDPROC END(stub32_execveat) END(stub_x32_execveat) #endif @@ -562,8 +471,6 @@ END(stub_x32_execveat) * This cannot be done with SYSRET, so use the IRET return path instead. */ ENTRY(stub_rt_sigreturn) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 /* * SAVE_EXTRA_REGS result is not normally needed: * sigreturn overwrites all pt_regs->GPREGS. @@ -575,21 +482,16 @@ ENTRY(stub_rt_sigreturn) call sys_rt_sigreturn return_from_stub: addq $8, %rsp - CFI_ADJUST_CFA_OFFSET -8 RESTORE_EXTRA_REGS movq %rax,RAX(%rsp) jmp int_ret_from_sys_call - CFI_ENDPROC END(stub_rt_sigreturn) #ifdef CONFIG_X86_X32_ABI ENTRY(stub_x32_rt_sigreturn) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 SAVE_EXTRA_REGS 8 call sys32_x32_rt_sigreturn jmp return_from_stub - CFI_ENDPROC END(stub_x32_rt_sigreturn) #endif @@ -599,12 +501,11 @@ END(stub_x32_rt_sigreturn) * rdi: prev task we switched from */ ENTRY(ret_from_fork) - DEFAULT_FRAME LOCK ; btr $TIF_FORK,TI_flags(%r8) - pushq_cfi $0x0002 - popfq_cfi # reset kernel eflags + pushq $0x0002 + popfq # reset kernel eflags call schedule_tail # rdi: 'prev' task parameter @@ -628,7 +529,6 @@ ENTRY(ret_from_fork) movl $0, RAX(%rsp) RESTORE_EXTRA_REGS jmp int_ret_from_sys_call - CFI_ENDPROC END(ret_from_fork) /* @@ -637,16 +537,13 @@ END(ret_from_fork) */ .align 8 ENTRY(irq_entries_start) - INTR_FRAME vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) - pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ + pushq $(~vector+0x80) /* Note: always in signed byte range */ vector=vector+1 jmp common_interrupt - CFI_ADJUST_CFA_OFFSET -8 .align 8 .endr - CFI_ENDPROC END(irq_entries_start) /* @@ -688,17 +585,7 @@ END(irq_entries_start) movq %rsp, %rsi incl PER_CPU_VAR(irq_count) cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp - CFI_DEF_CFA_REGISTER rsi pushq %rsi - /* - * For debugger: - * "CFA (Current Frame Address) is the value on stack + offset" - */ - CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ - 0x77 /* DW_OP_breg7 (rsp) */, 0, \ - 0x06 /* DW_OP_deref */, \ - 0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \ - 0x22 /* DW_OP_plus */ /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF @@ -711,7 +598,6 @@ END(irq_entries_start) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: - XCPT_FRAME ASM_CLAC addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ @@ -723,11 +609,8 @@ ret_from_intr: /* Restore saved previous stack */ popq %rsi - CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */ /* return code expects complete pt_regs - adjust rsp accordingly: */ leaq -RBP(%rsi),%rsp - CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET RBP testb $3, CS(%rsp) jz retint_kernel @@ -743,7 +626,6 @@ retint_check: LOCKDEP_SYS_EXIT_IRQ movl TI_flags(%rcx),%edx andl %edi,%edx - CFI_REMEMBER_STATE jnz retint_careful retint_swapgs: /* return to user-space */ @@ -807,8 +689,8 @@ native_irq_return_iret: #ifdef CONFIG_X86_ESPFIX64 
native_irq_return_ldt: - pushq_cfi %rax - pushq_cfi %rdi + pushq %rax + pushq %rdi SWAPGS movq PER_CPU_VAR(espfix_waddr),%rdi movq %rax,(0*8)(%rdi) /* RAX */ @@ -823,24 +705,23 @@ native_irq_return_ldt: movq (5*8)(%rsp),%rax /* RSP */ movq %rax,(4*8)(%rdi) andl $0xffff0000,%eax - popq_cfi %rdi + popq %rdi orq PER_CPU_VAR(espfix_stack),%rax SWAPGS movq %rax,%rsp - popq_cfi %rax + popq %rax jmp native_irq_return_iret #endif /* edi: workmask, edx: work */ retint_careful: - CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc retint_signal TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %rdi + pushq %rdi SCHEDULE_USER - popq_cfi %rdi + popq %rdi GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -862,7 +743,6 @@ retint_signal: GET_THREAD_INFO(%rcx) jmp retint_with_reschedule - CFI_ENDPROC END(common_interrupt) /* @@ -870,13 +750,11 @@ END(common_interrupt) */ .macro apicinterrupt3 num sym do_sym ENTRY(\sym) - INTR_FRAME ASM_CLAC - pushq_cfi $~(\num) + pushq $~(\num) .Lcommon_\sym: interrupt \do_sym jmp ret_from_intr - CFI_ENDPROC END(\sym) .endm @@ -959,24 +837,17 @@ ENTRY(\sym) .error "using shift_ist requires paranoid=1" .endif - .if \has_error_code - XCPT_FRAME - .else - INTR_FRAME - .endif - ASM_CLAC PARAVIRT_ADJUST_EXCEPTION_FRAME .ifeq \has_error_code - pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ + pushq $-1 /* ORIG_RAX: no syscall to restart */ .endif ALLOC_PT_GPREGS_ON_STACK .if \paranoid .if \paranoid == 1 - CFI_REMEMBER_STATE testb $3, CS(%rsp) /* If coming from userspace, switch */ jnz 1f /* stacks. */ .endif @@ -986,8 +857,6 @@ ENTRY(\sym) .endif /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ - DEFAULT_FRAME 0 - .if \paranoid .if \shift_ist != -1 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ @@ -1023,7 +892,6 @@ ENTRY(\sym) .endif .if \paranoid == 1 - CFI_RESTORE_STATE /* * Paranoid entry from userspace. Switch stacks and treat it * as a normal entry. This means that paranoid handlers @@ -1032,7 +900,6 @@ ENTRY(\sym) 1: call error_entry - DEFAULT_FRAME 0 movq %rsp,%rdi /* pt_regs pointer */ call sync_regs @@ -1051,8 +918,6 @@ ENTRY(\sym) jmp error_exit /* %ebx: no swapgs flag */ .endif - - CFI_ENDPROC END(\sym) .endm @@ -1085,17 +950,15 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 /* Reload gs selector with exception handling */ /* edi: new selector */ ENTRY(native_load_gs_index) - CFI_STARTPROC - pushfq_cfi + pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS gs_change: movl %edi,%gs 2: mfence /* workaround */ SWAPGS - popfq_cfi + popfq ret - CFI_ENDPROC END(native_load_gs_index) _ASM_EXTABLE(gs_change,bad_gs) @@ -1110,22 +973,15 @@ bad_gs: /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(do_softirq_own_stack) - CFI_STARTPROC - pushq_cfi %rbp - CFI_REL_OFFSET rbp,0 + pushq %rbp mov %rsp,%rbp - CFI_DEF_CFA_REGISTER rbp incl PER_CPU_VAR(irq_count) cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq - CFI_RESTORE rbp - CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET -8 decl PER_CPU_VAR(irq_count) ret - CFI_ENDPROC END(do_softirq_own_stack) #ifdef CONFIG_XEN @@ -1145,28 +1001,22 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 * activation and restart the handler using the previous one. 
*/ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) - CFI_STARTPROC /* * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will * see the correct pointer to the pt_regs */ movq %rdi, %rsp # we don't return, adjust the stack frame - CFI_ENDPROC - DEFAULT_FRAME 11: incl PER_CPU_VAR(irq_count) movq %rsp,%rbp - CFI_DEF_CFA_REGISTER rbp cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp pushq %rbp # backlink for old unwinder call xen_evtchn_do_upcall popq %rsp - CFI_DEF_CFA_REGISTER rsp decl PER_CPU_VAR(irq_count) #ifndef CONFIG_PREEMPT call xen_maybe_preempt_hcall #endif jmp error_exit - CFI_ENDPROC END(xen_do_hypervisor_callback) /* @@ -1183,16 +1033,8 @@ END(xen_do_hypervisor_callback) * with its current contents: any discrepancy means we in category 1. */ ENTRY(xen_failsafe_callback) - INTR_FRAME 1 (6*8) - /*CFI_REL_OFFSET gs,GS*/ - /*CFI_REL_OFFSET fs,FS*/ - /*CFI_REL_OFFSET es,ES*/ - /*CFI_REL_OFFSET ds,DS*/ - CFI_REL_OFFSET r11,8 - CFI_REL_OFFSET rcx,0 movl %ds,%ecx cmpw %cx,0x10(%rsp) - CFI_REMEMBER_STATE jne 1f movl %es,%ecx cmpw %cx,0x18(%rsp) @@ -1205,29 +1047,21 @@ ENTRY(xen_failsafe_callback) jne 1f /* All segments match their saved values => Category 2 (Bad IRET). */ movq (%rsp),%rcx - CFI_RESTORE rcx movq 8(%rsp),%r11 - CFI_RESTORE r11 addq $0x30,%rsp - CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $0 /* RIP */ - pushq_cfi %r11 - pushq_cfi %rcx + pushq $0 /* RIP */ + pushq %r11 + pushq %rcx jmp general_protection - CFI_RESTORE_STATE 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ movq (%rsp),%rcx - CFI_RESTORE rcx movq 8(%rsp),%r11 - CFI_RESTORE r11 addq $0x30,%rsp - CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $-1 /* orig_ax = -1 => not a system call */ + pushq $-1 /* orig_ax = -1 => not a system call */ ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS jmp error_exit - CFI_ENDPROC END(xen_failsafe_callback) apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ @@ -1263,7 +1097,6 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector( * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) - XCPT_FRAME 1 15*8 cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 @@ -1275,7 +1108,6 @@ ENTRY(paranoid_entry) SWAPGS xorl %ebx,%ebx 1: ret - CFI_ENDPROC END(paranoid_entry) /* @@ -1290,7 +1122,6 @@ END(paranoid_entry) */ /* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(paranoid_exit) - DEFAULT_FRAME DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF_DEBUG testl %ebx,%ebx /* swapgs needed? */ @@ -1305,7 +1136,6 @@ paranoid_exit_restore: RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 INTERRUPT_RETURN - CFI_ENDPROC END(paranoid_exit) /* @@ -1313,7 +1143,6 @@ END(paranoid_exit) * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(error_entry) - XCPT_FRAME 1 15*8 cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 @@ -1333,7 +1162,6 @@ error_sti: * for these here too. 
*/ error_kernelspace: - CFI_REL_OFFSET rcx, RCX+8 incl %ebx leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) @@ -1357,13 +1185,11 @@ error_bad_iret: mov %rax,%rsp decl %ebx /* Return to usergs */ jmp error_sti - CFI_ENDPROC END(error_entry) /* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(error_exit) - DEFAULT_FRAME movl %ebx,%eax RESTORE_EXTRA_REGS DISABLE_INTERRUPTS(CLBR_NONE) @@ -1377,12 +1203,10 @@ ENTRY(error_exit) andl %edi,%edx jnz retint_careful jmp retint_swapgs - CFI_ENDPROC END(error_exit) /* Runs on exception stack */ ENTRY(nmi) - INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME /* * We allow breakpoints in NMIs. If a breakpoint occurs, then @@ -1417,8 +1241,7 @@ ENTRY(nmi) */ /* Use %rdx as our temp variable throughout */ - pushq_cfi %rdx - CFI_REL_OFFSET rdx, 0 + pushq %rdx /* * If %cs was not the kernel segment, then the NMI triggered in user @@ -1452,8 +1275,6 @@ ENTRY(nmi) jb first_nmi /* Ah, it is within the NMI stack, treat it as nested */ - CFI_REMEMBER_STATE - nested_nmi: /* * Do nothing if we interrupted the fixup in repeat_nmi. @@ -1471,26 +1292,22 @@ nested_nmi: /* Set up the interrupted NMIs stack to jump to repeat_nmi */ leaq -1*8(%rsp), %rdx movq %rdx, %rsp - CFI_ADJUST_CFA_OFFSET 1*8 leaq -10*8(%rsp), %rdx - pushq_cfi $__KERNEL_DS - pushq_cfi %rdx - pushfq_cfi - pushq_cfi $__KERNEL_CS - pushq_cfi $repeat_nmi + pushq $__KERNEL_DS + pushq %rdx + pushfq + pushq $__KERNEL_CS + pushq $repeat_nmi /* Put stack back */ addq $(6*8), %rsp - CFI_ADJUST_CFA_OFFSET -6*8 nested_nmi_out: - popq_cfi %rdx - CFI_RESTORE rdx + popq %rdx /* No need to check faults here */ INTERRUPT_RETURN - CFI_RESTORE_STATE first_nmi: /* * Because nested NMIs will use the pushed location that we @@ -1529,22 +1346,19 @@ first_nmi: */ /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ movq (%rsp), %rdx - CFI_RESTORE rdx /* Set the NMI executing variable on the stack. */ - pushq_cfi $1 + pushq $1 /* * Leave room for the "copied" frame */ subq $(5*8), %rsp - CFI_ADJUST_CFA_OFFSET 5*8 /* Copy the stack frame to the Saved frame */ .rept 5 - pushq_cfi 11*8(%rsp) + pushq 11*8(%rsp) .endr - CFI_DEF_CFA_OFFSET 5*8 /* Everything up to here is safe from nested NMIs */ @@ -1567,12 +1381,10 @@ repeat_nmi: /* Make another copy, this one may be modified by nested NMIs */ addq $(10*8), %rsp - CFI_ADJUST_CFA_OFFSET -10*8 .rept 5 - pushq_cfi -6*8(%rsp) + pushq -6*8(%rsp) .endr subq $(5*8), %rsp - CFI_DEF_CFA_OFFSET 5*8 end_repeat_nmi: /* @@ -1580,7 +1392,7 @@ end_repeat_nmi: * NMI if the first NMI took an exception and reset our iret stack * so that we repeat another NMI. */ - pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ + pushq $-1 /* ORIG_RAX: no syscall to restart */ ALLOC_PT_GPREGS_ON_STACK /* @@ -1591,7 +1403,6 @@ end_repeat_nmi: * exceptions might do. */ call paranoid_entry - DEFAULT_FRAME 0 /* * Save off the CR2 register. 
If we take a page fault in the NMI then @@ -1628,13 +1439,10 @@ nmi_restore: /* Clear the NMI executing stack variable */ movq $0, 5*8(%rsp) jmp irq_return - CFI_ENDPROC END(nmi) ENTRY(ignore_sysret) - CFI_STARTPROC mov $-ENOSYS,%eax sysret - CFI_ENDPROC END(ignore_sysret) diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 00933d5e992f..9b0ca8fe80fc 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -11,26 +11,23 @@ #include #include -#include /* if you want SMP support, implement these with real spinlocks */ .macro LOCK reg - pushfl_cfi + pushfl cli .endm .macro UNLOCK reg - popfl_cfi + popfl .endm #define BEGIN(op) \ .macro endp; \ - CFI_ENDPROC; \ ENDPROC(atomic64_##op##_386); \ .purgem endp; \ .endm; \ ENTRY(atomic64_##op##_386); \ - CFI_STARTPROC; \ LOCK v; #define ENDP endp diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 082a85167a5b..db3ae85440ff 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -11,7 +11,6 @@ #include #include -#include .macro read64 reg movl %ebx, %eax @@ -22,16 +21,11 @@ .endm ENTRY(atomic64_read_cx8) - CFI_STARTPROC - read64 %ecx ret - CFI_ENDPROC ENDPROC(atomic64_read_cx8) ENTRY(atomic64_set_cx8) - CFI_STARTPROC - 1: /* we don't need LOCK_PREFIX since aligned 64-bit writes * are atomic on 586 and newer */ @@ -39,28 +33,23 @@ ENTRY(atomic64_set_cx8) jne 1b ret - CFI_ENDPROC ENDPROC(atomic64_set_cx8) ENTRY(atomic64_xchg_cx8) - CFI_STARTPROC - 1: LOCK_PREFIX cmpxchg8b (%esi) jne 1b ret - CFI_ENDPROC ENDPROC(atomic64_xchg_cx8) .macro addsub_return func ins insc ENTRY(atomic64_\func\()_return_cx8) - CFI_STARTPROC - pushl_cfi_reg ebp - pushl_cfi_reg ebx - pushl_cfi_reg esi - pushl_cfi_reg edi + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi movl %eax, %esi movl %edx, %edi @@ -79,12 +68,11 @@ ENTRY(atomic64_\func\()_return_cx8) 10: movl %ebx, %eax movl %ecx, %edx - popl_cfi_reg edi - popl_cfi_reg esi - popl_cfi_reg ebx - popl_cfi_reg ebp + popl %edi + popl %esi + popl %ebx + popl %ebp ret - CFI_ENDPROC ENDPROC(atomic64_\func\()_return_cx8) .endm @@ -93,8 +81,7 @@ addsub_return sub sub sbb .macro incdec_return func ins insc ENTRY(atomic64_\func\()_return_cx8) - CFI_STARTPROC - pushl_cfi_reg ebx + pushl %ebx read64 %esi 1: @@ -109,9 +96,8 @@ ENTRY(atomic64_\func\()_return_cx8) 10: movl %ebx, %eax movl %ecx, %edx - popl_cfi_reg ebx + popl %ebx ret - CFI_ENDPROC ENDPROC(atomic64_\func\()_return_cx8) .endm @@ -119,8 +105,7 @@ incdec_return inc add adc incdec_return dec sub sbb ENTRY(atomic64_dec_if_positive_cx8) - CFI_STARTPROC - pushl_cfi_reg ebx + pushl %ebx read64 %esi 1: @@ -136,18 +121,16 @@ ENTRY(atomic64_dec_if_positive_cx8) 2: movl %ebx, %eax movl %ecx, %edx - popl_cfi_reg ebx + popl %ebx ret - CFI_ENDPROC ENDPROC(atomic64_dec_if_positive_cx8) ENTRY(atomic64_add_unless_cx8) - CFI_STARTPROC - pushl_cfi_reg ebp - pushl_cfi_reg ebx + pushl %ebp + pushl %ebx /* these just push these two parameters on the stack */ - pushl_cfi_reg edi - pushl_cfi_reg ecx + pushl %edi + pushl %ecx movl %eax, %ebp movl %edx, %edi @@ -168,21 +151,18 @@ ENTRY(atomic64_add_unless_cx8) movl $1, %eax 3: addl $8, %esp - CFI_ADJUST_CFA_OFFSET -8 - popl_cfi_reg ebx - popl_cfi_reg ebp + popl %ebx + popl %ebp ret 4: cmpl %edx, 4(%esp) jne 2b xorl %eax, %eax jmp 3b - CFI_ENDPROC ENDPROC(atomic64_add_unless_cx8) ENTRY(atomic64_inc_not_zero_cx8) - CFI_STARTPROC - pushl_cfi_reg ebx + pushl %ebx read64 %esi 1: @@ -199,7 +179,6 @@ ENTRY(atomic64_inc_not_zero_cx8) movl 
$1, %eax 3: - popl_cfi_reg ebx + popl %ebx ret - CFI_ENDPROC ENDPROC(atomic64_inc_not_zero_cx8) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 9bc944a91274..c1e623209853 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -26,7 +26,6 @@ */ #include -#include #include #include @@ -50,9 +49,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) * alignment for the unrolled loop. */ ENTRY(csum_partial) - CFI_STARTPROC - pushl_cfi_reg esi - pushl_cfi_reg ebx + pushl %esi + pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len movl 12(%esp),%esi # Function arg: unsigned char *buff @@ -129,10 +127,9 @@ ENTRY(csum_partial) jz 8f roll $8, %eax 8: - popl_cfi_reg ebx - popl_cfi_reg esi + popl %ebx + popl %esi ret - CFI_ENDPROC ENDPROC(csum_partial) #else @@ -140,9 +137,8 @@ ENDPROC(csum_partial) /* Version for PentiumII/PPro */ ENTRY(csum_partial) - CFI_STARTPROC - pushl_cfi_reg esi - pushl_cfi_reg ebx + pushl %esi + pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len movl 12(%esp),%esi # Function arg: const unsigned char *buf @@ -249,10 +245,9 @@ ENTRY(csum_partial) jz 90f roll $8, %eax 90: - popl_cfi_reg ebx - popl_cfi_reg esi + popl %ebx + popl %esi ret - CFI_ENDPROC ENDPROC(csum_partial) #endif @@ -287,12 +282,10 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, #define FP 12 ENTRY(csum_partial_copy_generic) - CFI_STARTPROC subl $4,%esp - CFI_ADJUST_CFA_OFFSET 4 - pushl_cfi_reg edi - pushl_cfi_reg esi - pushl_cfi_reg ebx + pushl %edi + pushl %esi + pushl %ebx movl ARGBASE+16(%esp),%eax # sum movl ARGBASE+12(%esp),%ecx # len movl ARGBASE+4(%esp),%esi # src @@ -401,12 +394,11 @@ DST( movb %cl, (%edi) ) .previous - popl_cfi_reg ebx - popl_cfi_reg esi - popl_cfi_reg edi - popl_cfi %ecx # equivalent to addl $4,%esp + popl %ebx + popl %esi + popl %edi + popl %ecx # equivalent to addl $4,%esp ret - CFI_ENDPROC ENDPROC(csum_partial_copy_generic) #else @@ -426,10 +418,9 @@ ENDPROC(csum_partial_copy_generic) #define ARGBASE 12 ENTRY(csum_partial_copy_generic) - CFI_STARTPROC - pushl_cfi_reg ebx - pushl_cfi_reg edi - pushl_cfi_reg esi + pushl %ebx + pushl %edi + pushl %esi movl ARGBASE+4(%esp),%esi #src movl ARGBASE+8(%esp),%edi #dst movl ARGBASE+12(%esp),%ecx #len @@ -489,11 +480,10 @@ DST( movb %dl, (%edi) ) jmp 7b .previous - popl_cfi_reg esi - popl_cfi_reg edi - popl_cfi_reg ebx + popl %esi + popl %edi + popl %ebx ret - CFI_ENDPROC ENDPROC(csum_partial_copy_generic) #undef ROUND diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index e67e579c93bd..a2fe51b00cce 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,5 +1,4 @@ #include -#include #include #include @@ -15,7 +14,6 @@ * %rdi - page */ ENTRY(clear_page) - CFI_STARTPROC ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \ "jmp clear_page_c_e", X86_FEATURE_ERMS @@ -24,11 +22,9 @@ ENTRY(clear_page) xorl %eax,%eax rep stosq ret - CFI_ENDPROC ENDPROC(clear_page) ENTRY(clear_page_orig) - CFI_STARTPROC xorl %eax,%eax movl $4096/64,%ecx @@ -48,14 +44,11 @@ ENTRY(clear_page_orig) jnz .Lloop nop ret - CFI_ENDPROC ENDPROC(clear_page_orig) ENTRY(clear_page_c_e) - CFI_STARTPROC movl $4096,%ecx xorl %eax,%eax rep stosb ret - CFI_ENDPROC ENDPROC(clear_page_c_e) diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 40a172541ee2..9b330242e740 100644 --- 
a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -6,7 +6,6 @@ * */ #include -#include #include .text @@ -21,7 +20,6 @@ * %al : Operation successful */ ENTRY(this_cpu_cmpxchg16b_emu) -CFI_STARTPROC # # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not @@ -32,7 +30,7 @@ CFI_STARTPROC # *atomic* on a single cpu (as provided by the this_cpu_xx class of # macros). # - pushfq_cfi + pushfq cli cmpq PER_CPU_VAR((%rsi)), %rax @@ -43,17 +41,13 @@ CFI_STARTPROC movq %rbx, PER_CPU_VAR((%rsi)) movq %rcx, PER_CPU_VAR(8(%rsi)) - CFI_REMEMBER_STATE - popfq_cfi + popfq mov $1, %al ret - CFI_RESTORE_STATE .Lnot_same: - popfq_cfi + popfq xor %al,%al ret -CFI_ENDPROC - ENDPROC(this_cpu_cmpxchg16b_emu) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index b4807fce5177..ad5349778490 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -7,7 +7,6 @@ */ #include -#include .text @@ -20,14 +19,13 @@ * %ecx : high 32 bits of new value */ ENTRY(cmpxchg8b_emu) -CFI_STARTPROC # # Emulate 'cmpxchg8b (%esi)' on UP except we don't # set the whole ZF thing (caller will just compare # eax:edx with the expected value) # - pushfl_cfi + pushfl cli cmpl (%esi), %eax @@ -38,18 +36,15 @@ CFI_STARTPROC movl %ebx, (%esi) movl %ecx, 4(%esi) - CFI_REMEMBER_STATE - popfl_cfi + popfl ret - CFI_RESTORE_STATE .Lnot_same: movl (%esi), %eax .Lhalf_same: movl 4(%esi), %edx - popfl_cfi + popfl ret -CFI_ENDPROC ENDPROC(cmpxchg8b_emu) diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 8239dbcbf984..009f98216b7e 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -1,7 +1,6 @@ /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ #include -#include #include #include @@ -13,22 +12,16 @@ */ ALIGN ENTRY(copy_page) - CFI_STARTPROC ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD movl $4096/8, %ecx rep movsq ret - CFI_ENDPROC ENDPROC(copy_page) ENTRY(copy_page_regs) - CFI_STARTPROC subq $2*8, %rsp - CFI_ADJUST_CFA_OFFSET 2*8 movq %rbx, (%rsp) - CFI_REL_OFFSET rbx, 0 movq %r12, 1*8(%rsp) - CFI_REL_OFFSET r12, 1*8 movl $(4096/64)-5, %ecx .p2align 4 @@ -87,11 +80,7 @@ ENTRY(copy_page_regs) jnz .Loop2 movq (%rsp), %rbx - CFI_RESTORE rbx movq 1*8(%rsp), %r12 - CFI_RESTORE r12 addq $2*8, %rsp - CFI_ADJUST_CFA_OFFSET -2*8 ret - CFI_ENDPROC ENDPROC(copy_page_regs) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index e4b3beee83bd..982ce34f4a9b 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -7,7 +7,6 @@ */ #include -#include #include #include #include @@ -18,7 +17,6 @@ /* Standard copy_to_user with segment limit checking */ ENTRY(_copy_to_user) - CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx addq %rdx,%rcx @@ -30,12 +28,10 @@ ENTRY(_copy_to_user) X86_FEATURE_REP_GOOD, \ "jmp copy_user_enhanced_fast_string", \ X86_FEATURE_ERMS - CFI_ENDPROC ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ ENTRY(_copy_from_user) - CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rsi,%rcx addq %rdx,%rcx @@ -47,14 +43,12 @@ ENTRY(_copy_from_user) X86_FEATURE_REP_GOOD, \ "jmp copy_user_enhanced_fast_string", \ X86_FEATURE_ERMS - CFI_ENDPROC ENDPROC(_copy_from_user) .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) bad_from_user: - CFI_STARTPROC movl %edx,%ecx xorl %eax,%eax rep @@ -62,7 +56,6 @@ bad_from_user: bad_to_user: movl %edx,%eax ret - CFI_ENDPROC ENDPROC(bad_from_user) .previous @@ -80,7 +73,6 @@ 
ENDPROC(bad_from_user) * eax uncopied bytes or 0 if successful. */ ENTRY(copy_user_generic_unrolled) - CFI_STARTPROC ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ @@ -162,7 +154,6 @@ ENTRY(copy_user_generic_unrolled) _ASM_EXTABLE(19b,40b) _ASM_EXTABLE(21b,50b) _ASM_EXTABLE(22b,50b) - CFI_ENDPROC ENDPROC(copy_user_generic_unrolled) /* Some CPUs run faster using the string copy instructions. @@ -184,7 +175,6 @@ ENDPROC(copy_user_generic_unrolled) * eax uncopied bytes or 0 if successful. */ ENTRY(copy_user_generic_string) - CFI_STARTPROC ASM_STAC cmpl $8,%edx jb 2f /* less than 8 bytes, go to byte copy loop */ @@ -209,7 +199,6 @@ ENTRY(copy_user_generic_string) _ASM_EXTABLE(1b,11b) _ASM_EXTABLE(3b,12b) - CFI_ENDPROC ENDPROC(copy_user_generic_string) /* @@ -225,7 +214,6 @@ ENDPROC(copy_user_generic_string) * eax uncopied bytes or 0 if successful. */ ENTRY(copy_user_enhanced_fast_string) - CFI_STARTPROC ASM_STAC movl %edx,%ecx 1: rep @@ -240,7 +228,6 @@ ENTRY(copy_user_enhanced_fast_string) .previous _ASM_EXTABLE(1b,12b) - CFI_ENDPROC ENDPROC(copy_user_enhanced_fast_string) /* @@ -248,7 +235,6 @@ ENDPROC(copy_user_enhanced_fast_string) * This will force destination/source out of cache for more performance. */ ENTRY(__copy_user_nocache) - CFI_STARTPROC ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ @@ -332,5 +318,4 @@ ENTRY(__copy_user_nocache) _ASM_EXTABLE(19b,40b) _ASM_EXTABLE(21b,50b) _ASM_EXTABLE(22b,50b) - CFI_ENDPROC ENDPROC(__copy_user_nocache) diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 9734182966f3..7e48807b2fa1 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -6,7 +6,6 @@ * for more details. No warranty for anything given at all. */ #include -#include #include #include @@ -47,23 +46,16 @@ ENTRY(csum_partial_copy_generic) - CFI_STARTPROC cmpl $3*64, %edx jle .Lignore .Lignore: subq $7*8, %rsp - CFI_ADJUST_CFA_OFFSET 7*8 movq %rbx, 2*8(%rsp) - CFI_REL_OFFSET rbx, 2*8 movq %r12, 3*8(%rsp) - CFI_REL_OFFSET r12, 3*8 movq %r14, 4*8(%rsp) - CFI_REL_OFFSET r14, 4*8 movq %r13, 5*8(%rsp) - CFI_REL_OFFSET r13, 5*8 movq %rbp, 6*8(%rsp) - CFI_REL_OFFSET rbp, 6*8 movq %r8, (%rsp) movq %r9, 1*8(%rsp) @@ -206,22 +198,14 @@ ENTRY(csum_partial_copy_generic) addl %ebx, %eax adcl %r9d, %eax /* carry */ - CFI_REMEMBER_STATE .Lende: movq 2*8(%rsp), %rbx - CFI_RESTORE rbx movq 3*8(%rsp), %r12 - CFI_RESTORE r12 movq 4*8(%rsp), %r14 - CFI_RESTORE r14 movq 5*8(%rsp), %r13 - CFI_RESTORE r13 movq 6*8(%rsp), %rbp - CFI_RESTORE rbp addq $7*8, %rsp - CFI_ADJUST_CFA_OFFSET -7*8 ret - CFI_RESTORE_STATE /* Exception handlers. 
Very simple, zeroing is done in the wrappers */ .Lbad_source: @@ -237,5 +221,4 @@ ENTRY(csum_partial_copy_generic) jz .Lende movl $-EFAULT, (%rax) jmp .Lende - CFI_ENDPROC ENDPROC(csum_partial_copy_generic) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index a4512359656a..46668cda4ffd 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -26,7 +26,6 @@ */ #include -#include #include #include #include @@ -36,7 +35,6 @@ .text ENTRY(__get_user_1) - CFI_STARTPROC GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user @@ -45,11 +43,9 @@ ENTRY(__get_user_1) xor %eax,%eax ASM_CLAC ret - CFI_ENDPROC ENDPROC(__get_user_1) ENTRY(__get_user_2) - CFI_STARTPROC add $1,%_ASM_AX jc bad_get_user GET_THREAD_INFO(%_ASM_DX) @@ -60,11 +56,9 @@ ENTRY(__get_user_2) xor %eax,%eax ASM_CLAC ret - CFI_ENDPROC ENDPROC(__get_user_2) ENTRY(__get_user_4) - CFI_STARTPROC add $3,%_ASM_AX jc bad_get_user GET_THREAD_INFO(%_ASM_DX) @@ -75,11 +69,9 @@ ENTRY(__get_user_4) xor %eax,%eax ASM_CLAC ret - CFI_ENDPROC ENDPROC(__get_user_4) ENTRY(__get_user_8) - CFI_STARTPROC #ifdef CONFIG_X86_64 add $7,%_ASM_AX jc bad_get_user @@ -104,28 +96,23 @@ ENTRY(__get_user_8) ASM_CLAC ret #endif - CFI_ENDPROC ENDPROC(__get_user_8) bad_get_user: - CFI_STARTPROC xor %edx,%edx mov $(-EFAULT),%_ASM_AX ASM_CLAC ret - CFI_ENDPROC END(bad_get_user) #ifdef CONFIG_X86_32 bad_get_user_8: - CFI_STARTPROC xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX ASM_CLAC ret - CFI_ENDPROC END(bad_get_user_8) #endif diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index 05a95e713da8..33147fef3452 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -16,15 +16,12 @@ */ #include -#include /* * override generic version in lib/iomap_copy.c */ ENTRY(__iowrite32_copy) - CFI_STARTPROC movl %edx,%ecx rep movsd ret - CFI_ENDPROC ENDPROC(__iowrite32_copy) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index b046664f5a1c..16698bba87de 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -2,7 +2,6 @@ #include #include -#include #include /* @@ -53,7 +52,6 @@ ENTRY(memcpy_erms) ENDPROC(memcpy_erms) ENTRY(memcpy_orig) - CFI_STARTPROC movq %rdi, %rax cmpq $0x20, %rdx @@ -178,5 +176,4 @@ ENTRY(memcpy_orig) .Lend: retq - CFI_ENDPROC ENDPROC(memcpy_orig) diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0f8a0d0331b9..ca2afdd6d98e 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -6,7 +6,6 @@ * - Copyright 2011 Fenghua Yu */ #include -#include #include #include @@ -27,7 +26,6 @@ ENTRY(memmove) ENTRY(__memmove) - CFI_STARTPROC /* Handle more 32 bytes in loop */ mov %rdi, %rax @@ -207,6 +205,5 @@ ENTRY(__memmove) movb %r11b, (%rdi) 13: retq - CFI_ENDPROC ENDPROC(__memmove) ENDPROC(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 93118fb23976..2661fad05827 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -1,7 +1,6 @@ /* Copyright 2002 Andi Kleen, SuSE Labs */ #include -#include #include #include @@ -66,7 +65,6 @@ ENTRY(memset_erms) ENDPROC(memset_erms) ENTRY(memset_orig) - CFI_STARTPROC movq %rdi,%r10 /* expand byte value */ @@ -78,7 +76,6 @@ ENTRY(memset_orig) movl %edi,%r9d andl $7,%r9d jnz .Lbad_alignment - CFI_REMEMBER_STATE .Lafter_bad_alignment: movq %rdx,%rcx @@ -128,7 +125,6 @@ ENTRY(memset_orig) movq %r10,%rax ret - CFI_RESTORE_STATE .Lbad_alignment: cmpq $7,%rdx jbe .Lhandle_7 @@ -139,5 +135,4 @@ ENTRY(memset_orig) subq %r8,%rdx jmp 
.Lafter_bad_alignment .Lfinal: - CFI_ENDPROC ENDPROC(memset_orig) diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 3ca5218fbece..c81556409bbb 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -13,9 +12,8 @@ */ .macro op_safe_regs op ENTRY(\op\()_safe_regs) - CFI_STARTPROC - pushq_cfi_reg rbx - pushq_cfi_reg rbp + pushq %rbx + pushq %rbp movq %rdi, %r10 /* Save pointer */ xorl %r11d, %r11d /* Return value */ movl (%rdi), %eax @@ -25,7 +23,6 @@ ENTRY(\op\()_safe_regs) movl 20(%rdi), %ebp movl 24(%rdi), %esi movl 28(%rdi), %edi - CFI_REMEMBER_STATE 1: \op 2: movl %eax, (%r10) movl %r11d, %eax /* Return value */ @@ -35,16 +32,14 @@ ENTRY(\op\()_safe_regs) movl %ebp, 20(%r10) movl %esi, 24(%r10) movl %edi, 28(%r10) - popq_cfi_reg rbp - popq_cfi_reg rbx + popq %rbp + popq %rbx ret 3: - CFI_RESTORE_STATE movl $-EIO, %r11d jmp 2b _ASM_EXTABLE(1b, 3b) - CFI_ENDPROC ENDPROC(\op\()_safe_regs) .endm @@ -52,13 +47,12 @@ ENDPROC(\op\()_safe_regs) .macro op_safe_regs op ENTRY(\op\()_safe_regs) - CFI_STARTPROC - pushl_cfi_reg ebx - pushl_cfi_reg ebp - pushl_cfi_reg esi - pushl_cfi_reg edi - pushl_cfi $0 /* Return value */ - pushl_cfi %eax + pushl %ebx + pushl %ebp + pushl %esi + pushl %edi + pushl $0 /* Return value */ + pushl %eax movl 4(%eax), %ecx movl 8(%eax), %edx movl 12(%eax), %ebx @@ -66,32 +60,28 @@ ENTRY(\op\()_safe_regs) movl 24(%eax), %esi movl 28(%eax), %edi movl (%eax), %eax - CFI_REMEMBER_STATE 1: \op -2: pushl_cfi %eax +2: pushl %eax movl 4(%esp), %eax - popl_cfi (%eax) + popl (%eax) addl $4, %esp - CFI_ADJUST_CFA_OFFSET -4 movl %ecx, 4(%eax) movl %edx, 8(%eax) movl %ebx, 12(%eax) movl %ebp, 20(%eax) movl %esi, 24(%eax) movl %edi, 28(%eax) - popl_cfi %eax - popl_cfi_reg edi - popl_cfi_reg esi - popl_cfi_reg ebp - popl_cfi_reg ebx + popl %eax + popl %edi + popl %esi + popl %ebp + popl %ebx ret 3: - CFI_RESTORE_STATE movl $-EIO, 4(%esp) jmp 2b _ASM_EXTABLE(1b, 3b) - CFI_ENDPROC ENDPROC(\op\()_safe_regs) .endm diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index fc6ba17a7eec..e0817a12d323 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -11,7 +11,6 @@ * return value. */ #include -#include #include #include #include @@ -30,11 +29,9 @@ * as they get called from within inline assembly. */ -#define ENTER CFI_STARTPROC ; \ - GET_THREAD_INFO(%_ASM_BX) +#define ENTER GET_THREAD_INFO(%_ASM_BX) #define EXIT ASM_CLAC ; \ - ret ; \ - CFI_ENDPROC + ret .text ENTRY(__put_user_1) @@ -87,7 +84,6 @@ ENTRY(__put_user_8) ENDPROC(__put_user_8) bad_put_user: - CFI_STARTPROC movl $-EFAULT,%eax EXIT END(bad_put_user) diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S index 2322abe4da3b..40027db99140 100644 --- a/arch/x86/lib/rwsem.S +++ b/arch/x86/lib/rwsem.S @@ -15,7 +15,6 @@ #include #include -#include #define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg) #define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l) @@ -34,10 +33,10 @@ */ #define save_common_regs \ - pushl_cfi_reg ecx + pushl %ecx #define restore_common_regs \ - popl_cfi_reg ecx + popl %ecx /* Avoid uglifying the argument copying x86-64 needs to do. 
*/ .macro movq src, dst @@ -64,50 +63,45 @@ */ #define save_common_regs \ - pushq_cfi_reg rdi; \ - pushq_cfi_reg rsi; \ - pushq_cfi_reg rcx; \ - pushq_cfi_reg r8; \ - pushq_cfi_reg r9; \ - pushq_cfi_reg r10; \ - pushq_cfi_reg r11 + pushq %rdi; \ + pushq %rsi; \ + pushq %rcx; \ + pushq %r8; \ + pushq %r9; \ + pushq %r10; \ + pushq %r11 #define restore_common_regs \ - popq_cfi_reg r11; \ - popq_cfi_reg r10; \ - popq_cfi_reg r9; \ - popq_cfi_reg r8; \ - popq_cfi_reg rcx; \ - popq_cfi_reg rsi; \ - popq_cfi_reg rdi + popq %r11; \ + popq %r10; \ + popq %r9; \ + popq %r8; \ + popq %rcx; \ + popq %rsi; \ + popq %rdi #endif /* Fix up special calling conventions */ ENTRY(call_rwsem_down_read_failed) - CFI_STARTPROC save_common_regs - __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) + __ASM_SIZE(push,) %__ASM_REG(dx) movq %rax,%rdi call rwsem_down_read_failed - __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) + __ASM_SIZE(pop,) %__ASM_REG(dx) restore_common_regs ret - CFI_ENDPROC ENDPROC(call_rwsem_down_read_failed) ENTRY(call_rwsem_down_write_failed) - CFI_STARTPROC save_common_regs movq %rax,%rdi call rwsem_down_write_failed restore_common_regs ret - CFI_ENDPROC ENDPROC(call_rwsem_down_write_failed) ENTRY(call_rwsem_wake) - CFI_STARTPROC /* do nothing if still outstanding active readers */ __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx) jnz 1f @@ -116,17 +110,14 @@ ENTRY(call_rwsem_wake) call rwsem_wake restore_common_regs 1: ret - CFI_ENDPROC ENDPROC(call_rwsem_wake) ENTRY(call_rwsem_downgrade_wake) - CFI_STARTPROC save_common_regs - __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) + __ASM_SIZE(push,) %__ASM_REG(dx) movq %rax,%rdi call rwsem_downgrade_wake - __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) + __ASM_SIZE(pop,) %__ASM_REG(dx) restore_common_regs ret - CFI_ENDPROC ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 5eb715087b80..e9acf5f4fc92 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -6,16 +6,14 @@ */ #include #include - #include /* put return address in eax (arg1) */ .macro THUNK name, func, put_ret_addr_in_eax=0 .globl \name \name: - CFI_STARTPROC - pushl_cfi_reg eax - pushl_cfi_reg ecx - pushl_cfi_reg edx + pushl %eax + pushl %ecx + pushl %edx .if \put_ret_addr_in_eax /* Place EIP in the arg1 */ @@ -23,11 +21,10 @@ .endif call \func - popl_cfi_reg edx - popl_cfi_reg ecx - popl_cfi_reg eax + popl %edx + popl %ecx + popl %eax ret - CFI_ENDPROC _ASM_NOKPROBE(\name) .endm diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index f89ba4e93025..10f555e435e1 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -6,7 +6,6 @@ * Subject to the GNU public license, v.2. No warranty of any kind. 
*/ #include -#include #include #include @@ -14,27 +13,25 @@ .macro THUNK name, func, put_ret_addr_in_rdi=0 .globl \name \name: - CFI_STARTPROC /* this one pushes 9 elems, the next one would be %rIP */ - pushq_cfi_reg rdi - pushq_cfi_reg rsi - pushq_cfi_reg rdx - pushq_cfi_reg rcx - pushq_cfi_reg rax - pushq_cfi_reg r8 - pushq_cfi_reg r9 - pushq_cfi_reg r10 - pushq_cfi_reg r11 + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 .if \put_ret_addr_in_rdi /* 9*8(%rsp) is return addr on stack */ - movq_cfi_restore 9*8, rdi + movq 9*8(%rsp), %rdi .endif call \func jmp restore - CFI_ENDPROC _ASM_NOKPROBE(\name) .endm @@ -57,19 +54,16 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPT) - CFI_STARTPROC - CFI_ADJUST_CFA_OFFSET 9*8 restore: - popq_cfi_reg r11 - popq_cfi_reg r10 - popq_cfi_reg r9 - popq_cfi_reg r8 - popq_cfi_reg rax - popq_cfi_reg rcx - popq_cfi_reg rdx - popq_cfi_reg rsi - popq_cfi_reg rdi + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi ret - CFI_ENDPROC _ASM_NOKPROBE(restore) #endif diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 6440221ced0d..4093216b3791 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -8,7 +8,6 @@ * of the License. */ #include -#include /* * Calling convention : From 2bf557ea3f49576fabe24cd5daf1a34e9ee22c3c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 1 Jun 2015 13:02:55 +0100 Subject: [PATCH 427/481] x86/asm/entry/64: Use negative immediates for stack adjustments Doing so allows adjustments by 128 bytes (occurring for REMOVE_PT_GPREGS_FROM_STACK 8 uses) to be expressed with a single byte immediate. Signed-off-by: Jan Beulich Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/556C660F020000780007FB60@mail.emea.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/calling.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 0d76accde45b..f4e6308c4200 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -89,7 +89,7 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 - subq $15*8+\addskip, %rsp + addq $-(15*8+\addskip), %rsp .endm .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 @@ -201,7 +201,7 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 - addq $15*8+\addskip, %rsp + subq $-(15*8+\addskip), %rsp .endm .macro icebp From 2f63b9db7260beba3c19d66d6c11b0b78ea84a8c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 1 Jun 2015 13:03:59 +0100 Subject: [PATCH 428/481] x86/asm/entry/64: Fold identical code paths retint_kernel doesn't require %rcx to be pointing to thread info (anymore?), and the code on the two alternative paths is - not really surprisingly - identical. Signed-off-by: Jan Beulich Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/556C664F020000780007FB64@mail.emea.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b84cec50c8cf..4ad79e946f5a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -615,7 +615,7 @@ ret_from_intr: testb $3, CS(%rsp) jz retint_kernel /* Interrupt came from user space */ - +retint_user: GET_THREAD_INFO(%rcx) /* * %rcx: thread info. Interrupts off. @@ -1194,15 +1194,9 @@ ENTRY(error_exit) RESTORE_EXTRA_REGS DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - GET_THREAD_INFO(%rcx) testl %eax,%eax jnz retint_kernel - LOCKDEP_SYS_EXIT_IRQ - movl TI_flags(%rcx),%edx - movl $_TIF_WORK_MASK,%edi - andl %edi,%edx - jnz retint_careful - jmp retint_swapgs + jmp retint_user END(error_exit) /* Runs on exception stack */ From 92ae18371cb1abb4e186dd9d48de2bb0d9bba626 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 2 Jun 2015 15:38:27 +0200 Subject: [PATCH 429/481] lockdep: Do not break user-visible string Remove the line-break in the user-visible string and add the missing space in this error message: WARNING: lockdep init error! lock-(console_sem).lock was acquiredbefore lockdep_init Also: - don't yell, it's just a debug warning - denote references to function calls with '()' - standardize the lock name quoting - and finish the sentence. The result: WARNING: lockdep init error: lock '(console_sem).lock' was acquired before lockdep_init(). Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150602133827.GD19887@pd.tnic [ Added a few more stylistic tweaks to the error message. ] Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index a0831e1b99f4..a61bb1d37a52 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4066,8 +4066,7 @@ void __init lockdep_info(void) #ifdef CONFIG_DEBUG_LOCKDEP if (lockdep_init_error) { - printk("WARNING: lockdep init error! lock-%s was acquired" - "before lockdep_init\n", lock_init_error); + printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error); printk("Call stack leading to lockdep invocation was:\n"); print_stack_trace(&lockdep_init_trace, 0); } From 6471b825c41e6fc3cd41caa18d15142d0e121e76 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Jun 2015 10:00:13 +0200 Subject: [PATCH 430/481] x86/kconfig: Reorganize arch feature Kconfig select's Peter Zijstra noticed that in arch/x86/Kconfig there are a lot of X86_{32,64} clauses in the X86 symbol, plus there are a number of similar selects in the X86_32 and X86_64 config definitions as well - which all overlap in an inconsistent mess. So: - move all select's from X86_32 and X86_64 to the X64 config option - sort their names, so that duplications are easier to spot - align their if clauses, so that they are easier to identify at a glance - and so that weirdnesses stand out more No change in functionality: 105 insertions(+) 105 deletions(-) Originally-from: Peter Zijlstra Cc: Andrew Morton Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150602153027.GU3644@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 214 +++++++++++++++++++++++------------------------ 1 file changed, 107 insertions(+), 107 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a708bcc1615c..8a5cca39e74f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -9,141 +9,141 @@ config 64BIT config X86_32 def_bool y depends on !64BIT - select CLKSRC_I8253 - select HAVE_UID16 config X86_64 def_bool y depends on 64BIT - select X86_DEV_DMA_OPS - select ARCH_USE_CMPXCHG_LOCKREF - select HAVE_LIVEPATCH ### Arch settings config X86 def_bool y - select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI - select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI + select ACPI_LEGACY_TABLES_LOOKUP if ACPI + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI + select ANON_INODES + select ARCH_CLOCKSOURCE_DATA + select ARCH_DISCARD_MEMBLOCK + select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SG_CHAIN + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO - select HAVE_AOUT if X86_32 - select HAVE_UNSTABLE_SCHED_CLOCK - select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 - select ARCH_SUPPORTS_INT128 if X86_64 - select HAVE_IDE - select HAVE_OPROFILE - select HAVE_PCSPKR_PLATFORM - select HAVE_PERF_EVENTS - select HAVE_IOREMAP_PROT - select HAVE_KPROBES - select HAVE_MEMBLOCK - select HAVE_MEMBLOCK_NODE_MAP - select ARCH_DISCARD_MEMBLOCK - select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_INT128 if X86_64 + select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if X86_64 + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_FRAME_POINTERS + select ARCH_WANT_IPC_PARSE_VERSION if X86_32 + select ARCH_WANT_OPTIONAL_GPIOLIB + select BUILDTIME_EXTABLE_SORT + select CLKEVT_I8253 + select CLKSRC_I8253 if X86_32 + select CLOCKSOURCE_VALIDATE_LAST_CYCLE + select CLOCKSOURCE_WATCHDOG + select CLONE_BACKWARDS if X86_32 + select COMPAT_OLD_SIGACTION if IA32_EMULATION + select DCACHE_WORD_ACCESS + select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) + select GENERIC_CLOCKEVENTS_MIN_ADJUST + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_EARLY_IOREMAP + select GENERIC_FIND_FIRST_BIT + select GENERIC_IOMAP + select GENERIC_IRQ_PROBE + select GENERIC_IRQ_SHOW + select GENERIC_PENDING_IRQ if SMP + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL + select HAVE_ACPI_APEI if ACPI + select HAVE_ACPI_APEI_NMI if ACPI + select HAVE_ALIGNED_STRUCT_PAGE if SLUB + select HAVE_AOUT if X86_32 + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP + select HAVE_ARCH_KGDB + select HAVE_ARCH_KMEMCHECK + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_SOFT_DIRTY if X86_64 + select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_BPF_JIT if X86_64 + select HAVE_CC_STACKPROTECTOR + select HAVE_CMPXCHG_DOUBLE + select 
HAVE_CMPXCHG_LOCAL + select HAVE_CONTEXT_TRACKING if X86_64 + select HAVE_C_RECORDMCOUNT + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS - select HAVE_KRETPROBES - select GENERIC_EARLY_IOREMAP - select HAVE_OPTPROBES - select HAVE_KPROBES_ON_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_FENTRY if X86_64 - select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_GRAPH_FP_TEST - select HAVE_SYSCALL_TRACEPOINTS - select SYSCTL_EXCEPTION_TRACE - select HAVE_KVM - select HAVE_ARCH_KGDB - select HAVE_ARCH_TRACEHOOK - select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS - select USER_STACKTRACE_SUPPORT - select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_DMA_API_DEBUG - select HAVE_KERNEL_GZIP - select HAVE_KERNEL_BZIP2 - select HAVE_KERNEL_LZMA - select HAVE_KERNEL_XZ - select HAVE_KERNEL_LZO - select HAVE_KERNEL_LZ4 + select HAVE_FENTRY if X86_64 + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_FP_TEST + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER + select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_HW_BREAKPOINT + select HAVE_IDE + select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 + select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 + select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_KPROBES + select HAVE_KPROBES_ON_FTRACE + select HAVE_KRETPROBES + select HAVE_KVM + select HAVE_LIVEPATCH if X86_64 + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP select HAVE_MIXED_BREAKPOINTS_REGS - select PERF_EVENTS + select HAVE_OPROFILE + select HAVE_OPTPROBES + select HAVE_PCSPKR_PLATFORM + select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_DEBUG_KMEMLEAK - select ANON_INODES - select HAVE_ALIGNED_STRUCT_PAGE if SLUB - select HAVE_CMPXCHG_LOCAL - select HAVE_CMPXCHG_DOUBLE - select HAVE_ARCH_KMEMCHECK - select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_UID16 if X86_32 + select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER - select ARCH_HAS_ELF_RANDOMIZE - select HAVE_ARCH_JUMP_LABEL - select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select SPARSE_IRQ - select GENERIC_FIND_FIRST_BIT - select GENERIC_IRQ_PROBE - select GENERIC_PENDING_IRQ if SMP - select GENERIC_IRQ_SHOW - select GENERIC_CLOCKEVENTS_MIN_ADJUST select IRQ_FORCED_THREADING - select HAVE_BPF_JIT if X86_64 - select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE - select ARCH_HAS_SG_CHAIN - select CLKEVT_I8253 - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select GENERIC_IOMAP - select DCACHE_WORD_ACCESS - select GENERIC_SMP_IDLE_THREAD - select ARCH_WANT_IPC_PARSE_VERSION if X86_32 - select HAVE_ARCH_SECCOMP_FILTER - select BUILDTIME_EXTABLE_SORT - select GENERIC_CMOS_UPDATE - select HAVE_ARCH_SOFT_DIRTY if X86_64 - select CLOCKSOURCE_WATCHDOG - select GENERIC_CLOCKEVENTS - select ARCH_CLOCKSOURCE_DATA - select CLOCKSOURCE_VALIDATE_LAST_CYCLE - select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) - select GENERIC_TIME_VSYSCALL - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_CONTEXT_TRACKING 
if X86_64 - select HAVE_IRQ_TIME_ACCOUNTING - select VIRT_TO_BUS - select MODULES_USE_ELF_REL if X86_32 - select MODULES_USE_ELF_RELA if X86_64 - select CLONE_BACKWARDS if X86_32 - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_QUEUED_SPINLOCKS - select ARCH_USE_QUEUED_RWLOCKS - select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION - select OLD_SIGACTION if X86_32 - select COMPAT_OLD_SIGACTION if IA32_EMULATION + select MODULES_USE_ELF_RELA if X86_64 + select MODULES_USE_ELF_REL if X86_32 + select OLD_SIGACTION if X86_32 + select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION + select PERF_EVENTS select RTC_LIB - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 - select HAVE_CC_STACKPROTECTOR - select GENERIC_CPU_AUTOPROBE - select HAVE_ARCH_AUDITSYSCALL - select ARCH_SUPPORTS_ATOMIC_RMW - select HAVE_ACPI_APEI if ACPI - select HAVE_ACPI_APEI_NMI if ACPI - select ACPI_LEGACY_TABLES_LOOKUP if ACPI - select X86_FEATURE_NAMES if PROC_FS + select SPARSE_IRQ select SRCU + select SYSCTL_EXCEPTION_TRACE + select USER_STACKTRACE_SUPPORT + select VIRT_TO_BUS + select X86_DEV_DMA_OPS if X86_64 + select X86_FEATURE_NAMES if PROC_FS config INSTRUCTION_DECODER def_bool y From d6472302f242559d45dcf4ebace62508dc4d8aeb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 2 Jun 2015 19:01:38 +1000 Subject: [PATCH 431/481] x86/mm: Decouple from Nothing in uses anything from , so remove it from there and fix up the resulting build problems triggered on x86 {64|32}-bit {def|allmod|allno}configs. The breakages were triggering in places where x86 builds relied on vmalloc() facilities but did not include explicitly and relied on the implicit inclusion via . Also add: - to - to ... which were two other implicit header file dependencies. Suggested-by: David Miller Signed-off-by: Stephen Rothwell [ Tidied up the changelog. ] Acked-by: David Miller Acked-by: Takashi Iwai Acked-by: Viresh Kumar Acked-by: Vinod Koul Cc: Andrew Morton Cc: Anton Vorontsov Cc: Boris Ostrovsky Cc: Colin Cross Cc: David Vrabel Cc: H. Peter Anvin Cc: Haiyang Zhang Cc: James E.J. Bottomley Cc: Jaroslav Kysela Cc: K. Y. Srinivasan Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Kristen Carlson Accardi Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Suma Ramars Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 3 +-- arch/x86/kernel/crash.c | 1 + arch/x86/kernel/machine_kexec_64.c | 1 + arch/x86/mm/pageattr-test.c | 1 + arch/x86/mm/pageattr.c | 1 + arch/x86/xen/p2m.c | 1 + drivers/acpi/apei/erst.c | 1 + drivers/cpufreq/intel_pstate.c | 1 + drivers/dma/mic_x100_dma.c | 1 + drivers/net/hyperv/netvsc.c | 1 + drivers/net/hyperv/rndis_filter.c | 1 + drivers/scsi/fnic/fnic_debugfs.c | 1 + drivers/scsi/fnic/fnic_trace.c | 1 + include/linux/io.h | 1 + sound/pci/asihpi/hpioctl.c | 1 + 15 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index a2b97404922d..a94463063b46 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -40,6 +40,7 @@ #include #include #include +#include #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -198,8 +199,6 @@ extern void set_iounmap_nonlazy(void); #include -#include - /* * Convert a virtual cached pointer to an uncached pointer */ diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c76d3e37c6e1..e068d6683dba 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 415480d3ea84..11546b462fa6 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 6629f397b467..8ff686aa7e8c 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 70d221fe2eb4..fae3c5366ac0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b47124d4cd67..8b7f18e200aa 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index ed65e9c4b5b0..3670bbab57a3 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "apei-internal.h" diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6414661ac1c4..2ba53f4f6af2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c index 6de2e677be04..74d9db05a5ad 100644 --- a/drivers/dma/mic_x100_dma.c +++ b/drivers/dma/mic_x100_dma.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "mic_x100_dma.h" diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ea091bc5ff09..1e09243d5449 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "hyperv_net.h" diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 9118cea91882..35a482d526d9 100644 --- a/drivers/net/hyperv/rndis_filter.c 
+++ b/drivers/net/hyperv/rndis_filter.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "hyperv_net.h" diff --git a/drivers/scsi/fnic/fnic_debugfs.c b/drivers/scsi/fnic/fnic_debugfs.c index 5980c10c734d..d6498fabe628 100644 --- a/drivers/scsi/fnic/fnic_debugfs.c +++ b/drivers/scsi/fnic/fnic_debugfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "fnic.h" static struct dentry *fnic_trace_debugfs_root; diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c index 65a9bde26974..4e15c4bf0795 100644 --- a/drivers/scsi/fnic/fnic_trace.c +++ b/drivers/scsi/fnic/fnic_trace.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "fnic_io.h" #include "fnic.h" diff --git a/include/linux/io.h b/include/linux/io.h index 04cce4da3685..fb5a99800e77 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -19,6 +19,7 @@ #define _LINUX_IO_H #include +#include #include #include diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 6610bd096fc9..d17937b92331 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef MODULE_FIRMWARE MODULE_FIRMWARE("asihpi/dsp5000.bin"); From 905a36a2851838bca5a424fb758e201990234e6e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Jun 2015 13:37:36 +0200 Subject: [PATCH 432/481] x86/asm/entry: Move entry_64.S and entry_32.S to arch/x86/entry/ Create a new directory hierarchy for the low level x86 entry code: arch/x86/entry/* This will host all the low level glue that is currently scattered all across arch/x86/. Start with entry_64.S and entry_32.S. Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kbuild | 3 +++ arch/x86/entry/Makefile | 4 ++++ arch/x86/{kernel => entry}/entry_32.S | 0 arch/x86/{kernel => entry}/entry_64.S | 0 arch/x86/kernel/Makefile | 2 +- 5 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 arch/x86/entry/Makefile rename arch/x86/{kernel => entry}/entry_32.S (100%) rename arch/x86/{kernel => entry}/entry_64.S (100%) diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 3942f74c92d7..b9b816277e72 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -1,3 +1,6 @@ + +obj-y += entry/ + obj-$(CONFIG_KVM) += kvm/ # Xen paravirtualization support diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile new file mode 100644 index 000000000000..fa7e0cf6d3c4 --- /dev/null +++ b/arch/x86/entry/Makefile @@ -0,0 +1,4 @@ +# +# Makefile for the x86 low level entry code +# +obj-y := entry_$(BITS).o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/entry/entry_32.S similarity index 100% rename from arch/x86/kernel/entry_32.S rename to arch/x86/entry/entry_32.S diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/entry/entry_64.S similarity index 100% rename from arch/x86/kernel/entry_64.S rename to arch/x86/entry/entry_64.S diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9bcd0b56ca17..9d3ee054453d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -22,7 +22,7 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n CFLAGS_irq.o := -I$(src)/../include/asm/trace -obj-y := process_$(BITS).o signal.o entry_$(BITS).o +obj-y := process_$(BITS).o signal.o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o obj-y += setup.o x86_init.o 
i8259.o irqinit.o jump_label.o From 19a433f45135656d065bdf1bbe543796b194ba3a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Jun 2015 18:00:59 +0200 Subject: [PATCH 433/481] x86/asm/entry: Move the compat syscall entry code to arch/x86/entry/ Move the ia32entry.S file over into arch/x86/entry/. Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/Makefile | 5 ++++- arch/x86/{ia32 => entry}/ia32entry.S | 0 arch/x86/ia32/Makefile | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) rename arch/x86/{ia32 => entry}/ia32entry.S (100%) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index fa7e0cf6d3c4..4a626594fa79 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -1,4 +1,7 @@ # # Makefile for the x86 low level entry code # -obj-y := entry_$(BITS).o +obj-y := entry_$(BITS).o + +obj-$(CONFIG_IA32_EMULATION) += ia32entry.o + diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/entry/ia32entry.S similarity index 100% rename from arch/x86/ia32/ia32entry.S rename to arch/x86/entry/ia32entry.S diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile index bb635c641869..cd4339bae066 100644 --- a/arch/x86/ia32/Makefile +++ b/arch/x86/ia32/Makefile @@ -2,7 +2,7 @@ # Makefile for the ia32 kernel emulation subsystem. # -obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o +obj-$(CONFIG_IA32_EMULATION) := sys_ia32.o ia32_signal.o obj-$(CONFIG_IA32_AOUT) += ia32_aout.o From d603c8e184d882cc3f55c599f4185bad956ee0a7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Jun 2015 18:05:44 +0200 Subject: [PATCH 434/481] x86/asm/entry, x86/vdso: Move the vDSO code to arch/x86/entry/vdso/ Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- MAINTAINERS | 2 +- arch/x86/Kbuild | 2 +- arch/x86/Makefile | 2 +- arch/x86/entry/Makefile | 2 ++ arch/x86/{ => entry}/vdso/.gitignore | 0 arch/x86/{ => entry}/vdso/Makefile | 0 arch/x86/{ => entry}/vdso/checkundef.sh | 0 arch/x86/{ => entry}/vdso/vclock_gettime.c | 0 arch/x86/{ => entry}/vdso/vdso-layout.lds.S | 0 arch/x86/{ => entry}/vdso/vdso-note.S | 0 arch/x86/{ => entry}/vdso/vdso.lds.S | 0 arch/x86/{ => entry}/vdso/vdso2c.c | 0 arch/x86/{ => entry}/vdso/vdso2c.h | 0 arch/x86/{ => entry}/vdso/vdso32-setup.c | 0 arch/x86/{ => entry}/vdso/vdso32/.gitignore | 0 arch/x86/{ => entry}/vdso/vdso32/int80.S | 0 arch/x86/{ => entry}/vdso/vdso32/note.S | 0 arch/x86/{ => entry}/vdso/vdso32/sigreturn.S | 0 arch/x86/{ => entry}/vdso/vdso32/syscall.S | 0 arch/x86/{ => entry}/vdso/vdso32/sysenter.S | 0 arch/x86/{ => entry}/vdso/vdso32/vclock_gettime.c | 0 arch/x86/{ => entry}/vdso/vdso32/vdso-fakesections.c | 0 arch/x86/{ => entry}/vdso/vdso32/vdso32.lds.S | 0 arch/x86/{ => entry}/vdso/vdsox32.lds.S | 0 arch/x86/{ => entry}/vdso/vgetcpu.c | 0 arch/x86/{ => entry}/vdso/vma.c | 0 26 files changed, 5 insertions(+), 3 deletions(-) rename arch/x86/{ => entry}/vdso/.gitignore (100%) rename arch/x86/{ => entry}/vdso/Makefile (100%) rename arch/x86/{ => entry}/vdso/checkundef.sh (100%) rename arch/x86/{ => entry}/vdso/vclock_gettime.c (100%) rename arch/x86/{ => entry}/vdso/vdso-layout.lds.S (100%) rename arch/x86/{ => entry}/vdso/vdso-note.S (100%) rename arch/x86/{ => entry}/vdso/vdso.lds.S (100%) rename arch/x86/{ => entry}/vdso/vdso2c.c (100%) rename arch/x86/{ => entry}/vdso/vdso2c.h (100%) rename arch/x86/{ => entry}/vdso/vdso32-setup.c (100%) rename arch/x86/{ => entry}/vdso/vdso32/.gitignore (100%) rename arch/x86/{ => entry}/vdso/vdso32/int80.S (100%) rename arch/x86/{ => entry}/vdso/vdso32/note.S (100%) rename arch/x86/{ => entry}/vdso/vdso32/sigreturn.S (100%) rename arch/x86/{ => entry}/vdso/vdso32/syscall.S (100%) rename arch/x86/{ => entry}/vdso/vdso32/sysenter.S (100%) rename arch/x86/{ => entry}/vdso/vdso32/vclock_gettime.c (100%) rename arch/x86/{ => entry}/vdso/vdso32/vdso-fakesections.c (100%) rename arch/x86/{ => entry}/vdso/vdso32/vdso32.lds.S (100%) rename arch/x86/{ => entry}/vdso/vdsox32.lds.S (100%) rename arch/x86/{ => entry}/vdso/vgetcpu.c (100%) rename arch/x86/{ => entry}/vdso/vma.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index f8e0afb708b4..68c0cc365432 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10892,7 +10892,7 @@ M: Andy Lutomirski L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vdso S: Maintained -F: arch/x86/vdso/ +F: arch/x86/entry/vdso/ XC2028/3028 TUNER DRIVER M: Mauro Carvalho Chehab diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index b9b816277e72..1538562cc720 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -14,7 +14,7 @@ obj-y += kernel/ obj-y += mm/ obj-y += crypto/ -obj-y += vdso/ + obj-$(CONFIG_IA32_EMULATION) += ia32/ obj-y += platform/ diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 43e8328a23e4..90b1c3bfec46 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -244,7 +244,7 @@ install: PHONY += vdso_install vdso_install: - $(Q)$(MAKE) $(build)=arch/x86/vdso $@ + $(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@ archclean: $(Q)rm -rf $(objtree)/arch/i386 diff --git a/arch/x86/entry/Makefile 
b/arch/x86/entry/Makefile index 4a626594fa79..c97532492ef5 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -3,5 +3,7 @@ # obj-y := entry_$(BITS).o +obj-y += vdso/ + obj-$(CONFIG_IA32_EMULATION) += ia32entry.o diff --git a/arch/x86/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore similarity index 100% rename from arch/x86/vdso/.gitignore rename to arch/x86/entry/vdso/.gitignore diff --git a/arch/x86/vdso/Makefile b/arch/x86/entry/vdso/Makefile similarity index 100% rename from arch/x86/vdso/Makefile rename to arch/x86/entry/vdso/Makefile diff --git a/arch/x86/vdso/checkundef.sh b/arch/x86/entry/vdso/checkundef.sh similarity index 100% rename from arch/x86/vdso/checkundef.sh rename to arch/x86/entry/vdso/checkundef.sh diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c similarity index 100% rename from arch/x86/vdso/vclock_gettime.c rename to arch/x86/entry/vdso/vclock_gettime.c diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S similarity index 100% rename from arch/x86/vdso/vdso-layout.lds.S rename to arch/x86/entry/vdso/vdso-layout.lds.S diff --git a/arch/x86/vdso/vdso-note.S b/arch/x86/entry/vdso/vdso-note.S similarity index 100% rename from arch/x86/vdso/vdso-note.S rename to arch/x86/entry/vdso/vdso-note.S diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S similarity index 100% rename from arch/x86/vdso/vdso.lds.S rename to arch/x86/entry/vdso/vdso.lds.S diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c similarity index 100% rename from arch/x86/vdso/vdso2c.c rename to arch/x86/entry/vdso/vdso2c.c diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h similarity index 100% rename from arch/x86/vdso/vdso2c.h rename to arch/x86/entry/vdso/vdso2c.h diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c similarity index 100% rename from arch/x86/vdso/vdso32-setup.c rename to arch/x86/entry/vdso/vdso32-setup.c diff --git a/arch/x86/vdso/vdso32/.gitignore b/arch/x86/entry/vdso/vdso32/.gitignore similarity index 100% rename from arch/x86/vdso/vdso32/.gitignore rename to arch/x86/entry/vdso/vdso32/.gitignore diff --git a/arch/x86/vdso/vdso32/int80.S b/arch/x86/entry/vdso/vdso32/int80.S similarity index 100% rename from arch/x86/vdso/vdso32/int80.S rename to arch/x86/entry/vdso/vdso32/int80.S diff --git a/arch/x86/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S similarity index 100% rename from arch/x86/vdso/vdso32/note.S rename to arch/x86/entry/vdso/vdso32/note.S diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S similarity index 100% rename from arch/x86/vdso/vdso32/sigreturn.S rename to arch/x86/entry/vdso/vdso32/sigreturn.S diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/entry/vdso/vdso32/syscall.S similarity index 100% rename from arch/x86/vdso/vdso32/syscall.S rename to arch/x86/entry/vdso/vdso32/syscall.S diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/entry/vdso/vdso32/sysenter.S similarity index 100% rename from arch/x86/vdso/vdso32/sysenter.S rename to arch/x86/entry/vdso/vdso32/sysenter.S diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c similarity index 100% rename from arch/x86/vdso/vdso32/vclock_gettime.c rename to arch/x86/entry/vdso/vdso32/vclock_gettime.c diff --git a/arch/x86/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c similarity index 100% rename from 
arch/x86/vdso/vdso32/vdso-fakesections.c rename to arch/x86/entry/vdso/vdso32/vdso-fakesections.c diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S similarity index 100% rename from arch/x86/vdso/vdso32/vdso32.lds.S rename to arch/x86/entry/vdso/vdso32/vdso32.lds.S diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S similarity index 100% rename from arch/x86/vdso/vdsox32.lds.S rename to arch/x86/entry/vdso/vdsox32.lds.S diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c similarity index 100% rename from arch/x86/vdso/vgetcpu.c rename to arch/x86/entry/vdso/vgetcpu.c diff --git a/arch/x86/vdso/vma.c b/arch/x86/entry/vdso/vma.c similarity index 100% rename from arch/x86/vdso/vma.c rename to arch/x86/entry/vdso/vma.c From e6b93f4e48af8fe8373ec1132b8c7787890e7578 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Jun 2015 18:10:43 +0200 Subject: [PATCH 435/481] x86/asm/entry: Move the 'thunk' functions to arch/x86/entry/ These are all calling x86 entry code functions, so move them close to other entry code. Change lib-y to obj-y: there's no real difference between the two as we don't really drop any of them during the linking stage, and obj-y is the more common approach for core kernel object code. Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/Makefile | 3 +-- arch/x86/{lib => entry}/thunk_32.S | 0 arch/x86/{lib => entry}/thunk_64.S | 0 arch/x86/lib/Makefile | 1 - arch/x86/um/Makefile | 2 +- 5 files changed, 2 insertions(+), 4 deletions(-) rename arch/x86/{lib => entry}/thunk_32.S (100%) rename arch/x86/{lib => entry}/thunk_64.S (100%) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index c97532492ef5..9df72c8a0ac2 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -1,8 +1,7 @@ # # Makefile for the x86 low level entry code # -obj-y := entry_$(BITS).o - +obj-y := entry_$(BITS).o thunk_$(BITS).o obj-y += vdso/ obj-$(CONFIG_IA32_EMULATION) += ia32entry.o diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/entry/thunk_32.S similarity index 100% rename from arch/x86/lib/thunk_32.S rename to arch/x86/entry/thunk_32.S diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/entry/thunk_64.S similarity index 100% rename from arch/x86/lib/thunk_64.S rename to arch/x86/entry/thunk_64.S diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 982989d282ff..f2587888d987 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -17,7 +17,6 @@ clean-files := inat-tables.c obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o lib-y := delay.o misc.o cmdline.o -lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index acb384d24669..a8fecc226946 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -26,7 +26,7 @@ else obj-y += syscalls_64.o vdso/ -subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \ +subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \ ../lib/rwsem.o endif From d36f947904dba0935a0e74dc9df2ef57caeb7d03 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Jun 2015 18:29:26 +0200 Subject: [PATCH 436/481] x86/asm/entry: Move arch/x86/include/asm/calling.h to arch/x86/entry/ 
asm/calling.h is private to the entry code, make this more apparent by moving it to the new arch/x86/entry/ directory. Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/{include/asm => entry}/calling.h | 0 arch/x86/entry/entry_64.S | 2 +- arch/x86/entry/ia32entry.S | 2 +- arch/x86/entry/thunk_64.S | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename arch/x86/{include/asm => entry}/calling.h (100%) diff --git a/arch/x86/include/asm/calling.h b/arch/x86/entry/calling.h similarity index 100% rename from arch/x86/include/asm/calling.h rename to arch/x86/entry/calling.h diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4ad79e946f5a..f7380ea75777 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -28,7 +28,7 @@ #include #include #include -#include +#include "calling.h" #include #include #include diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/ia32entry.S index 2be23c734db5..f16767417385 100644 --- a/arch/x86/entry/ia32entry.S +++ b/arch/x86/entry/ia32entry.S @@ -4,7 +4,7 @@ * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include +#include "calling.h" #include #include #include diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 10f555e435e1..3e95681b4e2d 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -6,7 +6,7 @@ * Subject to the GNU public license, v.2. No warranty of any kind. */ #include -#include +#include "calling.h" #include /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ From 1f57d5d85ba7f1f467173ff33f51d01a91f9aaf1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Jun 2015 18:36:41 +0200 Subject: [PATCH 437/481] x86/asm/entry: Move the arch/x86/syscalls/ definitions to arch/x86/entry/syscalls/ The build time generated syscall definitions are entry code related, move them into the arch/x86/entry/ directory. Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 2 +- arch/x86/{ => entry}/syscalls/Makefile | 4 ++-- arch/x86/{ => entry}/syscalls/syscall_32.tbl | 0 arch/x86/{ => entry}/syscalls/syscall_64.tbl | 0 arch/x86/{ => entry}/syscalls/syscallhdr.sh | 0 arch/x86/{ => entry}/syscalls/syscalltbl.sh | 0 scripts/checksyscalls.sh | 2 +- 7 files changed, 4 insertions(+), 4 deletions(-) rename arch/x86/{ => entry}/syscalls/Makefile (95%) rename arch/x86/{ => entry}/syscalls/syscall_32.tbl (100%) rename arch/x86/{ => entry}/syscalls/syscall_64.tbl (100%) rename arch/x86/{ => entry}/syscalls/syscallhdr.sh (100%) rename arch/x86/{ => entry}/syscalls/syscalltbl.sh (100%) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 90b1c3bfec46..118e6debc483 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -181,7 +181,7 @@ archscripts: scripts_basic # Syscall table generation archheaders: - $(Q)$(MAKE) $(build)=arch/x86/syscalls all + $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all archprepare: ifeq ($(CONFIG_KEXEC_FILE),y) diff --git a/arch/x86/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile similarity index 95% rename from arch/x86/syscalls/Makefile rename to arch/x86/entry/syscalls/Makefile index a55abb9f6c5e..57aa59fd140c 100644 --- a/arch/x86/syscalls/Makefile +++ b/arch/x86/entry/syscalls/Makefile @@ -1,5 +1,5 @@ -out := $(obj)/../include/generated/asm -uapi := $(obj)/../include/generated/uapi/asm +out := $(obj)/../../include/generated/asm +uapi := $(obj)/../../include/generated/uapi/asm # Create output directory if not already present _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl similarity index 100% rename from arch/x86/syscalls/syscall_32.tbl rename to arch/x86/entry/syscalls/syscall_32.tbl diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl similarity index 100% rename from arch/x86/syscalls/syscall_64.tbl rename to arch/x86/entry/syscalls/syscall_64.tbl diff --git a/arch/x86/syscalls/syscallhdr.sh b/arch/x86/entry/syscalls/syscallhdr.sh similarity index 100% rename from arch/x86/syscalls/syscallhdr.sh rename to arch/x86/entry/syscalls/syscallhdr.sh diff --git a/arch/x86/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh similarity index 100% rename from arch/x86/syscalls/syscalltbl.sh rename to arch/x86/entry/syscalls/syscalltbl.sh diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index 5b3add31f9f1..2c9082ba6137 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -212,5 +212,5 @@ EOF ) } -(ignore_list && syscall_list $(dirname $0)/../arch/x86/syscalls/syscall_32.tbl) | \ +(ignore_list && syscall_list $(dirname $0)/../arch/x86/entry/syscalls/syscall_32.tbl) | \ $* -E -x c - > /dev/null From 00398a0018d1334fedabfeaabd0fa563121de612 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Jun 2015 18:41:06 +0200 Subject: [PATCH 438/481] x86/asm/entry: Move the vsyscall code to arch/x86/entry/vsyscall/ The vsyscall code is entry code too, so move it to arch/x86/entry/vsyscall/. Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/Makefile | 6 ++++-- arch/x86/{kernel => entry}/syscall_32.c | 0 arch/x86/{kernel => entry}/syscall_64.c | 0 arch/x86/entry/vsyscall/Makefile | 7 +++++++ arch/x86/{kernel => entry/vsyscall}/vsyscall_64.c | 0 arch/x86/{kernel => entry/vsyscall}/vsyscall_emu_64.S | 0 arch/x86/{kernel => entry/vsyscall}/vsyscall_gtod.c | 0 arch/x86/{kernel => entry/vsyscall}/vsyscall_trace.h | 2 +- arch/x86/kernel/Makefile | 3 --- 9 files changed, 12 insertions(+), 6 deletions(-) rename arch/x86/{kernel => entry}/syscall_32.c (100%) rename arch/x86/{kernel => entry}/syscall_64.c (100%) create mode 100644 arch/x86/entry/vsyscall/Makefile rename arch/x86/{kernel => entry/vsyscall}/vsyscall_64.c (100%) rename arch/x86/{kernel => entry/vsyscall}/vsyscall_emu_64.S (100%) rename arch/x86/{kernel => entry/vsyscall}/vsyscall_gtod.c (100%) rename arch/x86/{kernel => entry/vsyscall}/vsyscall_trace.h (89%) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 9df72c8a0ac2..b93cce1c6bb2 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -1,8 +1,10 @@ # # Makefile for the x86 low level entry code # -obj-y := entry_$(BITS).o thunk_$(BITS).o +obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o + obj-y += vdso/ +obj-y += vsyscall/ -obj-$(CONFIG_IA32_EMULATION) += ia32entry.o +obj-$(CONFIG_IA32_EMULATION) += ia32entry.o syscall_32.o diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/entry/syscall_32.c similarity index 100% rename from arch/x86/kernel/syscall_32.c rename to arch/x86/entry/syscall_32.c diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/entry/syscall_64.c similarity index 100% rename from arch/x86/kernel/syscall_64.c rename to arch/x86/entry/syscall_64.c diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile new file mode 100644 index 000000000000..a9f4856f622a --- /dev/null +++ b/arch/x86/entry/vsyscall/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the x86 low level vsyscall code +# +obj-y := vsyscall_gtod.o + +obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o + diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c similarity index 100% rename from arch/x86/kernel/vsyscall_64.c rename to arch/x86/entry/vsyscall/vsyscall_64.c diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S similarity index 100% rename from arch/x86/kernel/vsyscall_emu_64.S rename to arch/x86/entry/vsyscall/vsyscall_emu_64.S diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c similarity index 100% rename from arch/x86/kernel/vsyscall_gtod.c rename to arch/x86/entry/vsyscall/vsyscall_gtod.c diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/entry/vsyscall/vsyscall_trace.h similarity index 89% rename from arch/x86/kernel/vsyscall_trace.h rename to arch/x86/entry/vsyscall/vsyscall_trace.h index a8b2edec54fe..9dd7359a38a8 100644 --- a/arch/x86/kernel/vsyscall_trace.h +++ b/arch/x86/entry/vsyscall/vsyscall_trace.h @@ -24,6 +24,6 @@ TRACE_EVENT(emulate_vsyscall, #endif #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../arch/x86/kernel +#define TRACE_INCLUDE_PATH ../../arch/x86/entry/vsyscall/ #define TRACE_INCLUDE_FILE vsyscall_trace #include diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9d3ee054453d..01663ee5f1b7 
100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -31,9 +31,6 @@ obj-y += probe_roms.o obj-$(CONFIG_X86_32) += i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += mcount_64.o -obj-y += syscall_$(BITS).o vsyscall_gtod.o -obj-$(CONFIG_IA32_EMULATION) += syscall_32.o -obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-y += bootflag.o e820.o From cf991de2f614f454b3cb2a300c06ecdf69f3a70d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Jun 2015 17:13:44 -0700 Subject: [PATCH 439/481] x86/asm/msr: Make wrmsrl_safe() a function The wrmsrl_safe macro performs invalid shifts if the value argument is 32 bits. This makes it unnecessarily awkward to write code that puts an unsigned long into an MSR. Convert it to a real inline function. For inspiration, see: 7c74d5b7b7b6 ("x86/asm/entry/64: Fix MSR_IA32_SYSENTER_CS MSR value"). Signed-off-by: Andy Lutomirski Cc: Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner [ Applied small improvements. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index de36f22eb0b9..13bf576c401d 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -205,8 +205,13 @@ do { \ #endif /* !CONFIG_PARAVIRT */ -#define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val), \ - (u32)((val) >> 32)) +/* + * 64-bit version of wrmsr_safe(): + */ +static inline int wrmsrl_safe(u32 msr, u64 val) +{ + return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); +} #define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high)) From 5ca6f70f387b4f82903037cc3c5488e2c97dcdbc Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Jun 2015 13:24:29 -0700 Subject: [PATCH 440/481] x86/asm/entry/64: Remove pointless jump to irq_return INTERRUPT_RETURN turns into a jmp instruction. There's no need for extra indirection. Signed-off-by: Andy Lutomirski Cc: Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2f2318653dbad284a59311f13f08cea71298fd7c.1433449436.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f7380ea75777..31770662b940 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -663,8 +663,6 @@ retint_kernel: restore_c_regs_and_iret: RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 - -irq_return: INTERRUPT_RETURN ENTRY(native_iret) @@ -1432,7 +1430,7 @@ nmi_restore: /* Clear the NMI executing stack variable */ movq $0, 5*8(%rsp) - jmp irq_return + INTERRUPT_RETURN END(nmi) ENTRY(ignore_sysret) From 61b1e3e782d6784b714c0d80de529e0737d0e79c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 2 Jun 2015 19:35:10 +0200 Subject: [PATCH 441/481] x86/asm/entry/32: Simplify the zeroing of pt_regs->r8..r11 in the int80 code path 32-bit syscall entry points do not save the complete pt_regs struct, they leave some fields uninitialized. However, they must be careful to not leak uninitialized data in pt_regs->r8..r11 to ptrace users. CLEAR_RREGS macro is used to zero these fields out when needed. 
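For context, the zeroing matters because a tracer sees the tracee's saved pt_regs through the ordinary ptrace interface, so anything left uninitialized in those slots becomes user-visible kernel stack data. Below is a minimal userspace sketch of that observation path; it is not part of the patch, assumes an x86-64 tracer stopping a 32-bit tracee at a compat syscall entry, and uses "./compat32-target" as a placeholder name for any 32-bit test binary.

/*
 * Sketch only: a 64-bit tracer stops a 32-bit tracee at syscall entry
 * and dumps the r8..r11 slots of its saved pt_regs. If the compat entry
 * code left those slots uninitialized, stale kernel data would be
 * printed here. "./compat32-target" is a hypothetical 32-bit binary.
 */
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>

int main(void)
{
	struct user_regs_struct regs;
	pid_t pid = fork();

	if (pid == 0) {
		/* Child: ask to be traced, then exec the 32-bit target. */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		execl("./compat32-target", "compat32-target", (char *)NULL);
		_exit(1);
	}

	waitpid(pid, NULL, 0);				/* stop after execve */
	ptrace(PTRACE_SYSCALL, pid, NULL, NULL);	/* run to next syscall entry */
	waitpid(pid, NULL, 0);

	ptrace(PTRACE_GETREGS, pid, NULL, &regs);
	printf("r8=%llx r9=%llx r10=%llx r11=%llx\n",
	       (unsigned long long)regs.r8, (unsigned long long)regs.r9,
	       (unsigned long long)regs.r10, (unsigned long long)regs.r11);

	kill(pid, SIGKILL);
	return 0;
}
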
However, in the int80 code path this zeroing is unconditional. This patch simplifies it by storing zeroes there right away, when pt_regs is constructed on stack. This uses shorter instructions: text data bss dec hex filename 1423 0 0 1423 58f ia32entry.o.before 1407 0 0 1407 57f ia32entry.o Compile-tested. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433266510-2938-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/ia32entry.S | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/ia32entry.S index f16767417385..f00a409dc403 100644 --- a/arch/x86/entry/ia32entry.S +++ b/arch/x86/entry/ia32entry.S @@ -421,6 +421,10 @@ ia32_badarg: movq $-EFAULT,%rax jmp ia32_sysret +ia32_ret_from_sys_call: + CLEAR_RREGS + jmp int_ret_from_sys_call + /* * Emulated IA32 system calls via int 0x80. * @@ -462,8 +466,12 @@ ENTRY(ia32_syscall) pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ + pushq $0 /* pt_regs->r8 */ + pushq $0 /* pt_regs->r9 */ + pushq $0 /* pt_regs->r10 */ + pushq $0 /* pt_regs->r11 */ cld - sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ + sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */ orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -481,13 +489,10 @@ ia32_do_call: ia32_sysret: movq %rax,RAX(%rsp) 1: -ia32_ret_from_sys_call: - CLEAR_RREGS jmp int_ret_from_sys_call ia32_tracesys: SAVE_EXTRA_REGS - CLEAR_RREGS movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ From ef0cd5dc25404594f832dad9133abae52e3b2fa3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 2 Jun 2015 21:04:01 +0200 Subject: [PATCH 442/481] x86/asm/entry/32: Open-code CLEAR_RREGS This macro is small, has only four callsites, and one of them is slightly different using a conditional parameter. A few saved lines aren't worth the resulting obfuscation. Generated machine code is identical. Signed-off-by: Denys Vlasenko [ Added comments. ] Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433271842-9139-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/ia32entry.S | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/ia32entry.S index f00a409dc403..8a45d2cd2bef 100644 --- a/arch/x86/entry/ia32entry.S +++ b/arch/x86/entry/ia32entry.S @@ -29,15 +29,6 @@ .section .entry.text, "ax" - /* clobbers %rax */ - .macro CLEAR_RREGS _r9=rax - xorl %eax,%eax - movq %rax,R11(%rsp) - movq %rax,R10(%rsp) - movq %\_r9,R9(%rsp) - movq %rax,R8(%rsp) - .endm - /* * Reload arg registers from stack in case ptrace changed them. 
* We don't reload %eax because syscall_trace_enter() returned @@ -243,7 +234,11 @@ sysexit_from_sys_call: TRACE_IRQS_OFF testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jz \exit - CLEAR_RREGS + xorl %eax, %eax /* do not leak kernel information */ + movq %rax, R11(%rsp) + movq %rax, R10(%rsp) + movq %rax, R9(%rsp) + movq %rax, R8(%rsp) jmp int_with_check .endm @@ -267,7 +262,11 @@ sysenter_tracesys: jz sysenter_auditsys #endif SAVE_EXTRA_REGS - CLEAR_RREGS + xorl %eax, %eax /* do not leak kernel information */ + movq %rax, R11(%rsp) + movq %rax, R10(%rsp) + movq %rax, R9(%rsp) + movq %rax, R8(%rsp) movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ @@ -407,7 +406,11 @@ cstar_tracesys: #endif xchgl %r9d,%ebp SAVE_EXTRA_REGS - CLEAR_RREGS r9 + xorl %eax, %eax /* do not leak kernel information */ + movq %rax, R11(%rsp) + movq %rax, R10(%rsp) + movq %r9, R9(%rsp) + movq %rax, R8(%rsp) movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ @@ -422,7 +425,11 @@ ia32_badarg: jmp ia32_sysret ia32_ret_from_sys_call: - CLEAR_RREGS + xorl %eax, %eax /* do not leak kernel information */ + movq %rax, R11(%rsp) + movq %rax, R10(%rsp) + movq %rax, R9(%rsp) + movq %rax, R8(%rsp) jmp int_ret_from_sys_call /* From 73cbf687914fd5f4ef88a42a55784fd28b7450cf Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 2 Jun 2015 21:04:02 +0200 Subject: [PATCH 443/481] x86/asm/entry/32: Open-code LOAD_ARGS32 This macro is small, has only three callsites, and one of them is slightly different using a conditional parameter. A few saved lines aren't worth the resulting obfuscation. Generated machine code is identical. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433271842-9139-2-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/ia32entry.S | 54 ++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/ia32entry.S index 8a45d2cd2bef..56f819e99258 100644 --- a/arch/x86/entry/ia32entry.S +++ b/arch/x86/entry/ia32entry.S @@ -29,28 +29,6 @@ .section .entry.text, "ax" - /* - * Reload arg registers from stack in case ptrace changed them. - * We don't reload %eax because syscall_trace_enter() returned - * the %rax value we should see. Instead, we just truncate that - * value to 32 bits again as we did on entry from user mode. - * If it's a new value set by user_regset during entry tracing, - * this matches the normal truncation of the user-mode value. - * If it's -1 to make us punt the syscall, then (u32)-1 is still - * an appropriately invalid value. - */ - .macro LOAD_ARGS32 _r9=0 - .if \_r9 - movl R9(%rsp),%r9d - .endif - movl RCX(%rsp),%ecx - movl RDX(%rsp),%edx - movl RSI(%rsp),%esi - movl RDI(%rsp),%edi - movl %eax,%eax /* zero extension */ - .endm - - #ifdef CONFIG_PARAVIRT ENTRY(native_usergs_sysret32) swapgs @@ -269,7 +247,14 @@ sysenter_tracesys: movq %rax, R8(%rsp) movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter - LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ + + /* Reload arg registers from stack. 
(see sysenter_tracesys) */ + movl RCX(%rsp), %ecx + movl RDX(%rsp), %edx + movl RSI(%rsp), %esi + movl RDI(%rsp), %edi + movl %eax, %eax /* zero extension */ + RESTORE_EXTRA_REGS jmp sysenter_do_call ENDPROC(ia32_sysenter_target) @@ -413,7 +398,15 @@ cstar_tracesys: movq %rax, R8(%rsp) movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter - LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ + movl R9(%rsp),%r9d + + /* Reload arg registers from stack. (see sysenter_tracesys) */ + movl RCX(%rsp), %ecx + movl RDX(%rsp), %edx + movl RSI(%rsp), %esi + movl RDI(%rsp), %edi + movl %eax, %eax /* zero extension */ + RESTORE_EXTRA_REGS xchgl %ebp,%r9d jmp cstar_do_call @@ -502,7 +495,18 @@ ia32_tracesys: SAVE_EXTRA_REGS movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter - LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ + /* + * Reload arg registers from stack in case ptrace changed them. + * Don't reload %eax because syscall_trace_enter() returned + * the %rax value we should see. But do truncate it to 32 bits. + * If it's -1 to make us punt the syscall, then (u32)-1 is still + * an appropriately invalid value. + */ + movl RCX(%rsp), %ecx + movl RDX(%rsp), %edx + movl RSI(%rsp), %esi + movl RDI(%rsp), %edi + movl %eax, %eax /* zero extension */ RESTORE_EXTRA_REGS jmp ia32_do_call END(ia32_syscall) From 53e9accf0f7682d717c7b578b6e01fd297ba6630 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 3 Jun 2015 14:56:09 +0200 Subject: [PATCH 444/481] x86/asm/entry/32: Do not use R9 in SYSCALL32 entry point SYSENTER and SYSCALL 32-bit entry points differ in handling of arg2 and arg6. SYSENTER: * ecx arg2 * ebp user stack * 0(%ebp) arg6 SYSCALL: * ebp arg2 * esp user stack * 0(%esp) arg6 Sysenter code loads 0(%ebp) to %ebp right away. (This destroys %ebp. It means we do not preserve it on return. It's not causing problems since userspace VDSO code does not depend on it, and SYSENTER insn can't be sanely used outside of VDSO). Syscall code loads 0(%ebp) to %r9. This allows to eliminate one MOV insn (r9 is a register where arg6 should be for 64-bit ABI), but on audit/ptrace code paths this requires juggling of r9 and ebp: (1) ptrace expects arg6 to be in pt_regs->bp; (2) r9 is callee-clobbered register and needs to be saved/restored around calls to C functions. This patch changes syscall code to load 0(%ebp) to %ebp, making it more similar to sysenter code. It's a bit smaller: text data bss dec hex filename 1407 0 0 1407 57f ia32entry.o.before 1391 0 0 1391 56f ia32entry.o To preserve ABI compat, we restore ebp on exit. Run-tested. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433336169-18964-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/ia32entry.S | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/ia32entry.S index 56f819e99258..63219154087e 100644 --- a/arch/x86/entry/ia32entry.S +++ b/arch/x86/entry/ia32entry.S @@ -323,7 +323,7 @@ ENTRY(ia32_cstar_target) * 32bit zero extended */ ASM_STAC -1: movl (%r8),%r9d +1: movl (%r8),%ebp _ASM_EXTABLE(1b,ia32_badarg) ASM_CLAC orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) @@ -333,7 +333,7 @@ ENTRY(ia32_cstar_target) cstar_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ - /* r9 already loaded */ /* arg6 */ + movl %ebp,%r9d /* arg6 */ xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ @@ -349,6 +349,7 @@ cstar_dispatch: jnz sysretl_audit sysretl_from_sys_call: andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + movl RCX(%rsp), %ebp RESTORE_RSI_RDI_RDX movl RIP(%rsp),%ecx movl EFLAGS(%rsp),%r11d @@ -375,9 +376,8 @@ sysretl_from_sys_call: #ifdef CONFIG_AUDITSYSCALL cstar_auditsys: - movl %r9d,R9(%rsp) /* register to be clobbered by call */ auditsys_entry_common - movl R9(%rsp),%r9d /* reload 6th syscall arg */ + movl %ebp, %r9d /* reload 6th syscall arg */ jmp cstar_dispatch sysretl_audit: @@ -389,16 +389,14 @@ cstar_tracesys: testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jz cstar_auditsys #endif - xchgl %r9d,%ebp SAVE_EXTRA_REGS xorl %eax, %eax /* do not leak kernel information */ movq %rax, R11(%rsp) movq %rax, R10(%rsp) - movq %r9, R9(%rsp) + movq %rax, R9(%rsp) movq %rax, R8(%rsp) - movq %rsp,%rdi /* &pt_regs -> arg1 */ - call syscall_trace_enter - movl R9(%rsp),%r9d + movq %rsp, %rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter /* Reload arg registers from stack. (see sysenter_tracesys) */ movl RCX(%rsp), %ecx @@ -408,8 +406,7 @@ cstar_tracesys: movl %eax, %eax /* zero extension */ RESTORE_EXTRA_REGS - xchgl %ebp,%r9d - jmp cstar_do_call + jmp cstar_do_call END(ia32_cstar_target) ia32_badarg: From 54ad726c51b7f7dffcc1dc379bedadee19f742f7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Jun 2015 13:02:28 +0200 Subject: [PATCH 445/481] x86/asm/entry/32: Improve code readability Make the 64-bit compat 32-bit syscall entry code a bit more readable: - eliminate whitespace noise - use consistent vertical spacing - use consistent assembly coding style similar to entry_64.S - fix various comments No code changed: arch/x86/entry/ia32entry.o: text data bss dec hex filename 1391 0 0 1391 56f ia32entry.o.before 1391 0 0 1391 56f ia32entry.o.after md5: f28501dcc366e68b557313942c6496d6 ia32entry.o.before.asm f28501dcc366e68b557313942c6496d6 ia32entry.o.after.asm Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/ia32entry.S | 287 +++++++++++++++++++------------------ 1 file changed, 146 insertions(+), 141 deletions(-) diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/ia32entry.S index 63219154087e..4bb9f7b2bc9c 100644 --- a/arch/x86/entry/ia32entry.S +++ b/arch/x86/entry/ia32entry.S @@ -1,15 +1,14 @@ /* - * Compatibility mode system call entry point for x86-64. - * + * Compatibility mode system call entry point for x86-64. + * * Copyright 2000-2002 Andi Kleen, SuSE Labs. - */ - + */ #include "calling.h" #include #include #include -#include -#include +#include +#include #include #include #include @@ -20,11 +19,11 @@ /* Avoid __ASSEMBLER__'ifying just for this. */ #include #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) -#define __AUDIT_ARCH_LE 0x40000000 +#define __AUDIT_ARCH_LE 0x40000000 #ifndef CONFIG_AUDITSYSCALL -#define sysexit_audit ia32_ret_from_sys_call -#define sysretl_audit ia32_ret_from_sys_call +# define sysexit_audit ia32_ret_from_sys_call +# define sysretl_audit ia32_ret_from_sys_call #endif .section .entry.text, "ax" @@ -37,7 +36,7 @@ ENDPROC(native_usergs_sysret32) #endif /* - * 32bit SYSENTER instruction entry. + * 32-bit SYSENTER instruction entry. * * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs. * IF and VM in rflags are cleared (IOW: interrupts are off). @@ -79,7 +78,7 @@ ENTRY(ia32_sysenter_target) pushq %rbp /* pt_regs->sp */ pushfq /* pt_regs->flags */ pushq $__USER32_CS /* pt_regs->cs */ - pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */ + pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */ pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ @@ -87,15 +86,15 @@ ENTRY(ia32_sysenter_target) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ cld - sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ + sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */ /* * no need to do an access_ok check here because rbp has been - * 32bit zero extended + * 32-bit zero extended */ ASM_STAC -1: movl (%rbp),%ebp - _ASM_EXTABLE(1b,ia32_badarg) +1: movl (%rbp), %ebp + _ASM_EXTABLE(1b, ia32_badarg) ASM_CLAC /* @@ -103,26 +102,26 @@ ENTRY(ia32_sysenter_target) * ourselves. To save a few cycles, we can check whether * NT was set instead of doing an unconditional popfq. 
*/ - testl $X86_EFLAGS_NT,EFLAGS(%rsp) - jnz sysenter_fix_flags + testl $X86_EFLAGS_NT, EFLAGS(%rsp) + jnz sysenter_fix_flags sysenter_flags_fixed: orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz sysenter_tracesys + jnz sysenter_tracesys sysenter_do_call: - /* 32bit syscall -> 64bit C ABI argument conversion */ - movl %edi,%r8d /* arg5 */ - movl %ebp,%r9d /* arg6 */ - xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ - movl %ebx,%edi /* arg1 */ - movl %edx,%edx /* arg3 (zero extension) */ + /* 32-bit syscall -> 64-bit C ABI argument conversion */ + movl %edi, %r8d /* arg5 */ + movl %ebp, %r9d /* arg6 */ + xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */ + movl %ebx, %edi /* arg1 */ + movl %edx, %edx /* arg3 (zero extension) */ sysenter_dispatch: - cmpq $(IA32_NR_syscalls-1),%rax + cmpq $(IA32_NR_syscalls-1), %rax ja 1f - call *ia32_sys_call_table(,%rax,8) - movq %rax,RAX(%rsp) + call *ia32_sys_call_table(, %rax, 8) + movq %rax, RAX(%rsp) 1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -139,21 +138,21 @@ sysexit_from_sys_call: * This code path is still called 'sysexit' because it pairs * with 'sysenter' and it uses the SYSENTER calling convention. */ - andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - movl RIP(%rsp),%ecx /* User %eip */ + andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + movl RIP(%rsp), %ecx /* User %eip */ RESTORE_RSI_RDI - xorl %edx,%edx /* avoid info leaks */ - xorq %r8,%r8 - xorq %r9,%r9 - xorq %r10,%r10 - movl EFLAGS(%rsp),%r11d /* User eflags */ + xorl %edx, %edx /* Do not leak kernel information */ + xorq %r8, %r8 + xorq %r9, %r9 + xorq %r10, %r10 + movl EFLAGS(%rsp), %r11d /* User eflags */ TRACE_IRQS_ON /* * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT, * since it avoids a dicey window with interrupts enabled. 
*/ - movl RSP(%rsp),%esp + movl RSP(%rsp), %esp /* * USERGS_SYSRET32 does: @@ -179,51 +178,51 @@ sysexit_from_sys_call: #ifdef CONFIG_AUDITSYSCALL .macro auditsys_entry_common - movl %esi,%r8d /* 5th arg: 4th syscall arg */ - movl %ecx,%r9d /*swap with edx*/ - movl %edx,%ecx /* 4th arg: 3rd syscall arg */ - movl %r9d,%edx /* 3rd arg: 2nd syscall arg */ - movl %ebx,%esi /* 2nd arg: 1st syscall arg */ - movl %eax,%edi /* 1st arg: syscall number */ - call __audit_syscall_entry - movl ORIG_RAX(%rsp),%eax /* reload syscall number */ - movl %ebx,%edi /* reload 1st syscall arg */ - movl RCX(%rsp),%esi /* reload 2nd syscall arg */ - movl RDX(%rsp),%edx /* reload 3rd syscall arg */ - movl RSI(%rsp),%ecx /* reload 4th syscall arg */ - movl RDI(%rsp),%r8d /* reload 5th syscall arg */ + movl %esi, %r8d /* 5th arg: 4th syscall arg */ + movl %ecx, %r9d /* swap with edx */ + movl %edx, %ecx /* 4th arg: 3rd syscall arg */ + movl %r9d, %edx /* 3rd arg: 2nd syscall arg */ + movl %ebx, %esi /* 2nd arg: 1st syscall arg */ + movl %eax, %edi /* 1st arg: syscall number */ + call __audit_syscall_entry + movl ORIG_RAX(%rsp), %eax /* reload syscall number */ + movl %ebx, %edi /* reload 1st syscall arg */ + movl RCX(%rsp), %esi /* reload 2nd syscall arg */ + movl RDX(%rsp), %edx /* reload 3rd syscall arg */ + movl RSI(%rsp), %ecx /* reload 4th syscall arg */ + movl RDI(%rsp), %r8d /* reload 5th syscall arg */ .endm .macro auditsys_exit exit - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz ia32_ret_from_sys_call + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz ia32_ret_from_sys_call TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - movl %eax,%esi /* second arg, syscall return value */ - cmpl $-MAX_ERRNO,%eax /* is it an error ? */ - jbe 1f - movslq %eax, %rsi /* if error sign extend to 64 bits */ -1: setbe %al /* 1 if error, 0 if not */ - movzbl %al,%edi /* zero-extend that into %edi */ - call __audit_syscall_exit - movq RAX(%rsp),%rax /* reload syscall return value */ - movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi + movl %eax, %esi /* second arg, syscall return value */ + cmpl $-MAX_ERRNO, %eax /* is it an error ? 
*/ + jbe 1f + movslq %eax, %rsi /* if error sign extend to 64 bits */ +1: setbe %al /* 1 if error, 0 if not */ + movzbl %al, %edi /* zero-extend that into %edi */ + call __audit_syscall_exit + movq RAX(%rsp), %rax /* reload syscall return value */ + movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jz \exit - xorl %eax, %eax /* do not leak kernel information */ + testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jz \exit + xorl %eax, %eax /* Do not leak kernel information */ movq %rax, R11(%rsp) movq %rax, R10(%rsp) movq %rax, R9(%rsp) movq %rax, R8(%rsp) - jmp int_with_check + jmp int_with_check .endm sysenter_auditsys: auditsys_entry_common - movl %ebp,%r9d /* reload 6th syscall arg */ - jmp sysenter_dispatch + movl %ebp, %r9d /* reload 6th syscall arg */ + jmp sysenter_dispatch sysexit_audit: auditsys_exit sysexit_from_sys_call @@ -232,7 +231,7 @@ sysexit_audit: sysenter_fix_flags: pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) popfq - jmp sysenter_flags_fixed + jmp sysenter_flags_fixed sysenter_tracesys: #ifdef CONFIG_AUDITSYSCALL @@ -240,12 +239,12 @@ sysenter_tracesys: jz sysenter_auditsys #endif SAVE_EXTRA_REGS - xorl %eax, %eax /* do not leak kernel information */ + xorl %eax, %eax /* Do not leak kernel information */ movq %rax, R11(%rsp) movq %rax, R10(%rsp) movq %rax, R9(%rsp) movq %rax, R8(%rsp) - movq %rsp,%rdi /* &pt_regs -> arg1 */ + movq %rsp, %rdi /* &pt_regs -> arg1 */ call syscall_trace_enter /* Reload arg registers from stack. (see sysenter_tracesys) */ @@ -253,23 +252,23 @@ sysenter_tracesys: movl RDX(%rsp), %edx movl RSI(%rsp), %esi movl RDI(%rsp), %edi - movl %eax, %eax /* zero extension */ + movl %eax, %eax /* zero extension */ RESTORE_EXTRA_REGS jmp sysenter_do_call ENDPROC(ia32_sysenter_target) /* - * 32bit SYSCALL instruction entry. + * 32-bit SYSCALL instruction entry. * - * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, + * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, * then loads new ss, cs, and rip from previously programmed MSRs. * rflags gets masked by a value from another MSR (so CLD and CLAC * are not needed). SYSCALL does not save anything on the stack * and does not change rsp. * * Note: rflags saving+masking-with-MSR happens only in Long mode - * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it). + * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it). * Don't get confused: rflags saving+masking depends on Long Mode Active bit * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). @@ -296,12 +295,12 @@ ENTRY(ia32_cstar_target) * it is too small to ever cause noticeable irq latency. 
*/ SWAPGS_UNSAFE_STACK - movl %esp,%r8d - movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp + movl %esp, %r8d + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ - movl %eax,%eax + movl %eax, %eax /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -314,55 +313,58 @@ ENTRY(ia32_cstar_target) pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ pushq %rbp /* pt_regs->cx */ - movl %ebp,%ecx + movl %ebp, %ecx pushq $-ENOSYS /* pt_regs->ax */ - sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ + sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */ /* - * no need to do an access_ok check here because r8 has been - * 32bit zero extended + * No need to do an access_ok check here because r8 has been + * 32-bit zero extended: */ ASM_STAC -1: movl (%r8),%ebp - _ASM_EXTABLE(1b,ia32_badarg) +1: movl (%r8), %ebp + _ASM_EXTABLE(1b, ia32_badarg) ASM_CLAC orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz cstar_tracesys cstar_do_call: - /* 32bit syscall -> 64bit C ABI argument conversion */ - movl %edi,%r8d /* arg5 */ - movl %ebp,%r9d /* arg6 */ - xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ - movl %ebx,%edi /* arg1 */ - movl %edx,%edx /* arg3 (zero extension) */ + /* 32-bit syscall -> 64-bit C ABI argument conversion */ + movl %edi, %r8d /* arg5 */ + movl %ebp, %r9d /* arg6 */ + xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */ + movl %ebx, %edi /* arg1 */ + movl %edx, %edx /* arg3 (zero extension) */ + cstar_dispatch: - cmpq $(IA32_NR_syscalls-1),%rax + cmpq $(IA32_NR_syscalls-1), %rax ja 1f - call *ia32_sys_call_table(,%rax,8) - movq %rax,RAX(%rsp) + + call *ia32_sys_call_table(, %rax, 8) + movq %rax, RAX(%rsp) 1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz sysretl_audit + testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz sysretl_audit + sysretl_from_sys_call: - andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) movl RCX(%rsp), %ebp RESTORE_RSI_RDI_RDX - movl RIP(%rsp),%ecx - movl EFLAGS(%rsp),%r11d - xorq %r10,%r10 - xorq %r9,%r9 - xorq %r8,%r8 + movl RIP(%rsp), %ecx + movl EFLAGS(%rsp), %r11d + xorq %r10, %r10 + xorq %r9, %r9 + xorq %r8, %r8 TRACE_IRQS_ON - movl RSP(%rsp),%esp + movl RSP(%rsp), %esp /* - * 64bit->32bit SYSRET restores eip from ecx, + * 64-bit->32-bit SYSRET restores eip from ecx, * eflags from r11 (but RF and VM bits are forced to 0), * cs and ss are loaded from MSRs. - * (Note: 32bit->32bit SYSRET is different: since r11 + * (Note: 32-bit->32-bit SYSRET is different: since r11 * does not exist, it merely sets eflags.IF=1). 
* * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss @@ -377,8 +379,8 @@ sysretl_from_sys_call: #ifdef CONFIG_AUDITSYSCALL cstar_auditsys: auditsys_entry_common - movl %ebp, %r9d /* reload 6th syscall arg */ - jmp cstar_dispatch + movl %ebp, %r9d /* reload 6th syscall arg */ + jmp cstar_dispatch sysretl_audit: auditsys_exit sysretl_from_sys_call @@ -386,16 +388,16 @@ sysretl_audit: cstar_tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jz cstar_auditsys + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jz cstar_auditsys #endif SAVE_EXTRA_REGS - xorl %eax, %eax /* do not leak kernel information */ + xorl %eax, %eax /* Do not leak kernel information */ movq %rax, R11(%rsp) movq %rax, R10(%rsp) movq %rax, R9(%rsp) movq %rax, R8(%rsp) - movq %rsp, %rdi /* &pt_regs -> arg1 */ + movq %rsp, %rdi /* &pt_regs -> arg1 */ call syscall_trace_enter /* Reload arg registers from stack. (see sysenter_tracesys) */ @@ -403,24 +405,24 @@ cstar_tracesys: movl RDX(%rsp), %edx movl RSI(%rsp), %esi movl RDI(%rsp), %edi - movl %eax, %eax /* zero extension */ + movl %eax, %eax /* zero extension */ RESTORE_EXTRA_REGS jmp cstar_do_call END(ia32_cstar_target) - + ia32_badarg: ASM_CLAC - movq $-EFAULT,%rax - jmp ia32_sysret + movq $-EFAULT, %rax + jmp ia32_sysret ia32_ret_from_sys_call: - xorl %eax, %eax /* do not leak kernel information */ + xorl %eax, %eax /* Do not leak kernel information */ movq %rax, R11(%rsp) movq %rax, R10(%rsp) movq %rax, R9(%rsp) movq %rax, R8(%rsp) - jmp int_ret_from_sys_call + jmp int_ret_from_sys_call /* * Emulated IA32 system calls via int 0x80. @@ -454,7 +456,7 @@ ENTRY(ia32_syscall) ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ - movl %eax,%eax + movl %eax, %eax /* Construct struct pt_regs on stack (iret frame is already on stack) */ pushq %rax /* pt_regs->orig_ax */ @@ -468,30 +470,33 @@ ENTRY(ia32_syscall) pushq $0 /* pt_regs->r10 */ pushq $0 /* pt_regs->r11 */ cld - sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */ + sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ + + orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz ia32_tracesys - orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz ia32_tracesys ia32_do_call: - /* 32bit syscall -> 64bit C ABI argument conversion */ - movl %edi,%r8d /* arg5 */ - movl %ebp,%r9d /* arg6 */ - xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ - movl %ebx,%edi /* arg1 */ - movl %edx,%edx /* arg3 (zero extension) */ - cmpq $(IA32_NR_syscalls-1),%rax + /* 32-bit syscall -> 64-bit C ABI argument conversion */ + movl %edi, %r8d /* arg5 */ + movl %ebp, %r9d /* arg6 */ + xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */ + movl %ebx, %edi /* arg1 */ + movl %edx, %edx /* arg3 (zero extension) */ + cmpq $(IA32_NR_syscalls-1), %rax ja 1f - call *ia32_sys_call_table(,%rax,8) # xxx: rip relative + + call *ia32_sys_call_table(, %rax, 8) /* RIP relative */ + ia32_sysret: - movq %rax,RAX(%rsp) + movq %rax, RAX(%rsp) 1: - jmp int_ret_from_sys_call + jmp int_ret_from_sys_call ia32_tracesys: SAVE_EXTRA_REGS - movq %rsp,%rdi /* &pt_regs -> arg1 */ - call syscall_trace_enter + movq %rsp, %rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter /* * Reload arg registers from stack in case ptrace 
changed them. * Don't reload %eax because syscall_trace_enter() returned @@ -503,33 +508,33 @@ ia32_tracesys: movl RDX(%rsp), %edx movl RSI(%rsp), %esi movl RDI(%rsp), %edi - movl %eax, %eax /* zero extension */ + movl %eax, %eax /* zero extension */ RESTORE_EXTRA_REGS - jmp ia32_do_call + jmp ia32_do_call END(ia32_syscall) .macro PTREGSCALL label, func ALIGN GLOBAL(\label) - leaq \func(%rip),%rax - jmp ia32_ptregs_common + leaq \func(%rip), %rax + jmp ia32_ptregs_common .endm - PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn - PTREGSCALL stub32_sigreturn, sys32_sigreturn - PTREGSCALL stub32_fork, sys_fork - PTREGSCALL stub32_vfork, sys_vfork + PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn + PTREGSCALL stub32_sigreturn, sys32_sigreturn + PTREGSCALL stub32_fork, sys_fork + PTREGSCALL stub32_vfork, sys_vfork ALIGN GLOBAL(stub32_clone) - leaq sys_clone(%rip),%rax + leaq sys_clone(%rip), %rax mov %r8, %rcx - jmp ia32_ptregs_common + jmp ia32_ptregs_common ALIGN ia32_ptregs_common: SAVE_EXTRA_REGS 8 - call *%rax + call *%rax RESTORE_EXTRA_REGS 8 ret END(ia32_ptregs_common) From 5cdc683b7d8b3341a3d18e0c5498bc1e4f3fb990 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 3 Jun 2015 15:58:49 +0200 Subject: [PATCH 446/481] x86/asm/entry/32: Explain the stub32_clone logic The reason for copying of %r8 to %rcx is quite non-obvious. Add a comment which explains why it is done. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433339930-20880-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/ia32entry.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/ia32entry.S index 4bb9f7b2bc9c..d0c7b28d5670 100644 --- a/arch/x86/entry/ia32entry.S +++ b/arch/x86/entry/ia32entry.S @@ -528,6 +528,14 @@ GLOBAL(\label) ALIGN GLOBAL(stub32_clone) leaq sys_clone(%rip), %rax + /* + * 32-bit clone API is clone(..., int tls_val, int *child_tidptr). + * 64-bit clone API is clone(..., int *child_tidptr, int tls_val). + * Native 64-bit kernel's sys_clone() implements the latter. + * We need to swap args here. But since tls_val is in fact ignored + * by sys_clone(), we can get away with an assignment + * (arg4 = arg5) instead of a full swap: + */ mov %r8, %rcx jmp ia32_ptregs_common From 7a5a9824c18f93415944c997dc6bb8eecfddd2e7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 3 Jun 2015 15:58:50 +0200 Subject: [PATCH 447/481] x86/asm/entry/32: Remove unnecessary optimization in stub32_clone Really swap arguments #4 and #5 in stub32_clone instead of "optimizing" it into a move. Yes, tls_val is currently unused. Yes, on some CPUs XCHG is a little bit more expensive than MOV. But a cycle or two on an expensive syscall like clone() is way below noise floor, and this optimization is simply not worth the obfuscation of logic. [ There's also ongoing work on the clone() ABI by Josh Triplett that will depend on this change later on. ] Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Josh Triplett Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433339930-20880-2-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/ia32entry.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/ia32entry.S index d0c7b28d5670..9558dacf32b9 100644 --- a/arch/x86/entry/ia32entry.S +++ b/arch/x86/entry/ia32entry.S @@ -529,14 +529,13 @@ GLOBAL(\label) GLOBAL(stub32_clone) leaq sys_clone(%rip), %rax /* - * 32-bit clone API is clone(..., int tls_val, int *child_tidptr). - * 64-bit clone API is clone(..., int *child_tidptr, int tls_val). - * Native 64-bit kernel's sys_clone() implements the latter. - * We need to swap args here. But since tls_val is in fact ignored - * by sys_clone(), we can get away with an assignment - * (arg4 = arg5) instead of a full swap: + * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr). + * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val). + * + * The native 64-bit kernel's sys_clone() implements the latter, + * so we need to swap arguments here before calling it: */ - mov %r8, %rcx + xchg %r8, %rcx jmp ia32_ptregs_common ALIGN From 138bd56a2164c65f68042ba580d88ab70c036fd1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Jun 2015 14:11:49 +0200 Subject: [PATCH 448/481] x86/asm/entry/64/compat: Rename ia32entry.S -> entry_64_compat.S So we now have the following system entry code related files, which define the following system call instruction and other entry paths: entry_32.S # 32-bit binaries on 32-bit kernels entry_64.S # 64-bit binaries on 64-bit kernels entry_64_compat.S # 32-bit binaries on 64-bit kernels Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Josh Triplett Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/Makefile | 2 +- arch/x86/entry/entry_64.S | 2 +- arch/x86/entry/{ia32entry.S => entry_64_compat.S} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename arch/x86/entry/{ia32entry.S => entry_64_compat.S} (100%) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index b93cce1c6bb2..7a144971db79 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -6,5 +6,5 @@ obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += vdso/ obj-y += vsyscall/ -obj-$(CONFIG_IA32_EMULATION) += ia32entry.o syscall_32.o +obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 31770662b940..4cf3dd36aa0d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -516,7 +516,7 @@ ENTRY(ret_from_fork) /* * By the time we get here, we have no idea whether our pt_regs, * ti flags, and ti status came from the 64-bit SYSCALL fast path, - * the slow path, or one of the ia32entry paths. + * the slow path, or one of the 32-bit compat paths. * Use IRET code path to return, since it can safely handle * all of the above. 
*/ diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/entry_64_compat.S similarity index 100% rename from arch/x86/entry/ia32entry.S rename to arch/x86/entry/entry_64_compat.S From 9dac6290945142e6b87d9f027edfee676dcfbfda Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jun 2015 18:55:09 +0200 Subject: [PATCH 449/481] x86/mm/pat: Untangle pat_init() Split it into a BSP and AP version which makes the PAT initialization path actually readable again. Signed-off-by: Borislav Petkov Reviewed-by: Toshi Kani Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 69 +++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index a1c96544099d..476d0780560f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -36,6 +36,8 @@ #undef pr_fmt #define pr_fmt(fmt) "" fmt +static bool boot_cpu_done; + static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); static inline void pat_disable(const char *reason) @@ -194,31 +196,47 @@ void pat_init_cache_modes(void) #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) +static void pat_bsp_init(u64 pat) +{ + if (!cpu_has_pat) { + pat_disable("PAT not supported by CPU."); + return; + } + + rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); + if (!boot_pat_state) { + pat_disable("PAT MSR is 0, disabled."); + return; + } + + wrmsrl(MSR_IA32_CR_PAT, pat); + + pat_init_cache_modes(); +} + +static void pat_ap_init(u64 pat) +{ + if (!cpu_has_pat) { + /* + * If this happens we are on a secondary CPU, but switched to + * PAT on the boot CPU. We have no way to undo PAT. + */ + panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n"); + } + + wrmsrl(MSR_IA32_CR_PAT, pat); +} + void pat_init(void) { u64 pat; - bool boot_cpu = !boot_pat_state; if (!pat_enabled()) return; - if (!cpu_has_pat) { - if (!boot_pat_state) { - pat_disable("PAT not supported by CPU."); - return; - } else { - /* - * If this happens we are on a secondary CPU, but - * switched to PAT on the boot CPU. We have no way to - * undo PAT. - */ - pr_err("x86/PAT: PAT enabled, but not supported by secondary CPU\n"); - BUG(); - } - } - - /* Set PWT to Write-Combining. All other bits stay the same */ /* + * Set PWT to Write-Combining. 
All other bits stay the same: + * * PTE encoding used in Linux: * PAT * |PCD @@ -233,19 +251,12 @@ void pat_init(void) pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); - /* Boot CPU check */ - if (!boot_pat_state) { - rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); - if (!boot_pat_state) { - pat_disable("PAT read returns always zero, disabled."); - return; - } + if (!boot_cpu_done) { + pat_bsp_init(pat); + boot_cpu_done = true; + } else { + pat_ap_init(pat); } - - wrmsrl(MSR_IA32_CR_PAT, pat); - - if (boot_cpu) - pat_init_cache_modes(); } #undef PAT From 9cd25aac1f44f269de5ecea11f7d927f37f1d01c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jun 2015 18:55:10 +0200 Subject: [PATCH 450/481] x86/mm/pat: Emulate PAT when it is disabled In the case when PAT is disabled on the command line with "nopat" or when virtualization doesn't support PAT (correctly) - see 9d34cfdf4796 ("x86: Don't rely on VMWare emulating PAT MSR correctly"). we emulate it using the PWT and PCD cache attribute bits. Get rid of boot_pat_state while at it. Based on a conglomerate patch from Toshi Kani. Signed-off-by: Borislav Petkov Reviewed-by: Toshi Kani Acked-by: Juergen Gross Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 2 +- arch/x86/mm/init.c | 6 +-- arch/x86/mm/pat.c | 81 +++++++++++++++++++++++++------------- arch/x86/xen/enlighten.c | 5 ++- 4 files changed, 60 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index cdcff7f7f694..ca6c228d5e62 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -6,7 +6,7 @@ bool pat_enabled(void); extern void pat_init(void); -void pat_init_cache_modes(void); +void pat_init_cache_modes(u64); extern int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1d553186c434..8533b46e6bee 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -40,7 +40,7 @@ */ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { [_PAGE_CACHE_MODE_WB ] = 0 | 0 , - [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 , + [_PAGE_CACHE_MODE_WC ] = 0 | _PAGE_PCD, [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, @@ -50,11 +50,11 @@ EXPORT_SYMBOL(__cachemode2pte_tbl); uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; diff --git a/arch/x86/mm/pat.c 
b/arch/x86/mm/pat.c index 476d0780560f..6dc7826e4797 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -68,8 +68,6 @@ static int __init pat_debug_setup(char *str) } __setup("debugpat", pat_debug_setup); -static u64 __read_mostly boot_pat_state; - #ifdef CONFIG_X86_PAT /* * X86 PAT uses page flags WC and Uncached together to keep track of @@ -177,14 +175,12 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) * configuration. * Using lower indices is preferred, so we start with highest index. */ -void pat_init_cache_modes(void) +void pat_init_cache_modes(u64 pat) { - int i; enum page_cache_mode cache; char pat_msg[33]; - u64 pat; + int i; - rdmsrl(MSR_IA32_CR_PAT, pat); pat_msg[32] = 0; for (i = 7; i >= 0; i--) { cache = pat_get_cache_mode((pat >> (i * 8)) & 7, @@ -198,24 +194,33 @@ void pat_init_cache_modes(void) static void pat_bsp_init(u64 pat) { + u64 tmp_pat; + if (!cpu_has_pat) { pat_disable("PAT not supported by CPU."); return; } - rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); - if (!boot_pat_state) { + if (!pat_enabled()) + goto done; + + rdmsrl(MSR_IA32_CR_PAT, tmp_pat); + if (!tmp_pat) { pat_disable("PAT MSR is 0, disabled."); return; } wrmsrl(MSR_IA32_CR_PAT, pat); - pat_init_cache_modes(); +done: + pat_init_cache_modes(pat); } static void pat_ap_init(u64 pat) { + if (!pat_enabled()) + return; + if (!cpu_has_pat) { /* * If this happens we are on a secondary CPU, but switched to @@ -231,25 +236,45 @@ void pat_init(void) { u64 pat; - if (!pat_enabled()) - return; - - /* - * Set PWT to Write-Combining. All other bits stay the same: - * - * PTE encoding used in Linux: - * PAT - * |PCD - * ||PWT - * ||| - * 000 WB _PAGE_CACHE_WB - * 001 WC _PAGE_CACHE_WC - * 010 UC- _PAGE_CACHE_UC_MINUS - * 011 UC _PAGE_CACHE_UC - * PAT bit unused - */ - pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | - PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); + if (!pat_enabled()) { + /* + * No PAT. Emulate the PAT table that corresponds to the two + * cache bits, PWT (Write Through) and PCD (Cache Disable). This + * setup is the same as the BIOS default setup when the system + * has PAT but the "nopat" boot option has been specified. This + * emulated PAT table is used when MSR_IA32_CR_PAT returns 0. + * + * PTE encoding used: + * + * PCD + * |PWT PAT + * || slot + * 00 0 WB : _PAGE_CACHE_MODE_WB + * 01 1 WT : _PAGE_CACHE_MODE_WT + * 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS + * 11 3 UC : _PAGE_CACHE_MODE_UC + * + * NOTE: When WC or WP is used, it is redirected to UC- per + * the default setup in __cachemode2pte_tbl[]. 
+ */ + pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); + } else { + /* + * PTE encoding used in Linux: + * PAT + * |PCD + * ||PWT + * ||| + * 000 WB _PAGE_CACHE_WB + * 001 WC _PAGE_CACHE_WC + * 010 UC- _PAGE_CACHE_UC_MINUS + * 011 UC _PAGE_CACHE_UC + * PAT bit unused + */ + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); + } if (!boot_cpu_done) { pat_bsp_init(pat); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060..53233a9beea9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1467,6 +1467,7 @@ asmlinkage __visible void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; + u64 pat; int rc; if (!xen_start_info) @@ -1574,8 +1575,8 @@ asmlinkage __visible void __init xen_start_kernel(void) * Modify the cache mode translation tables to match Xen's PAT * configuration. */ - - pat_init_cache_modes(); + rdmsrl(MSR_IA32_CR_PAT, pat); + pat_init_cache_modes(pat); /* keep using Xen gdt for now; no urgent need to change it */ From 7202fdb1b3299ec78dc1e7702260947ec20dd9e9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jun 2015 18:55:11 +0200 Subject: [PATCH 451/481] x86/mm/pat: Remove pat_enabled() checks Now that we emulate a PAT table when PAT is disabled, there's no need for those checks anymore as the PAT abstraction will handle those cases too. Based on a conglomerate patch from Toshi Kani. Signed-off-by: Borislav Petkov Reviewed-by: Toshi Kani Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/iomap_32.c | 12 ++++++------ arch/x86/mm/ioremap.c | 5 +---- arch/x86/mm/pageattr.c | 3 --- arch/x86/mm/pat.c | 13 +++---------- 4 files changed, 10 insertions(+), 23 deletions(-) diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 3a2ec8790ca7..a9dc7a37e6a2 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -77,13 +77,13 @@ void __iomem * iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { /* - * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. - * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the - * MTRR is UC or WC. UC_MINUS gets the real intention, of the - * user, which is "WC if the MTRR is WC, UC if you can't do that." + * For non-PAT systems, translate non-WB request to UC- just in + * case the caller set the PWT bit to prot directly without using + * pgprot_writecombine(). UC- translates to uncached if the MTRR + * is UC or WC. UC- gets the real intention, of the user, which is + * "WC if the MTRR is WC, UC if you can't do that." 
*/ - if (!pat_enabled() && pgprot_val(prot) == - (__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_WC))) + if (!pat_enabled() && pgprot2cachemode(prot) != _PAGE_CACHE_MODE_WB) prot = __pgprot(__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index b0da3588b452..cc0f17c5ad9f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -292,11 +292,8 @@ EXPORT_SYMBOL_GPL(ioremap_uc); */ void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) { - if (pat_enabled()) - return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC, + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC, __builtin_return_address(0)); - else - return ioremap_nocache(phys_addr, size); } EXPORT_SYMBOL(ioremap_wc); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fae3c5366ac0..31b4f3fd1207 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1572,9 +1572,6 @@ int set_memory_wc(unsigned long addr, int numpages) { int ret; - if (!pat_enabled()) - return set_memory_uc(addr, numpages); - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_WC, NULL); if (ret) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 6dc7826e4797..f89e460c55a8 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -438,12 +438,8 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, if (!pat_enabled()) { /* This is identical to page table setting without PAT */ - if (new_type) { - if (req_type == _PAGE_CACHE_MODE_WC) - *new_type = _PAGE_CACHE_MODE_UC_MINUS; - else - *new_type = req_type; - } + if (new_type) + *new_type = req_type; return 0; } @@ -947,11 +943,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, pgprot_t pgprot_writecombine(pgprot_t prot) { - if (pat_enabled()) - return __pgprot(pgprot_val(prot) | + return __pgprot(pgprot_val(prot) | cachemode2protval(_PAGE_CACHE_MODE_WC)); - else - return pgprot_noncached(prot); } EXPORT_SYMBOL_GPL(pgprot_writecombine); From d79a40caf8e15a2a15ecdefdc9d05865d4f37baa Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:12 +0200 Subject: [PATCH 452/481] x86/mm/pat: Use 7th PAT MSR slot for Write-Through PAT type Assign Write-Through type to the PA7 slot in the PAT MSR when the processor is not affected by PAT errata. The PA7 slot is chosen to improve robustness in the presence of errata that might cause the high PAT bit to be ignored. This way a buggy PA7 slot access will hit the PA3 slot, which is UC, so at worst we lose performance without causing a correctness issue. The following Intel processors are affected by the PAT errata. Errata CPUID ---------------------------------------------------- Pentium 2, A52 family 0x6, model 0x5 Pentium 3, E27 family 0x6, model 0x7, 0x8 Pentium 3 Xenon, G26 family 0x6, model 0x7, 0x8, 0xa Pentium M, Y26 family 0x6, model 0x9 Pentium M 90nm, X9 family 0x6, model 0xd Pentium 4, N46 family 0xf, model 0x0 Instead of making sharp boundary checks, we remain conservative and exclude all Pentium 2, 3, M and 4 family processors. For those, _PAGE_CACHE_MODE_WT is redirected to UC- per the default setup in __cachemode2pte_tbl[]. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: https://lkml.kernel.org/r/1433187393-22688-2-git-send-email-toshi.kani@hp.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 59 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 9 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index f89e460c55a8..59ab1a0fe21b 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -235,6 +235,7 @@ static void pat_ap_init(u64 pat) void pat_init(void) { u64 pat; + struct cpuinfo_x86 *c = &boot_cpu_data; if (!pat_enabled()) { /* @@ -244,7 +245,7 @@ void pat_init(void) * has PAT but the "nopat" boot option has been specified. This * emulated PAT table is used when MSR_IA32_CR_PAT returns 0. * - * PTE encoding used: + * PTE encoding: * * PCD * |PWT PAT @@ -259,21 +260,61 @@ void pat_init(void) */ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); - } else { + + } else if ((c->x86_vendor == X86_VENDOR_INTEL) && + (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || + ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { /* - * PTE encoding used in Linux: + * PAT support with the lower four entries. Intel Pentium 2, + * 3, M, and 4 are affected by PAT errata, which makes the + * upper four entries unusable. To be on the safe side, we don't + * use those. + * + * PTE encoding: * PAT * |PCD - * ||PWT - * ||| - * 000 WB _PAGE_CACHE_WB - * 001 WC _PAGE_CACHE_WC - * 010 UC- _PAGE_CACHE_UC_MINUS - * 011 UC _PAGE_CACHE_UC + * ||PWT PAT + * ||| slot + * 000 0 WB : _PAGE_CACHE_MODE_WB + * 001 1 WC : _PAGE_CACHE_MODE_WC + * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS + * 011 3 UC : _PAGE_CACHE_MODE_UC * PAT bit unused + * + * NOTE: When WT or WP is used, it is redirected to UC- per + * the default setup in __cachemode2pte_tbl[]. */ pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); + } else { + /* + * Full PAT support. We put WT in slot 7 to improve + * robustness in the presence of errata that might cause + * the high PAT bit to be ignored. This way, a buggy slot 7 + * access will hit slot 3, and slot 3 is UC, so at worst + * we lose performance without causing a correctness issue. + * Pentium 4 erratum N46 is an example for such an erratum, + * although we try not to use PAT at all on affected CPUs. + * + * PTE encoding: + * PAT + * |PCD + * ||PWT PAT + * ||| slot + * 000 0 WB : _PAGE_CACHE_MODE_WB + * 001 1 WC : _PAGE_CACHE_MODE_WC + * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS + * 011 3 UC : _PAGE_CACHE_MODE_UC + * 100 4 WB : Reserved + * 101 5 WC : Reserved + * 110 6 UC-: Reserved + * 111 7 WT : _PAGE_CACHE_MODE_WT + * + * The reserved slots are unused, but mapped to their + * corresponding types in the presence of PAT errata. + */ + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT); } if (!boot_cpu_done) { From 0d69bdff451a10aa48f80509e8bf18fb24683c06 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:13 +0200 Subject: [PATCH 453/481] x86/mm/pat: Change reserve_memtype() for Write-Through type When a target range is in RAM, reserve_ram_pages_type() verifies the requested type. 
Change it to fail WT and WP requests with -EINVAL since set_page_memtype() is limited to handle three types: WB, WC and UC-. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-6-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 59ab1a0fe21b..6311193fd1a2 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -401,9 +401,12 @@ static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) /* * For RAM pages, we use page flags to mark the pages with appropriate type. - * Here we do two pass: - * - Find the memtype of all the pages in the range, look for any conflicts - * - In case of no conflicts, set the new memtype for pages in the range + * The page flags are limited to three types, WB, WC and UC-. WT and WP requests + * fail with -EINVAL, and UC gets redirected to UC-. + * + * Here we do two passes: + * - Find the memtype of all the pages in the range, look for any conflicts. + * - In case of no conflicts, set the new memtype for pages in the range. */ static int reserve_ram_pages_type(u64 start, u64 end, enum page_cache_mode req_type, @@ -412,6 +415,13 @@ static int reserve_ram_pages_type(u64 start, u64 end, struct page *page; u64 pfn; + if ((req_type == _PAGE_CACHE_MODE_WT) || + (req_type == _PAGE_CACHE_MODE_WP)) { + if (new_type) + *new_type = _PAGE_CACHE_MODE_UC_MINUS; + return -EINVAL; + } + if (req_type == _PAGE_CACHE_MODE_UC) { /* We do not support strong UC */ WARN_ON_ONCE(1); @@ -461,6 +471,7 @@ static int free_ram_pages_type(u64 start, u64 end) * - _PAGE_CACHE_MODE_WC * - _PAGE_CACHE_MODE_UC_MINUS * - _PAGE_CACHE_MODE_UC + * - _PAGE_CACHE_MODE_WT * * If new_type is NULL, function will return an error if it cannot reserve the * region with req_type. If new_type is non-NULL, function will return From ecb2febaaa3945e1578359adc30ca818e78540fb Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:14 +0200 Subject: [PATCH 454/481] x86/mm: Teach is_new_memtype_allowed() about Write-Through type __ioremap_caller() calls reserve_memtype() and the passed down @new_pcm contains the actual page cache type it reserved in the success case. is_new_memtype_allowed() verifies if converting to the new page cache type is allowed when @pcm (the requested type) is different from @new_pcm. When WT is requested, the caller expects that writes are ordered and uncached. Therefore, enhance is_new_memtype_allowed() to disallow the following cases: - If the request is WT, mapping type cannot be WB - If the request is WT, mapping type cannot be WC Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index fe57e7a98839..2562e303405b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -398,11 +398,17 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, * requested memtype: * - request is uncached, return cannot be write-back * - request is write-combine, return cannot be write-back + * - request is write-through, return cannot be write-back + * - request is write-through, return cannot be write-combine */ if ((pcm == _PAGE_CACHE_MODE_UC_MINUS && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WC && - new_pcm == _PAGE_CACHE_MODE_WB)) { + new_pcm == _PAGE_CACHE_MODE_WB) || + (pcm == _PAGE_CACHE_MODE_WT && + new_pcm == _PAGE_CACHE_MODE_WB) || + (pcm == _PAGE_CACHE_MODE_WT && + new_pcm == _PAGE_CACHE_MODE_WC)) { return 0; } From d838270e2516db11084bed4e294017eb7b646a75 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:15 +0200 Subject: [PATCH 455/481] x86/mm, asm-generic: Add ioremap_wt() for creating Write-Through mappings Add ioremap_wt() for creating Write-Through mappings on x86. It follows the same model as ioremap_wc() for multi-arch support. Define ARCH_HAS_IOREMAP_WT in the x86 version of io.h to indicate that ioremap_wt() is implemented on x86. Also update the PAT documentation file to cover ioremap_wt(). Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- Documentation/x86/pat.txt | 4 +++- arch/x86/include/asm/io.h | 2 ++ arch/x86/mm/ioremap.c | 21 +++++++++++++++++++++ include/asm-generic/io.h | 9 +++++++++ include/asm-generic/iomap.h | 4 ++++ 5 files changed, 39 insertions(+), 1 deletion(-) diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt index 521bd8adc3b8..db0de6cfc351 100644 --- a/Documentation/x86/pat.txt +++ b/Documentation/x86/pat.txt @@ -12,7 +12,7 @@ virtual addresses. PAT allows for different types of memory attributes. The most commonly used ones that will be supported at this time are Write-back, Uncached, -Write-combined and Uncached Minus. +Write-combined, Write-through and Uncached Minus. 
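[ A rough usage sketch, not taken from the patches themselves: the PCI device, BAR number and variable names below are made up for illustration, but the calls (ioremap_wt(), memset_io(), iounmap()) are the interfaces this series adds or relies on, and a write-through mapping is released with plain iounmap() like any other ioremap variant:

	resource_size_t fb_phys = pci_resource_start(pdev, 0);	/* hypothetical framebuffer BAR */
	resource_size_t fb_len  = pci_resource_len(pdev, 0);
	void __iomem *fb;

	fb = ioremap_wt(fb_phys, fb_len);	/* WT: stores reach memory, reads may hit the cache */
	if (!fb)
		return -ENOMEM;

	memset_io(fb, 0, fb_len);		/* use the mapping */

	iounmap(fb);

  On configurations without Write-Through support the same call falls back to an uncached mapping (ioremap_nocache()), so callers do not need to special-case it. ]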
PAT APIs @@ -40,6 +40,8 @@ ioremap_nocache | -- | UC- | UC- | | | | | ioremap_wc | -- | -- | WC | | | | | +ioremap_wt | -- | -- | WT | + | | | | set_memory_uc | UC- | -- | -- | set_memory_wb | | | | | | | | diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index a94463063b46..83ec9b1d77cc 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -35,6 +35,7 @@ */ #define ARCH_HAS_IOREMAP_WC +#define ARCH_HAS_IOREMAP_WT #include #include @@ -320,6 +321,7 @@ extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, enum page_cache_mode pcm); extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); extern bool is_early_ioremap_ptep(pte_t *ptep); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index cc0f17c5ad9f..07cd46a8f30a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -172,6 +172,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, prot = __pgprot(pgprot_val(prot) | cachemode2protval(_PAGE_CACHE_MODE_WC)); break; + case _PAGE_CACHE_MODE_WT: + prot = __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_WT)); + break; case _PAGE_CACHE_MODE_WB: break; } @@ -297,6 +301,23 @@ void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) } EXPORT_SYMBOL(ioremap_wc); +/** + * ioremap_wt - map memory into CPU space write through + * @phys_addr: bus address of the memory + * @size: size of the resource to map + * + * This version of ioremap ensures that the memory is marked write through. + * Write through stores data into memory while keeping the cache up-to-date. + * + * Must be freed with iounmap. + */ +void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size) +{ + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wt); + void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) { return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB, diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 90ccba7f9f9a..f56094cfdeff 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -785,8 +785,17 @@ static inline void __iomem *ioremap_wc(phys_addr_t offset, size_t size) } #endif +#ifndef ioremap_wt +#define ioremap_wt ioremap_wt +static inline void __iomem *ioremap_wt(phys_addr_t offset, size_t size) +{ + return ioremap_nocache(offset, size); +} +#endif + #ifndef iounmap #define iounmap iounmap + static inline void iounmap(void __iomem *addr) { } diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 1b41011643a5..d8f8622fa044 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -66,6 +66,10 @@ extern void ioport_unmap(void __iomem *); #define ioremap_wc ioremap_nocache #endif +#ifndef ARCH_HAS_IOREMAP_WT +#define ioremap_wt ioremap_nocache +#endif + #ifdef CONFIG_PCI /* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */ struct pci_dev; From 556269c138a8b2d3f5714b8105fa6119ecc505f2 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:16 +0200 Subject: [PATCH 456/481] arch/*/io.h: Add ioremap_wt() to all architectures Add ioremap_wt() to all arch-specific asm/io.h headers which define ioremap_wc() locally. These headers do not include . 
Some of them include , but ioremap_wt() is defined for consistency since they define all ioremap_xxx locally. In all architectures without Write-Through support, ioremap_wt() is defined identically to ioremap_nocache(). frv and m68k already have ioremap_writethrough(). On those we add ioremap_wt(), identical to ioremap_writethrough(), and define ARCH_HAS_IOREMAP_WT in both architectures. The ioremap_wt() interface is exported to drivers. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/arc/include/asm/io.h | 1 + arch/arm/include/asm/io.h | 1 + arch/arm64/include/asm/io.h | 1 + arch/avr32/include/asm/io.h | 1 + arch/frv/include/asm/io.h | 7 +++++++ arch/m32r/include/asm/io.h | 1 + arch/m68k/include/asm/io_mm.h | 7 +++++++ arch/m68k/include/asm/io_no.h | 6 ++++++ arch/metag/include/asm/io.h | 3 +++ arch/microblaze/include/asm/io.h | 1 + arch/mn10300/include/asm/io.h | 1 + arch/nios2/include/asm/io.h | 1 + arch/s390/include/asm/io.h | 1 + arch/sparc/include/asm/io_32.h | 1 + arch/sparc/include/asm/io_64.h | 1 + arch/tile/include/asm/io.h | 1 + arch/xtensa/include/asm/io.h | 1 + 17 files changed, 36 insertions(+) diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index cabd518cb253..7cc4ced5dbf4 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -20,6 +20,7 @@ extern void iounmap(const void __iomem *addr); #define ioremap_nocache(phy, sz) ioremap(phy, sz) #define ioremap_wc(phy, sz) ioremap(phy, sz) +#define ioremap_wt(phy, sz) ioremap(phy, sz) /* Change struct page to physical address */ #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index db58deb00aa7..1b7677d1e5e1 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -336,6 +336,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) #define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) +#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define iounmap __arm_iounmap /* diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 540f7c0aea82..7116d3973058 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -170,6 +170,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) +#define ioremap_wt(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define iounmap __iounmap /* diff --git a/arch/avr32/include/asm/io.h b/arch/avr32/include/asm/io.h index 4f5ec2bb7172..e998ff5d8e1a 100644 --- a/arch/avr32/include/asm/io.h +++ b/arch/avr32/include/asm/io.h @@ -296,6 +296,7 @@ extern void
__iounmap(void __iomem *addr); __iounmap(addr) #define ioremap_wc ioremap_nocache +#define ioremap_wt ioremap_nocache #define cached(addr) P1SEGADDR(addr) #define uncached(addr) P2SEGADDR(addr) diff --git a/arch/frv/include/asm/io.h b/arch/frv/include/asm/io.h index 0b78bc89e840..1fe98febc081 100644 --- a/arch/frv/include/asm/io.h +++ b/arch/frv/include/asm/io.h @@ -17,6 +17,8 @@ #ifdef __KERNEL__ +#define ARCH_HAS_IOREMAP_WT + #include #include #include @@ -270,6 +272,11 @@ static inline void __iomem *ioremap_writethrough(unsigned long physaddr, unsigne return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); } +static inline void __iomem *ioremap_wt(unsigned long physaddr, unsigned long size) +{ + return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); +} + static inline void __iomem *ioremap_fullcache(unsigned long physaddr, unsigned long size) { return __ioremap(physaddr, size, IOMAP_FULL_CACHING); diff --git a/arch/m32r/include/asm/io.h b/arch/m32r/include/asm/io.h index 9cc00dbd59ce..0c3f25ee3381 100644 --- a/arch/m32r/include/asm/io.h +++ b/arch/m32r/include/asm/io.h @@ -68,6 +68,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size) extern void iounmap(volatile void __iomem *addr); #define ioremap_nocache(off,size) ioremap(off,size) #define ioremap_wc ioremap_nocache +#define ioremap_wt ioremap_nocache /* * IO bus memory addresses are also 1:1 with the physical address diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h index 8955b40a5dc4..7c12138bb3cb 100644 --- a/arch/m68k/include/asm/io_mm.h +++ b/arch/m68k/include/asm/io_mm.h @@ -20,6 +20,8 @@ #ifdef __KERNEL__ +#define ARCH_HAS_IOREMAP_WT + #include #include #include @@ -470,6 +472,11 @@ static inline void __iomem *ioremap_writethrough(unsigned long physaddr, { return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); } +static inline void __iomem *ioremap_wt(unsigned long physaddr, + unsigned long size) +{ + return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); +} static inline void __iomem *ioremap_fullcache(unsigned long physaddr, unsigned long size) { diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h index a93c8cde4d38..5fff9a22296e 100644 --- a/arch/m68k/include/asm/io_no.h +++ b/arch/m68k/include/asm/io_no.h @@ -3,6 +3,8 @@ #ifdef __KERNEL__ +#define ARCH_HAS_IOREMAP_WT + #include #include @@ -157,6 +159,10 @@ static inline void *ioremap_writethrough(unsigned long physaddr, unsigned long s { return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); } +static inline void *ioremap_wt(unsigned long physaddr, unsigned long size) +{ + return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); +} static inline void *ioremap_fullcache(unsigned long physaddr, unsigned long size) { return __ioremap(physaddr, size, IOMAP_FULL_CACHING); diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h index d5779b0ec573..9890f21eadbe 100644 --- a/arch/metag/include/asm/io.h +++ b/arch/metag/include/asm/io.h @@ -160,6 +160,9 @@ extern void __iounmap(void __iomem *addr); #define ioremap_wc(offset, size) \ __ioremap((offset), (size), _PAGE_WR_COMBINE) +#define ioremap_wt(offset, size) \ + __ioremap((offset), (size), 0) + #define iounmap(addr) \ __iounmap(addr) diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h index 940f5fc1d1da..ec3da1135994 100644 --- a/arch/microblaze/include/asm/io.h +++ b/arch/microblaze/include/asm/io.h @@ -43,6 +43,7 @@ extern void __iomem *ioremap(phys_addr_t address, unsigned long size); #define ioremap_nocache(addr, 
size) ioremap((addr), (size)) #define ioremap_fullcache(addr, size) ioremap((addr), (size)) #define ioremap_wc(addr, size) ioremap((addr), (size)) +#define ioremap_wt(addr, size) ioremap((addr), (size)) #endif /* CONFIG_MMU */ diff --git a/arch/mn10300/include/asm/io.h b/arch/mn10300/include/asm/io.h index cc4a2ba9e228..07c5b4a3903b 100644 --- a/arch/mn10300/include/asm/io.h +++ b/arch/mn10300/include/asm/io.h @@ -282,6 +282,7 @@ static inline void __iomem *ioremap_nocache(unsigned long offset, unsigned long } #define ioremap_wc ioremap_nocache +#define ioremap_wt ioremap_nocache static inline void iounmap(void __iomem *addr) { diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h index 6e24d7cceb0c..c5a62da22cd2 100644 --- a/arch/nios2/include/asm/io.h +++ b/arch/nios2/include/asm/io.h @@ -46,6 +46,7 @@ static inline void iounmap(void __iomem *addr) } #define ioremap_wc ioremap_nocache +#define ioremap_wt ioremap_nocache /* Pages to physical address... */ #define page_to_phys(page) virt_to_phys(page_to_virt(page)) diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 30fd5c84680e..cb5fdf3a78fc 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -29,6 +29,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define ioremap_nocache(addr, size) ioremap(addr, size) #define ioremap_wc ioremap_nocache +#define ioremap_wt ioremap_nocache static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h index 407ac14295f4..57f26c398dc9 100644 --- a/arch/sparc/include/asm/io_32.h +++ b/arch/sparc/include/asm/io_32.h @@ -129,6 +129,7 @@ static inline void sbus_memcpy_toio(volatile void __iomem *dst, void __iomem *ioremap(unsigned long offset, unsigned long size); #define ioremap_nocache(X,Y) ioremap((X),(Y)) #define ioremap_wc(X,Y) ioremap((X),(Y)) +#define ioremap_wt(X,Y) ioremap((X),(Y)) void iounmap(volatile void __iomem *addr); /* Create a virtual mapping cookie for an IO port range */ diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index 50d4840d9aeb..c32fa3f752c8 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -402,6 +402,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size) #define ioremap_nocache(X,Y) ioremap((X),(Y)) #define ioremap_wc(X,Y) ioremap((X),(Y)) +#define ioremap_wt(X,Y) ioremap((X),(Y)) static inline void iounmap(volatile void __iomem *addr) { diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index 6ef4ecab1df2..9c3d950a7231 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -54,6 +54,7 @@ extern void iounmap(volatile void __iomem *addr); #define ioremap_nocache(physaddr, size) ioremap(physaddr, size) #define ioremap_wc(physaddr, size) ioremap(physaddr, size) +#define ioremap_wt(physaddr, size) ioremap(physaddr, size) #define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) #define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h index fe1600a09438..c39bb6e61911 100644 --- a/arch/xtensa/include/asm/io.h +++ b/arch/xtensa/include/asm/io.h @@ -59,6 +59,7 @@ static inline void __iomem *ioremap_cache(unsigned long offset, } #define ioremap_wc ioremap_nocache +#define ioremap_wt ioremap_nocache static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { From 
c7c95f19f37976731efca9576f6e4a22067798a2 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:17 +0200 Subject: [PATCH 457/481] video/fbdev, asm/io.h: Remove ioremap_writethrough() Replace all calls to ioremap_writethrough() with ioremap_wt(). Remove ioremap_writethrough() too. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Acked-by: Geert Uytterhoeven Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-10-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/frv/include/asm/io.h | 5 ----- arch/m68k/include/asm/io_mm.h | 5 ----- arch/m68k/include/asm/io_no.h | 4 ---- arch/microblaze/include/asm/io.h | 1 - arch/tile/include/asm/io.h | 1 - drivers/video/fbdev/amifb.c | 4 ++-- drivers/video/fbdev/atafb.c | 3 +-- drivers/video/fbdev/hpfb.c | 4 ++-- 8 files changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/frv/include/asm/io.h b/arch/frv/include/asm/io.h index 1fe98febc081..a31b63ec4930 100644 --- a/arch/frv/include/asm/io.h +++ b/arch/frv/include/asm/io.h @@ -267,11 +267,6 @@ static inline void __iomem *ioremap_nocache(unsigned long physaddr, unsigned lon return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); } -static inline void __iomem *ioremap_writethrough(unsigned long physaddr, unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); -} - static inline void __iomem *ioremap_wt(unsigned long physaddr, unsigned long size) { return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h index 7c12138bb3cb..618c85d3c786 100644 --- a/arch/m68k/include/asm/io_mm.h +++ b/arch/m68k/include/asm/io_mm.h @@ -467,11 +467,6 @@ static inline void __iomem *ioremap_nocache(unsigned long physaddr, unsigned lon { return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); } -static inline void __iomem *ioremap_writethrough(unsigned long physaddr, - unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); -} static inline void __iomem *ioremap_wt(unsigned long physaddr, unsigned long size) { diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h index 5fff9a22296e..ad7bd40e6742 100644 --- a/arch/m68k/include/asm/io_no.h +++ b/arch/m68k/include/asm/io_no.h @@ -155,10 +155,6 @@ static inline void *ioremap_nocache(unsigned long physaddr, unsigned long size) { return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); } -static inline void *ioremap_writethrough(unsigned long physaddr, unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); -} static inline void *ioremap_wt(unsigned long physaddr, unsigned long size) { return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h index ec3da1135994..39b6315db82e 100644 --- a/arch/microblaze/include/asm/io.h +++ b/arch/microblaze/include/asm/io.h @@ -39,7 +39,6 @@ extern resource_size_t isa_mem_base; extern void iounmap(void __iomem *addr); extern void __iomem *ioremap(phys_addr_t address, unsigned long size); -#define ioremap_writethrough(addr, size) ioremap((addr), (size)) #define ioremap_nocache(addr, size) ioremap((addr), (size)) #define ioremap_fullcache(addr, 
size) ioremap((addr), (size)) #define ioremap_wc(addr, size) ioremap((addr), (size)) diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index 9c3d950a7231..dc61de15c1f9 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -55,7 +55,6 @@ extern void iounmap(volatile void __iomem *addr); #define ioremap_nocache(physaddr, size) ioremap(physaddr, size) #define ioremap_wc(physaddr, size) ioremap(physaddr, size) #define ioremap_wt(physaddr, size) ioremap(physaddr, size) -#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) #define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) #define mmiowb() diff --git a/drivers/video/fbdev/amifb.c b/drivers/video/fbdev/amifb.c index 35f7900a0573..ee3a703acf23 100644 --- a/drivers/video/fbdev/amifb.c +++ b/drivers/video/fbdev/amifb.c @@ -3705,8 +3705,8 @@ default_chipset: * access the videomem with writethrough cache */ info->fix.smem_start = (u_long)ZTWO_PADDR(videomemory); - videomemory = (u_long)ioremap_writethrough(info->fix.smem_start, - info->fix.smem_len); + videomemory = (u_long)ioremap_wt(info->fix.smem_start, + info->fix.smem_len); if (!videomemory) { dev_warn(&pdev->dev, "Unable to map videomem cached writethrough\n"); diff --git a/drivers/video/fbdev/atafb.c b/drivers/video/fbdev/atafb.c index cb9ee2556850..d6ce613e12ad 100644 --- a/drivers/video/fbdev/atafb.c +++ b/drivers/video/fbdev/atafb.c @@ -3185,8 +3185,7 @@ int __init atafb_init(void) /* Map the video memory (physical address given) to somewhere * in the kernel address space. */ - external_screen_base = ioremap_writethrough(external_addr, - external_len); + external_screen_base = ioremap_wt(external_addr, external_len); if (external_vgaiobase) external_vgaiobase = (unsigned long)ioremap(external_vgaiobase, 0x10000); diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c index a1b7e5fa9b09..9476d196f510 100644 --- a/drivers/video/fbdev/hpfb.c +++ b/drivers/video/fbdev/hpfb.c @@ -241,8 +241,8 @@ static int hpfb_init_one(unsigned long phys_base, unsigned long virt_base) fb_info.fix.line_length = fb_width; fb_height = (in_8(fb_regs + HPFB_FBHMSB) << 8) | in_8(fb_regs + HPFB_FBHLSB); fb_info.fix.smem_len = fb_width * fb_height; - fb_start = (unsigned long)ioremap_writethrough(fb_info.fix.smem_start, - fb_info.fix.smem_len); + fb_start = (unsigned long)ioremap_wt(fb_info.fix.smem_start, + fb_info.fix.smem_len); hpfb_defined.xres = (in_8(fb_regs + HPFB_DWMSB) << 8) | in_8(fb_regs + HPFB_DWLSB); hpfb_defined.yres = (in_8(fb_regs + HPFB_DHMSB) << 8) | in_8(fb_regs + HPFB_DHLSB); hpfb_defined.xres_virtual = hpfb_defined.xres; From d1b4bfbfac32da43bfc8425be1c4303548e20527 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:18 +0200 Subject: [PATCH 458/481] x86/mm/pat: Add pgprot_writethrough() Add pgprot_writethrough() for setting page protection flags to Write-Through mode. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 3 +++ arch/x86/mm/pat.c | 7 +++++++ include/asm-generic/pgtable.h | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 78f0c8cbe316..13f310bfc09a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -367,6 +367,9 @@ extern int nx_enabled; #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); +#define pgprot_writethrough pgprot_writethrough +extern pgprot_t pgprot_writethrough(pgprot_t prot); + /* Indicate that x86 has its own track and untrack pfn vma functions */ #define __HAVE_PFNMAP_TRACKING diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 6311193fd1a2..076187c76d7a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -1000,6 +1000,13 @@ pgprot_t pgprot_writecombine(pgprot_t prot) } EXPORT_SYMBOL_GPL(pgprot_writecombine); +pgprot_t pgprot_writethrough(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_WT)); +} +EXPORT_SYMBOL_GPL(pgprot_writethrough); + #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) static struct memtype *memtype_get_idx(loff_t pos) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 39f1d6a2b04d..bd910ceaccfa 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -262,6 +262,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #define pgprot_writecombine pgprot_noncached #endif +#ifndef pgprot_writethrough +#define pgprot_writethrough pgprot_noncached +#endif + #ifndef pgprot_device #define pgprot_device pgprot_noncached #endif From 35a5a10411d87e24b46a7a9dda8d08ef9961b783 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:19 +0200 Subject: [PATCH 459/481] x86/mm/pat: Extend set_page_memtype() to support Write-Through type As set_memory_wb() calls free_ram_pages_type(), which then calls set_page_memtype() with -1, _PGMT_DEFAULT is used for tracking the WB type. _PGMT_WB is defined but unused. Thus, rename _PGMT_DEFAULT to _PGMT_WB to clarify the usage, and release the slot used by _PGMT_WB. Furthermore, change free_ram_pages_type() to call set_page_memtype() with _PGMT_WB, and get_page_memtype() to return _PAGE_CACHE_MODE_WB for _PGMT_WB. Then, define _PGMT_WT in the freed slot. This allows set_page_memtype() to track the WT type. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-12-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 59 +++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 076187c76d7a..188e3e07eeeb 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -70,18 +70,22 @@ __setup("debugpat", pat_debug_setup); #ifdef CONFIG_X86_PAT /* - * X86 PAT uses page flags WC and Uncached together to keep track of - * memory type of pages that have backing page struct. X86 PAT supports 3 - * different memory types, _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC and - * _PAGE_CACHE_MODE_UC_MINUS and fourth state where page's memory type has not - * been changed from its default (value of -1 used to denote this). - * Note we do not support _PAGE_CACHE_MODE_UC here. + * X86 PAT uses page flags arch_1 and uncached together to keep track of + * memory type of pages that have backing page struct. + * + * X86 PAT supports 4 different memory types: + * - _PAGE_CACHE_MODE_WB + * - _PAGE_CACHE_MODE_WC + * - _PAGE_CACHE_MODE_UC_MINUS + * - _PAGE_CACHE_MODE_WT + * + * _PAGE_CACHE_MODE_WB is the default type. */ -#define _PGMT_DEFAULT 0 +#define _PGMT_WB 0 #define _PGMT_WC (1UL << PG_arch_1) #define _PGMT_UC_MINUS (1UL << PG_uncached) -#define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1) +#define _PGMT_WT (1UL << PG_uncached | 1UL << PG_arch_1) #define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) #define _PGMT_CLEAR_MASK (~_PGMT_MASK) @@ -89,14 +93,14 @@ static inline enum page_cache_mode get_page_memtype(struct page *pg) { unsigned long pg_flags = pg->flags & _PGMT_MASK; - if (pg_flags == _PGMT_DEFAULT) - return -1; + if (pg_flags == _PGMT_WB) + return _PAGE_CACHE_MODE_WB; else if (pg_flags == _PGMT_WC) return _PAGE_CACHE_MODE_WC; else if (pg_flags == _PGMT_UC_MINUS) return _PAGE_CACHE_MODE_UC_MINUS; else - return _PAGE_CACHE_MODE_WB; + return _PAGE_CACHE_MODE_WT; } static inline void set_page_memtype(struct page *pg, @@ -113,11 +117,12 @@ static inline void set_page_memtype(struct page *pg, case _PAGE_CACHE_MODE_UC_MINUS: memtype_flags = _PGMT_UC_MINUS; break; - case _PAGE_CACHE_MODE_WB: - memtype_flags = _PGMT_WB; + case _PAGE_CACHE_MODE_WT: + memtype_flags = _PGMT_WT; break; + case _PAGE_CACHE_MODE_WB: default: - memtype_flags = _PGMT_DEFAULT; + memtype_flags = _PGMT_WB; break; } @@ -401,8 +406,10 @@ static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) /* * For RAM pages, we use page flags to mark the pages with appropriate type. - * The page flags are limited to three types, WB, WC and UC-. WT and WP requests - * fail with -EINVAL, and UC gets redirected to UC-. + * The page flags are limited to four types, WB (default), WC, WT and UC-. + * WP request fails with -EINVAL, and UC gets redirected to UC-. Setting + * a new memory type is only allowed for a page mapped with the default WB + * type. * * Here we do two passes: * - Find the memtype of all the pages in the range, look for any conflicts. 
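Since only the two page flag bits PG_uncached and PG_arch_1 are available per struct page, exactly four states can be encoded, which is why WB, WC, UC- and WT can be tracked here while a WP request has to be rejected. A standalone sketch of that encoding (illustrative names and bit positions only; the kernel code above uses the real page flags and the _PGMT_* masks):

	enum ex_memtype { EX_WB, EX_WC, EX_UC_MINUS, EX_WT };

	#define EX_ARCH_1	(1UL << 0)	/* stands in for PG_arch_1   */
	#define EX_UNCACHED	(1UL << 1)	/* stands in for PG_uncached */

	/* Decode the two flag bits the same way get_page_memtype() does. */
	static enum ex_memtype ex_get_memtype(unsigned long flags)
	{
		switch (flags & (EX_ARCH_1 | EX_UNCACHED)) {
		case 0:			return EX_WB;		/* _PGMT_WB, the default  */
		case EX_ARCH_1:		return EX_WC;		/* _PGMT_WC               */
		case EX_UNCACHED:	return EX_UC_MINUS;	/* _PGMT_UC_MINUS         */
		default:		return EX_WT;		/* _PGMT_WT, the new slot */
		}
	}
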
@@ -415,8 +422,7 @@ static int reserve_ram_pages_type(u64 start, u64 end, struct page *page; u64 pfn; - if ((req_type == _PAGE_CACHE_MODE_WT) || - (req_type == _PAGE_CACHE_MODE_WP)) { + if (req_type == _PAGE_CACHE_MODE_WP) { if (new_type) *new_type = _PAGE_CACHE_MODE_UC_MINUS; return -EINVAL; @@ -433,7 +439,7 @@ static int reserve_ram_pages_type(u64 start, u64 end, page = pfn_to_page(pfn); type = get_page_memtype(page); - if (type != -1) { + if (type != _PAGE_CACHE_MODE_WB) { pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n", start, end - 1, type, req_type); if (new_type) @@ -460,7 +466,7 @@ static int free_ram_pages_type(u64 start, u64 end) for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { page = pfn_to_page(pfn); - set_page_memtype(page, -1); + set_page_memtype(page, _PAGE_CACHE_MODE_WB); } return 0; } @@ -601,7 +607,7 @@ int free_memtype(u64 start, u64 end) * Only to be called when PAT is enabled * * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS - * or _PAGE_CACHE_MODE_UC + * or _PAGE_CACHE_MODE_WT. */ static enum page_cache_mode lookup_memtype(u64 paddr) { @@ -613,16 +619,9 @@ static enum page_cache_mode lookup_memtype(u64 paddr) if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { struct page *page; - page = pfn_to_page(paddr >> PAGE_SHIFT); - rettype = get_page_memtype(page); - /* - * -1 from get_page_memtype() implies RAM page is in its - * default state and not reserved, and hence of type WB - */ - if (rettype == -1) - rettype = _PAGE_CACHE_MODE_WB; - return rettype; + page = pfn_to_page(paddr >> PAGE_SHIFT); + return get_page_memtype(page); } spin_lock(&memtype_lock); From 623dffb2a2e059e1ace45b59b3ff21c66c419614 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:20 +0200 Subject: [PATCH 460/481] x86/mm/pat: Add set_memory_wt() for Write-Through type Now that reserve_ram_pages_type() accepts the WT type, add set_memory_wt(), set_memory_array_wt() and set_pages_array_wt() in order to be able to set memory to Write-Through page cache mode. Also, extend ioremap_change_attr() to accept the WT type. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-13-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- Documentation/x86/pat.txt | 9 +++-- arch/x86/include/asm/cacheflush.h | 6 ++- arch/x86/mm/ioremap.c | 3 ++ arch/x86/mm/pageattr.c | 62 ++++++++++++++++++++++++------- 4 files changed, 63 insertions(+), 17 deletions(-) diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt index db0de6cfc351..54944c71b819 100644 --- a/Documentation/x86/pat.txt +++ b/Documentation/x86/pat.txt @@ -48,6 +48,9 @@ set_memory_uc | UC- | -- | -- | set_memory_wc | WC | -- | -- | set_memory_wb | | | | | | | | +set_memory_wt | WT | -- | -- | + set_memory_wb | | | | + | | | | pci sysfs resource | -- | -- | UC- | | | | | pci sysfs resource_wc | -- | -- | WC | @@ -150,8 +153,8 @@ can be more restrictive, in case of any existing aliasing for that address. 
For example: If there is an existing uncached mapping, a new ioremap_wc can return uncached mapping in place of write-combine requested. -set_memory_[uc|wc] and set_memory_wb should be used in pairs, where driver will -first make a region uc or wc and switch it back to wb after use. +set_memory_[uc|wc|wt] and set_memory_wb should be used in pairs, where driver +will first make a region uc, wc or wt and switch it back to wb after use. Over time writes to /proc/mtrr will be deprecated in favor of using PAT based interfaces. Users writing to /proc/mtrr are suggested to use above interfaces. @@ -159,7 +162,7 @@ interfaces. Users writing to /proc/mtrr are suggested to use above interfaces. Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access types. -Drivers should use set_memory_[uc|wc] to set access type for RAM ranges. +Drivers should use set_memory_[uc|wc|wt] to set access type for RAM ranges. PAT debugging diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 47c8e32f621a..b6f7457d12e4 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -8,7 +8,7 @@ /* * The set_memory_* API can be used to change various attributes of a virtual * address range. The attributes include: - * Cachability : UnCached, WriteCombining, WriteBack + * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack * Executability : eXeutable, NoteXecutable * Read/Write : ReadOnly, ReadWrite * Presence : NotPresent @@ -35,9 +35,11 @@ int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); +int _set_memory_wt(unsigned long addr, int numpages); int _set_memory_wb(unsigned long addr, int numpages); int set_memory_uc(unsigned long addr, int numpages); int set_memory_wc(unsigned long addr, int numpages); +int set_memory_wt(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); @@ -48,10 +50,12 @@ int set_memory_4k(unsigned long addr, int numpages); int set_memory_array_uc(unsigned long *addr, int addrinarray); int set_memory_array_wc(unsigned long *addr, int addrinarray); +int set_memory_array_wt(unsigned long *addr, int addrinarray); int set_memory_array_wb(unsigned long *addr, int addrinarray); int set_pages_array_uc(struct page **pages, int addrinarray); int set_pages_array_wc(struct page **pages, int addrinarray); +int set_pages_array_wt(struct page **pages, int addrinarray); int set_pages_array_wb(struct page **pages, int addrinarray); /* diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 07cd46a8f30a..8405c0c6a535 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -42,6 +42,9 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, case _PAGE_CACHE_MODE_WC: err = _set_memory_wc(vaddr, nrpages); break; + case _PAGE_CACHE_MODE_WT: + err = _set_memory_wt(vaddr, nrpages); + break; case _PAGE_CACHE_MODE_WB: err = _set_memory_wb(vaddr, nrpages); break; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 31b4f3fd1207..727158cb3b3c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1503,12 +1503,10 @@ EXPORT_SYMBOL(set_memory_uc); static int _set_memory_array(unsigned long *addr, int addrinarray, enum page_cache_mode new_type) { + enum page_cache_mode set_type; int i, j; int ret; - /* - * for now UC MINUS. 
see comments in ioremap_nocache() - */ for (i = 0; i < addrinarray; i++) { ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, new_type, NULL); @@ -1516,9 +1514,12 @@ static int _set_memory_array(unsigned long *addr, int addrinarray, goto out_free; } + /* If WC, set to UC- first and then WC */ + set_type = (new_type == _PAGE_CACHE_MODE_WC) ? + _PAGE_CACHE_MODE_UC_MINUS : new_type; + ret = change_page_attr_set(addr, addrinarray, - cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), - 1); + cachemode2pgprot(set_type), 1); if (!ret && new_type == _PAGE_CACHE_MODE_WC) ret = change_page_attr_set_clr(addr, addrinarray, @@ -1550,6 +1551,12 @@ int set_memory_array_wc(unsigned long *addr, int addrinarray) } EXPORT_SYMBOL(set_memory_array_wc); +int set_memory_array_wt(unsigned long *addr, int addrinarray) +{ + return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WT); +} +EXPORT_SYMBOL_GPL(set_memory_array_wt); + int _set_memory_wc(unsigned long addr, int numpages) { int ret; @@ -1575,21 +1582,39 @@ int set_memory_wc(unsigned long addr, int numpages) ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_WC, NULL); if (ret) - goto out_err; + return ret; ret = _set_memory_wc(addr, numpages); if (ret) - goto out_free; + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); - return 0; - -out_free: - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); -out_err: return ret; } EXPORT_SYMBOL(set_memory_wc); +int _set_memory_wt(unsigned long addr, int numpages) +{ + return change_page_attr_set(&addr, numpages, + cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0); +} + +int set_memory_wt(unsigned long addr, int numpages) +{ + int ret; + + ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + _PAGE_CACHE_MODE_WT, NULL); + if (ret) + return ret; + + ret = _set_memory_wt(addr, numpages); + if (ret) + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + + return ret; +} +EXPORT_SYMBOL_GPL(set_memory_wt); + int _set_memory_wb(unsigned long addr, int numpages) { /* WB cache mode is hard wired to all cache attribute bits being 0 */ @@ -1680,6 +1705,7 @@ static int _set_pages_array(struct page **pages, int addrinarray, { unsigned long start; unsigned long end; + enum page_cache_mode set_type; int i; int free_idx; int ret; @@ -1693,8 +1719,12 @@ static int _set_pages_array(struct page **pages, int addrinarray, goto err_out; } + /* If WC, set to UC- first and then WC */ + set_type = (new_type == _PAGE_CACHE_MODE_WC) ? + _PAGE_CACHE_MODE_UC_MINUS : new_type; + ret = cpa_set_pages_array(pages, addrinarray, - cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS)); + cachemode2pgprot(set_type)); if (!ret && new_type == _PAGE_CACHE_MODE_WC) ret = change_page_attr_set_clr(NULL, addrinarray, cachemode2pgprot( @@ -1728,6 +1758,12 @@ int set_pages_array_wc(struct page **pages, int addrinarray) } EXPORT_SYMBOL(set_pages_array_wc); +int set_pages_array_wt(struct page **pages, int addrinarray) +{ + return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WT); +} +EXPORT_SYMBOL_GPL(set_pages_array_wt); + int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); From 957561ec0fa8a701f60ca6a0f40cc46f5c554920 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:21 +0200 Subject: [PATCH 461/481] drivers/block/pmem: Map NVDIMM in Write-Through mode The pmem driver maps NVDIMM uncacheable so that we don't lose data which hasn't reached non-volatile storage in the case of a crash. 
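Uncached mappings, however, also force every read to go all the way to the medium even though reads carry no persistence risk; a write-through mapping keeps the guarantee for stores while letting reads hit the CPU cache. A minimal sketch of the idea, assuming <linux/io.h> and hypothetical names ('pmem_map_example', 'phys', 'size'); this is not the driver's actual probe code:

	static int pmem_map_example(resource_size_t phys, unsigned long size)
	{
		void __iomem *p = ioremap_wt(phys, size);

		if (!p)
			return -ENOMEM;

		writeq(0x1234, p);	/* write-through: pushed straight to the NVDIMM */
		(void)readq(p);		/* read back: may be served from the CPU cache  */

		iounmap(p);
		return 0;
	}
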
Change this to Write-Through mode which provides uncached writes but cached reads, thus improving read performance. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Acked-by: Dan Williams Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-14-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- drivers/block/pmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/pmem.c b/drivers/block/pmem.c index eabf4a8d0085..095dfaadcaa5 100644 --- a/drivers/block/pmem.c +++ b/drivers/block/pmem.c @@ -139,11 +139,11 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res) } /* - * Map the memory as non-cachable, as we can't write back the contents + * Map the memory as write-through, as we can't write back the contents * of the CPU caches in case of a crash. */ err = -ENOMEM; - pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size); + pmem->virt_addr = ioremap_wt(pmem->phys_addr, pmem->size); if (!pmem->virt_addr) goto out_release_region; From bc12edb8739be10fae9f86691a3708d36d00b4f8 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 4 Jun 2015 18:55:22 +0200 Subject: [PATCH 462/481] x86/mce: Add Local MCE definitions Add required definitions to support Local Machine Check Exceptions. Historically, machine check exceptions on Intel x86 processors have been broadcast to all logical processors in the system. Upcoming CPUs will support an opt-in mechanism to request some machine check exceptions be delivered to a single logical processor experiencing the fault. See http://www.intel.com/sdm Volume 3, System Programming Guide, chapter 15 for more information on MSRs and documentation on Local MCE. Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1433436928-31903-15-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 5 +++++ arch/x86/include/uapi/asm/msr-index.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6a3034a0a072..ae2bfb895994 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -17,11 +17,16 @@ #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ #define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */ +#define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */ /* MCG_STATUS register defines */ #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ +#define MCG_STATUS_LMCES (1ULL<<3) /* LMCE signaled */ + +/* MCG_EXT_CTL register defines */ +#define MCG_EXT_CTL_LMCE_EN (1ULL<<0) /* Enable LMCE */ /* MCi_STATUS register defines */ #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c469490db4a8..32c69d5fbeac 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -56,6 +56,7 @@ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_IA32_MCG_EXT_CTL 0x000004d0 #define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_1 0x000001a7 @@ -379,6 +380,7 @@ #define FEATURE_CONTROL_LOCKED (1<<0) #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) +#define FEATURE_CONTROL_LMCE (1<<20) #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) From 88d538672ea26223bca08225bc49f4e65e71683d Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 4 Jun 2015 18:55:23 +0200 Subject: [PATCH 463/481] x86/mce: Add infrastructure to support Local MCE Initialize and prepare for handling LMCEs. Add a boot-time option to disable LMCEs. Signed-off-by: Ashok Raj [ Simplify stuff, align statements for better readability, reflow comments; kill unused lmce_clear(); save us an MSR write if LMCE is already enabled. ] Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1433436928-31903-16-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/boot-options.txt | 3 ++ arch/x86/include/asm/mce.h | 5 +++ arch/x86/kernel/cpu/mcheck/mce.c | 3 ++ arch/x86/kernel/cpu/mcheck/mce_intel.c | 43 +++++++++++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 5223479291a2..68ed3114c363 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -31,6 +31,9 @@ Machine check (e.g. BIOS or hardware monitoring applications), conflicting with OS's error handling, and you cannot deactivate the agent, then this option will be a help. + mce=no_lmce + Do not opt-in to Local MCE delivery. Use legacy method + to broadcast MCEs. mce=bootlog Enable logging of machine checks left over from booting. 
Disabled by default on AMD because some BIOS leave bogus ones. diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ae2bfb895994..982dfc3679ad 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -109,6 +109,7 @@ struct mce_log { struct mca_config { bool dont_log_ce; bool cmci_disabled; + bool lmce_disabled; bool ignore_ce; bool disabled; bool ser; @@ -184,12 +185,16 @@ void cmci_clear(void); void cmci_reenable(void); void cmci_rediscover(void); void cmci_recheck(void); +void lmce_clear(void); +void lmce_enable(void); #else static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } static inline void cmci_clear(void) {} static inline void cmci_reenable(void) {} static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} +static inline void lmce_clear(void) {} +static inline void lmce_enable(void) {} #endif #ifdef CONFIG_X86_MCE_AMD diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0cbcd3183acf..c8c6577b4ada 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1982,6 +1982,7 @@ void mce_disable_bank(int bank) /* * mce=off Disables machine check * mce=no_cmci Disables CMCI + * mce=no_lmce Disables LMCE * mce=dont_log_ce Clears corrected events silently, no log created for CEs. * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) @@ -2005,6 +2006,8 @@ static int __init mcheck_enable(char *str) cfg->disabled = true; else if (!strcmp(str, "no_cmci")) cfg->cmci_disabled = true; + else if (!strcmp(str, "no_lmce")) + cfg->lmce_disabled = true; else if (!strcmp(str, "dont_log_ce")) cfg->dont_log_ce = true; else if (!strcmp(str, "ignore_ce")) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index b4a41cf030ed..2d872deb2c50 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -91,6 +91,36 @@ static int cmci_supported(int *banks) return !!(cap & MCG_CMCI_P); } +static bool lmce_supported(void) +{ + u64 tmp; + + if (mca_cfg.lmce_disabled) + return false; + + rdmsrl(MSR_IA32_MCG_CAP, tmp); + + /* + * LMCE depends on recovery support in the processor. Hence both + * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP. + */ + if ((tmp & (MCG_SER_P | MCG_LMCE_P)) != + (MCG_SER_P | MCG_LMCE_P)) + return false; + + /* + * BIOS should indicate support for LMCE by setting bit 20 in + * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will + * generate a #GP fault. 
+ */ + rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp); + if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) == + (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) + return true; + + return false; +} + bool mce_intel_cmci_poll(void) { if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) @@ -405,6 +435,19 @@ static void intel_init_cmci(void) cmci_recheck(); } +void intel_init_lmce(void) +{ + u64 val; + + if (!lmce_supported()) + return; + + rdmsrl(MSR_IA32_MCG_EXT_CTL, val); + + if (!(val & MCG_EXT_CTL_LMCE_EN)) + wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); +} + void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); From 243d657eaf540db882f73497060da5a4f7d86a90 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 4 Jun 2015 18:55:24 +0200 Subject: [PATCH 464/481] x86/mce: Handle Local MCE events Add the necessary changes to do_machine_check() to be able to process MCEs signaled as local MCEs. Typically, only recoverable errors (SRAR type) will be Signaled as LMCE. The architecture does not restrict to only those errors, however. When errors are signaled as LMCE, there is no need for the MCE handler to perform rendezvous with other logical processors unlike earlier processors that would broadcast machine check errors. Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1433436928-31903-17-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 32 +++++++++++++++++++++----- arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 + 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c8c6577b4ada..ddc46d67d93e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1047,6 +1047,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) char *msg = "Unknown"; u64 recover_paddr = ~0ull; int flags = MF_ACTION_REQUIRED; + int lmce = 0; prev_state = ist_enter(regs); @@ -1074,11 +1075,20 @@ void do_machine_check(struct pt_regs *regs, long error_code) kill_it = 1; /* - * Go through all the banks in exclusion of the other CPUs. - * This way we don't report duplicated events on shared banks - * because the first one to see it will clear it. + * Check if this MCE is signaled to only this logical processor */ - order = mce_start(&no_way_out); + if (m.mcgstatus & MCG_STATUS_LMCES) + lmce = 1; + else { + /* + * Go through all the banks in exclusion of the other CPUs. + * This way we don't report duplicated events on shared banks + * because the first one to see it will clear it. + * If this is a Local MCE, then no need to perform rendezvous. + */ + order = mce_start(&no_way_out); + } + for (i = 0; i < cfg->banks; i++) { __clear_bit(i, toclear); if (!test_bit(i, valid_banks)) @@ -1155,8 +1165,18 @@ void do_machine_check(struct pt_regs *regs, long error_code) * Do most of the synchronization with other CPUs. * When there's any problem use only local no_way_out state. */ - if (mce_end(order) < 0) - no_way_out = worst >= MCE_PANIC_SEVERITY; + if (!lmce) { + if (mce_end(order) < 0) + no_way_out = worst >= MCE_PANIC_SEVERITY; + } else { + /* + * Local MCE skipped calling mce_reign() + * If we found a fatal error, we need to panic here. 
+ */ + if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) + mce_panic("Machine check from unknown source", + NULL, NULL); + } /* * At insane "tolerant" levels we take no action. Otherwise diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 2d872deb2c50..844f56c5616d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -452,4 +452,5 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); intel_init_cmci(); + intel_init_lmce(); } From c8e56d20f2d190d54c0615775dcb6a23c1091681 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jun 2015 18:55:25 +0200 Subject: [PATCH 465/481] x86: Kill CONFIG_X86_HT In talking to Aravind recently about making certain AMD topology attributes available to the MCE injection module, it seemed like that CONFIG_X86_HT thing is more or less superfluous. It is def_bool y, depends on SMP and gets enabled in the majority of .configs - distro and otherwise - out there. So let's kill it and make code behind it depend directly on SMP. Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Bartosz Golaszewski Cc: Catalin Marinas Cc: Daniel Walter Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Igor Mammedov Cc: Jacob Shin Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1433436928-31903-18-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 8 ++------ arch/x86/include/asm/topology.h | 2 +- arch/x86/kernel/cpu/amd.c | 6 +++--- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/intel_cacheinfo.c | 8 ++++---- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8a5cca39e74f..7e39f9b22705 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -261,10 +261,6 @@ config X86_64_SMP def_bool y depends on X86_64 && SMP -config X86_HT - def_bool y - depends on SMP - config X86_32_LAZY_GS def_bool y depends on X86_32 && !CC_STACKPROTECTOR @@ -865,7 +861,7 @@ config NR_CPUS config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" - depends on X86_HT + depends on SMP ---help--- SMT scheduler support improves the CPU scheduler's decision making when dealing with Intel Pentium 4 chips with HyperThreading at a @@ -875,7 +871,7 @@ config SCHED_SMT config SCHED_MC def_bool y prompt "Multi-core scheduler support" - depends on X86_HT + depends on SMP ---help--- Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0e8f04f2c26f..8d717faeed22 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -26,7 +26,7 @@ #define _ASM_X86_TOPOLOGY_H #ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT +# ifdef CONFIG_SMP # define ENABLE_TOPO_DEFINES # endif #else diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e4cf63301ff4..eb4f01269b5d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -288,7 +288,7 @@ static int nearby_node(int apicid) * Assumption: Number of cores in each internal node is the same. 
* (2) AMD processors supporting compute units */ -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP static void amd_get_topology(struct cpuinfo_x86 *c) { u32 nodes, cores_per_cu = 1; @@ -341,7 +341,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) */ static void amd_detect_cmp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); @@ -420,7 +420,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c) static void early_init_amd_mc(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP unsigned bits, ecx; /* Multi core CPU? */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bec0b55863e..b6fe2e47f7f1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -508,7 +508,7 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) void detect_ht(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; int index_msb, core_bits; static bool printed; @@ -844,7 +844,7 @@ static void generic_identify(struct cpuinfo_x86 *c) if (c->cpuid_level >= 0x00000001) { c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT +# ifdef CONFIG_SMP c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); # else c->apicid = c->initial_apicid; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index edcb0e28c336..be4febc58b94 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -654,7 +654,7 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP unsigned int cpu = c->cpu_index; #endif @@ -773,19 +773,19 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l2) { l2 = new_l2; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP per_cpu(cpu_llc_id, cpu) = l2_id; #endif } if (new_l3) { l3 = new_l3; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP per_cpu(cpu_llc_id, cpu) = l3_id; #endif } -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP /* * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in * turns means that the only possibility is SMT (as indicated in From b72e7464e4cf80117938e6adb8c22fdc1ca46d42 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jun 2015 18:55:26 +0200 Subject: [PATCH 466/481] x86/uapi: Do not export as part of the user API headers This header containing all MSRs and respective bit definitions got exported to userspace in conjunction with the big UAPI shuffle. But, it doesn't belong in the UAPI headers because userspace can do its own MSR defines and exporting them from the kernel blocks us from doing cleanups/renames in that header. Which is ridiculous - it is not kernel's job to export such a header and keep MSRs list and their names stable. Signed-off-by: Borislav Petkov Acked-by: H. 
Peter Anvin Cc: Andrew Morton Cc: Andy Lutomirski Cc: David Howells Cc: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1433436928-31903-19-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/{uapi => }/asm/msr-index.h | 0 arch/x86/include/asm/msr.h | 3 ++- arch/x86/include/uapi/asm/msr.h | 2 -- tools/power/x86/turbostat/Makefile | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) rename arch/x86/include/{uapi => }/asm/msr-index.h (100%) diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/asm/msr-index.h similarity index 100% rename from arch/x86/include/uapi/asm/msr-index.h rename to arch/x86/include/asm/msr-index.h diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index de36f22eb0b9..441ecf83d81a 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -1,13 +1,14 @@ #ifndef _ASM_X86_MSR_H #define _ASM_X86_MSR_H -#include +#include "msr-index.h" #ifndef __ASSEMBLY__ #include #include #include +#include struct msr { union { diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h index 155e51048fa4..c41f4fe25483 100644 --- a/arch/x86/include/uapi/asm/msr.h +++ b/arch/x86/include/uapi/asm/msr.h @@ -1,8 +1,6 @@ #ifndef _UAPI_ASM_X86_MSR_H #define _UAPI_ASM_X86_MSR_H -#include - #ifndef __ASSEMBLY__ #include diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 4039854560d0..e367b1a85d70 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -9,7 +9,7 @@ endif turbostat : turbostat.c CFLAGS += -Wall -CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' +CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' %: %.c @mkdir -p $(BUILD_OUTPUT) From 2cd23553b488589f287457b7396470f5e3c40698 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2015 08:28:07 +0200 Subject: [PATCH 467/481] x86/asm/entry: Rename compat syscall entry points Rename the following system call entry points: ia32_cstar_target -> entry_SYSCALL_compat ia32_syscall -> entry_INT80_compat The generic naming scheme for x86 system call entry points is: entry_MNEMONIC_qualifier where 'qualifier' is one of _32, _64 or _compat. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- Documentation/x86/entry_64.txt | 4 ++-- arch/x86/entry/entry_64_compat.S | 8 ++++---- arch/x86/entry/syscall_32.c | 6 +++--- arch/x86/include/asm/proto.h | 4 ++-- arch/x86/kernel/asm-offsets_64.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/traps.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt index 9132b86176a3..33884d156125 100644 --- a/Documentation/x86/entry_64.txt +++ b/Documentation/x86/entry_64.txt @@ -18,10 +18,10 @@ Some of these entries are: - system_call: syscall instruction from 64-bit code. - - ia32_syscall: int 0x80 from 32-bit or 64-bit code; compat syscall + - entry_INT80_compat: int 0x80 from 32-bit or 64-bit code; compat syscall either way. - - ia32_syscall, ia32_sysenter: syscall and sysenter from 32-bit + - entry_INT80_compat, ia32_sysenter: syscall and sysenter from 32-bit code - interrupt: An array of entries. 
Every IDT vector that doesn't diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 9558dacf32b9..8058892fb5ff 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -288,7 +288,7 @@ ENDPROC(ia32_sysenter_target) * path below. We set up a complete hardware stack frame to share code * with the int 0x80 path. */ -ENTRY(ia32_cstar_target) +ENTRY(entry_SYSCALL_compat) /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -409,7 +409,7 @@ cstar_tracesys: RESTORE_EXTRA_REGS jmp cstar_do_call -END(ia32_cstar_target) +END(entry_SYSCALL_compat) ia32_badarg: ASM_CLAC @@ -445,7 +445,7 @@ ia32_ret_from_sys_call: * Assumes it is only called from user space and entered with interrupts off. */ -ENTRY(ia32_syscall) +ENTRY(entry_INT80_compat) /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -511,7 +511,7 @@ ia32_tracesys: movl %eax, %eax /* zero extension */ RESTORE_EXTRA_REGS jmp ia32_do_call -END(ia32_syscall) +END(entry_INT80_compat) .macro PTREGSCALL label, func ALIGN diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 3777189c4a19..e398d033673f 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -10,7 +10,7 @@ #else #define SYM(sym, compat) sym #define ia32_sys_call_table sys_call_table -#define __NR_ia32_syscall_max __NR_syscall_max +#define __NR_entry_INT80_compat_max __NR_syscall_max #endif #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ; @@ -23,11 +23,11 @@ typedef asmlinkage void (*sys_call_ptr_t)(void); extern asmlinkage void sys_ni_syscall(void); -__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { +__visible const sys_call_ptr_t ia32_sys_call_table[__NR_entry_INT80_compat_max+1] = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall, + [0 ... __NR_entry_INT80_compat_max] = &sys_ni_syscall, #include }; diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index a90f8972dad5..7d2961a231f1 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -8,8 +8,8 @@ void system_call(void); void syscall_init(void); -void ia32_syscall(void); -void ia32_cstar_target(void); +void entry_INT80_compat(void); +void entry_SYSCALL_compat(void); void ia32_sysenter_target(void); void x86_configure_nx(void); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index dcaab87da629..599afcf0005f 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -66,7 +66,7 @@ int main(void) DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); DEFINE(NR_syscalls, sizeof(syscalls_64)); - DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1); + DEFINE(__NR_entry_INT80_compat_max, sizeof(syscalls_ia32) - 1); DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); return 0; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bec0b55863e..f0b85c401014 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1207,7 +1207,7 @@ void syscall_init(void) wrmsrl(MSR_LSTAR, system_call); #ifdef CONFIG_IA32_EMULATION - wrmsrl(MSR_CSTAR, ia32_cstar_target); + wrmsrl(MSR_CSTAR, entry_SYSCALL_compat); /* * This only works on Intel CPUs. * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. 
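Condensed, the two compat registration sites touched in the hunks above and below end up wired as follows after the rename (a restatement of the changed lines, not additional code):

	/* syscall_init(), arch/x86/kernel/cpu/common.c: */
	wrmsrl(MSR_CSTAR, entry_SYSCALL_compat);		/* 32-bit SYSCALL entry       */

	/* trap_init(), arch/x86/kernel/traps.c: */
	set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);	/* int 0x80, either bitness */
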
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5e0791f9d3dc..edf97986a53d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -992,7 +992,7 @@ void __init trap_init(void) set_bit(i, used_vectors); #ifdef CONFIG_IA32_EMULATION - set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); + set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat); set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 04529e620559..3c43c03a499c 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -122,7 +122,7 @@ ENDPROC(xen_syscall_target) /* 32-bit compat syscall target */ ENTRY(xen_syscall32_target) undo_xen_syscall - jmp ia32_cstar_target + jmp entry_SYSCALL_compat ENDPROC(xen_syscall32_target) /* 32-bit compat sysenter target */ From 4c8cd0c50d0b1559727bf0ec7ff27caeba2dfe09 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2015 08:33:56 +0200 Subject: [PATCH 468/481] x86/asm/entry: Untangle 'ia32_sysenter_target' into two entry points: entry_SYSENTER_32 and entry_SYSENTER_compat So the SYSENTER instruction is pretty quirky and it has different behavior depending on bitness and CPU maker. Yet we create a false sense of coherency by naming it 'ia32_sysenter_target' in both of the cases. Split the name into its two uses: ia32_sysenter_target (32) -> entry_SYSENTER_32 ia32_sysenter_target (64) -> entry_SYSENTER_compat As per the generic naming scheme for x86 system call entry points: entry_MNEMONIC_qualifier where 'qualifier' is one of _32, _64 or _compat. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 10 +++++----- arch/x86/entry/entry_64_compat.S | 4 ++-- arch/x86/include/asm/proto.h | 3 ++- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/xen/xen-asm_64.S | 2 +- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 0ac73de925d1..a65f46c3b8e1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -307,7 +307,7 @@ END(resume_kernel) the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. 
*/ # sysenter call handler stub -ENTRY(ia32_sysenter_target) +ENTRY(entry_SYSENTER_32) movl TSS_sysenter_sp0(%esp),%esp sysenter_past_esp: /* @@ -412,7 +412,7 @@ sysexit_audit: .popsection _ASM_EXTABLE(1b,2b) PTGS_TO_GS_EX -ENDPROC(ia32_sysenter_target) +ENDPROC(entry_SYSENTER_32) # system call handler stub ENTRY(system_call) @@ -1135,7 +1135,7 @@ END(page_fault) ENTRY(debug) ASM_CLAC - cmpl $ia32_sysenter_target,(%esp) + cmpl $entry_SYSENTER_32,(%esp) jne debug_stack_correct FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn debug_stack_correct: @@ -1165,7 +1165,7 @@ ENTRY(nmi) popl %eax je nmi_espfix_stack #endif - cmpl $ia32_sysenter_target,(%esp) + cmpl $entry_SYSENTER_32,(%esp) je nmi_stack_fixup pushl %eax movl %esp,%eax @@ -1176,7 +1176,7 @@ ENTRY(nmi) cmpl $(THREAD_SIZE-20),%eax popl %eax jae nmi_stack_correct - cmpl $ia32_sysenter_target,12(%esp) + cmpl $entry_SYSENTER_32,12(%esp) je nmi_debug_stack_check nmi_stack_correct: pushl %eax diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 8058892fb5ff..59840e33d203 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -57,7 +57,7 @@ ENDPROC(native_usergs_sysret32) * path below. We set up a complete hardware stack frame to share code * with the int 0x80 path. */ -ENTRY(ia32_sysenter_target) +ENTRY(entry_SYSENTER_compat) /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -256,7 +256,7 @@ sysenter_tracesys: RESTORE_EXTRA_REGS jmp sysenter_do_call -ENDPROC(ia32_sysenter_target) +ENDPROC(entry_SYSENTER_compat) /* * 32-bit SYSCALL instruction entry. diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 7d2961a231f1..83a7f8227949 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -10,7 +10,8 @@ void syscall_init(void); void entry_INT80_compat(void); void entry_SYSCALL_compat(void); -void ia32_sysenter_target(void); +void entry_SYSENTER_32(void); +void entry_SYSENTER_compat(void); void x86_configure_nx(void); void x86_report_nx(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f0b85c401014..b2ae7cec33ca 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1026,7 +1026,7 @@ void enable_sep_cpu(void) (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), 0); - wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0); + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); out: put_cpu(); @@ -1216,7 +1216,7 @@ void syscall_init(void) */ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, ignore_sysret); wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 3c43c03a499c..ccac1b1e6e93 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -128,7 +128,7 @@ ENDPROC(xen_syscall32_target) /* 32-bit compat sysenter target */ ENTRY(xen_sysenter_target) undo_xen_syscall - jmp ia32_sysenter_target + jmp entry_SYSENTER_compat ENDPROC(xen_sysenter_target) #else /* !CONFIG_IA32_EMULATION */ From b2502b418e63fcde0fe1857732a476b5aa3789b1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2015 08:42:03 +0200 Subject: [PATCH 469/481] x86/asm/entry: Untangle 'system_call' into two 
entry points: entry_SYSCALL_64 and entry_INT80_32 The 'system_call' entry points differ starkly between native 32-bit and 64-bit kernels: on 32-bit kernels it defines the INT 0x80 entry point, while on 64-bit it's the SYSCALL entry point. This is pretty confusing when looking at generic code, and it also obscures the nature of the entry point at the assembly level. So unangle this by splitting the name into its two uses: system_call (32) -> entry_INT80_32 system_call (64) -> entry_SYSCALL_64 As per the generic naming scheme for x86 system call entry points: entry_MNEMONIC_qualifier where 'qualifier' is one of _32, _64 or _compat. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 4 ++-- arch/x86/entry/entry_64.S | 10 +++++----- arch/x86/include/asm/proto.h | 5 +++-- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/traps.c | 5 ++--- arch/x86/xen/xen-asm_64.S | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a65f46c3b8e1..d59461032625 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -415,7 +415,7 @@ sysexit_audit: ENDPROC(entry_SYSENTER_32) # system call handler stub -ENTRY(system_call) +ENTRY(entry_INT80_32) ASM_CLAC pushl %eax # save orig_eax SAVE_ALL @@ -508,7 +508,7 @@ ldt_ss: lss (%esp), %esp /* switch to espfix segment */ jmp restore_nocheck #endif -ENDPROC(system_call) +ENDPROC(entry_INT80_32) # perform work that needs to be done immediately before resumption ALIGN diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4cf3dd36aa0d..e1852c407155 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -137,7 +137,7 @@ ENDPROC(native_usergs_sysret64) * with them due to bugs in both AMD and Intel CPUs. */ -ENTRY(system_call) +ENTRY(entry_SYSCALL_64) /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -149,7 +149,7 @@ ENTRY(system_call) * after the swapgs, so that it can do the swapgs * for the guest and jump here on syscall. 
*/ -GLOBAL(system_call_after_swapgs) +GLOBAL(entry_SYSCALL_64_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp @@ -182,7 +182,7 @@ GLOBAL(system_call_after_swapgs) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz tracesys -system_call_fastpath: +entry_SYSCALL_64_fastpath: #if __SYSCALL_MASK == ~0 cmpq $__NR_syscall_max,%rax #else @@ -246,7 +246,7 @@ tracesys: jnz tracesys_phase2 /* if needed, run the slow path */ RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ movq ORIG_RAX(%rsp), %rax - jmp system_call_fastpath /* and return to the fast path */ + jmp entry_SYSCALL_64_fastpath /* and return to the fast path */ tracesys_phase2: SAVE_EXTRA_REGS @@ -411,7 +411,7 @@ syscall_return_via_sysret: opportunistic_sysret_failed: SWAPGS jmp restore_c_regs_and_iret -END(system_call) +END(entry_SYSCALL_64) .macro FORK_LIKE func diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 83a7f8227949..a4a77286cb1d 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -5,11 +5,12 @@ /* misc architecture specific prototypes */ -void system_call(void); void syscall_init(void); -void entry_INT80_compat(void); +void entry_SYSCALL_64(void); void entry_SYSCALL_compat(void); +void entry_INT80_32(void); +void entry_INT80_compat(void); void entry_SYSENTER_32(void); void entry_SYSENTER_compat(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b2ae7cec33ca..914be4bbc2e5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1204,7 +1204,7 @@ void syscall_init(void) * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. */ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_LSTAR, entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, entry_SYSCALL_compat); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index edf97986a53d..001ddac221a1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -72,8 +72,7 @@ gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss; #else #include #include - -asmlinkage int system_call(void); +#include #endif /* Must be page-aligned because the real IDT is used in a fixmap. 
*/ @@ -997,7 +996,7 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call); + set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index ccac1b1e6e93..f22667abf7b9 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -114,7 +114,7 @@ RELOC(xen_sysret32, 1b+1) /* Normal 64-bit system call target */ ENTRY(xen_syscall_target) undo_xen_syscall - jmp system_call_after_swapgs + jmp entry_SYSCALL_64_after_swapgs ENDPROC(xen_syscall_target) #ifdef CONFIG_IA32_EMULATION From a49976d14f780942dafafbbf16f891c27d385ea0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2015 09:49:11 +0200 Subject: [PATCH 470/481] x86/asm/entry/32: Clean up entry_32.S Make the 32-bit syscall entry code a bit more readable: - use consistent assembly coding style similar to entry_64.S - remove old comments that are not true anymore - eliminate whitespace noise - use consistent vertical spacing - fix various comments No code changed: # arch/x86/entry/entry_32.o: text data bss dec hex filename 6025 0 0 6025 1789 entry_32.o.before 6025 0 0 6025 1789 entry_32.o.after md5: f3fa16b2b0dca804f052deb6b30ba6cb entry_32.o.before.asm f3fa16b2b0dca804f052deb6b30ba6cb entry_32.o.after.asm Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 1125 ++++++++++++++++++------------------- 1 file changed, 562 insertions(+), 563 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index d59461032625..edd7aadfacfa 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1,23 +1,12 @@ /* + * Copyright (C) 1991,1992 Linus Torvalds * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * entry.S contains the system-call and fault low-level handling routines. - * This also contains the timer-interrupt handler, as well as all interrupts - * and faults that can result in a task-switch. - * - * NOTE: This code handles signal-recognition, which happens every time - * after a timer-interrupt and after each system call. - * - * I changed all the .align's to 4 (16 byte alignment), as that's faster - * on a 486. + * entry_32.S contains the system-call and low-level fault and trap handling routines. * * Stack layout in 'syscall_exit': - * ptrace needs to have all regs on the stack. - * if the order here is changed, it needs to be - * updated in fork.c:copy_process, signal.c:do_signal, + * ptrace needs to have all registers on the stack. + * If the order here is changed, it needs to be + * updated in fork.c:copy_process(), signal.c:do_signal(), * ptrace.c and ptrace.h * * 0(%esp) - %ebx @@ -37,8 +26,6 @@ * 38(%esp) - %eflags * 3C(%esp) - %oldesp * 40(%esp) - %oldss - * - * "current" is in register %ebx during any slow entries. */ #include @@ -61,11 +48,11 @@ /* Avoid __ASSEMBLER__'ifying just for this. 
*/ #include #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) -#define __AUDIT_ARCH_LE 0x40000000 +#define __AUDIT_ARCH_LE 0x40000000 #ifndef CONFIG_AUDITSYSCALL -#define sysenter_audit syscall_trace_entry -#define sysexit_audit syscall_exit_work +# define sysenter_audit syscall_trace_entry +# define sysexit_audit syscall_exit_work #endif .section .entry.text, "ax" @@ -84,16 +71,16 @@ */ #ifdef CONFIG_PREEMPT -#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF +# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else -#define preempt_stop(clobbers) -#define resume_kernel restore_all +# define preempt_stop(clobbers) +# define resume_kernel restore_all #endif .macro TRACE_IRQS_IRET #ifdef CONFIG_TRACE_IRQFLAGS - testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off? - jz 1f + testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off? + jz 1f TRACE_IRQS_ON 1: #endif @@ -112,10 +99,10 @@ /* unfortunately push/pop can't be no-op */ .macro PUSH_GS - pushl $0 + pushl $0 .endm .macro POP_GS pop=0 - addl $(4 + \pop), %esp + addl $(4 + \pop), %esp .endm .macro POP_GS_EX .endm @@ -135,119 +122,119 @@ #else /* CONFIG_X86_32_LAZY_GS */ .macro PUSH_GS - pushl %gs + pushl %gs .endm .macro POP_GS pop=0 -98: popl %gs +98: popl %gs .if \pop <> 0 add $\pop, %esp .endif .endm .macro POP_GS_EX .pushsection .fixup, "ax" -99: movl $0, (%esp) - jmp 98b +99: movl $0, (%esp) + jmp 98b .popsection - _ASM_EXTABLE(98b,99b) + _ASM_EXTABLE(98b, 99b) .endm .macro PTGS_TO_GS -98: mov PT_GS(%esp), %gs +98: mov PT_GS(%esp), %gs .endm .macro PTGS_TO_GS_EX .pushsection .fixup, "ax" -99: movl $0, PT_GS(%esp) - jmp 98b +99: movl $0, PT_GS(%esp) + jmp 98b .popsection - _ASM_EXTABLE(98b,99b) + _ASM_EXTABLE(98b, 99b) .endm .macro GS_TO_REG reg - movl %gs, \reg + movl %gs, \reg .endm .macro REG_TO_PTGS reg - movl \reg, PT_GS(%esp) + movl \reg, PT_GS(%esp) .endm .macro SET_KERNEL_GS reg - movl $(__KERNEL_STACK_CANARY), \reg - movl \reg, %gs + movl $(__KERNEL_STACK_CANARY), \reg + movl \reg, %gs .endm -#endif /* CONFIG_X86_32_LAZY_GS */ +#endif /* CONFIG_X86_32_LAZY_GS */ .macro SAVE_ALL cld PUSH_GS - pushl %fs - pushl %es - pushl %ds - pushl %eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx - movl $(__USER_DS), %edx - movl %edx, %ds - movl %edx, %es - movl $(__KERNEL_PERCPU), %edx - movl %edx, %fs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + movl $(__USER_DS), %edx + movl %edx, %ds + movl %edx, %es + movl $(__KERNEL_PERCPU), %edx + movl %edx, %fs SET_KERNEL_GS %edx .endm .macro RESTORE_INT_REGS - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax .endm .macro RESTORE_REGS pop=0 RESTORE_INT_REGS -1: popl %ds -2: popl %es -3: popl %fs +1: popl %ds +2: popl %es +3: popl %fs POP_GS \pop .pushsection .fixup, "ax" -4: movl $0, (%esp) - jmp 1b -5: movl $0, (%esp) - jmp 2b -6: movl $0, (%esp) - jmp 3b +4: movl $0, (%esp) + jmp 1b +5: movl $0, (%esp) + jmp 2b +6: movl $0, (%esp) + jmp 3b .popsection - _ASM_EXTABLE(1b,4b) - _ASM_EXTABLE(2b,5b) - _ASM_EXTABLE(3b,6b) + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 5b) + _ASM_EXTABLE(3b, 6b) POP_GS_EX .endm ENTRY(ret_from_fork) - pushl %eax - call schedule_tail + pushl %eax + call schedule_tail GET_THREAD_INFO(%ebp) - popl %eax - pushl $0x0202 # Reset kernel eflags + popl %eax + pushl $0x0202 # Reset kernel eflags popfl - jmp 
syscall_exit + jmp syscall_exit END(ret_from_fork) ENTRY(ret_from_kernel_thread) - pushl %eax - call schedule_tail + pushl %eax + call schedule_tail GET_THREAD_INFO(%ebp) - popl %eax - pushl $0x0202 # Reset kernel eflags + popl %eax + pushl $0x0202 # Reset kernel eflags popfl - movl PT_EBP(%esp),%eax - call *PT_EBX(%esp) - movl $0,PT_EAX(%esp) - jmp syscall_exit + movl PT_EBP(%esp), %eax + call *PT_EBX(%esp) + movl $0, PT_EAX(%esp) + jmp syscall_exit ENDPROC(ret_from_kernel_thread) /* @@ -264,62 +251,65 @@ ret_from_exception: ret_from_intr: GET_THREAD_INFO(%ebp) #ifdef CONFIG_VM86 - movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS - movb PT_CS(%esp), %al - andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS + movb PT_CS(%esp), %al + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax #else /* * We can be coming here from child spawned by kernel_thread(). */ - movl PT_CS(%esp), %eax - andl $SEGMENT_RPL_MASK, %eax + movl PT_CS(%esp), %eax + andl $SEGMENT_RPL_MASK, %eax #endif - cmpl $USER_RPL, %eax - jb resume_kernel # not returning to v8086 or userspace + cmpl $USER_RPL, %eax + jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done on - # int/exception return? - jne work_pending - jmp restore_all + movl TI_flags(%ebp), %ecx + andl $_TIF_WORK_MASK, %ecx # is there any work to be done on + # int/exception return? + jne work_pending + jmp restore_all END(ret_from_exception) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) need_resched: - cmpl $0,PER_CPU_VAR(__preempt_count) - jnz restore_all - testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all - call preempt_schedule_irq - jmp need_resched + cmpl $0, PER_CPU_VAR(__preempt_count) + jnz restore_all + testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? + jz restore_all + call preempt_schedule_irq + jmp need_resched END(resume_kernel) #endif -/* SYSENTER_RETURN points to after the "sysenter" instruction in - the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ +/* + * SYSENTER_RETURN points to after the SYSENTER instruction + * in the vsyscall page. See vsyscall-sysentry.S, which defines + * the symbol. + */ - # sysenter call handler stub + # SYSENTER call handler stub ENTRY(entry_SYSENTER_32) - movl TSS_sysenter_sp0(%esp),%esp + movl TSS_sysenter_sp0(%esp), %esp sysenter_past_esp: /* * Interrupts are disabled here, but we can't trace it until * enough kernel state to call TRACE_IRQS_OFF can be called - but * we immediately enable interrupts at that point anyway. */ - pushl $__USER_DS - pushl %ebp + pushl $__USER_DS + pushl %ebp pushfl - orl $X86_EFLAGS_IF, (%esp) - pushl $__USER_CS + orl $X86_EFLAGS_IF, (%esp) + pushl $__USER_CS /* * Push current_thread_info()->sysenter_return to the stack. * A tiny bit of offset fixup is necessary: TI_sysenter_return @@ -328,9 +318,9 @@ sysenter_past_esp: * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack; * and THREAD_SIZE takes us to the bottom. 
*/ - pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) + pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) - pushl %eax + pushl %eax SAVE_ALL ENABLE_INTERRUPTS(CLBR_NONE) @@ -338,132 +328,134 @@ sysenter_past_esp: * Load the potential sixth argument from user stack. * Careful about security. */ - cmpl $__PAGE_OFFSET-3,%ebp - jae syscall_fault + cmpl $__PAGE_OFFSET-3, %ebp + jae syscall_fault ASM_STAC -1: movl (%ebp),%ebp +1: movl (%ebp), %ebp ASM_CLAC - movl %ebp,PT_EBP(%esp) - _ASM_EXTABLE(1b,syscall_fault) + movl %ebp, PT_EBP(%esp) + _ASM_EXTABLE(1b, syscall_fault) GET_THREAD_INFO(%ebp) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) - jnz sysenter_audit + testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) + jnz sysenter_audit sysenter_do_call: - cmpl $(NR_syscalls), %eax - jae sysenter_badsys - call *sys_call_table(,%eax,4) + cmpl $(NR_syscalls), %eax + jae sysenter_badsys + call *sys_call_table(, %eax, 4) sysenter_after_call: - movl %eax,PT_EAX(%esp) + movl %eax, PT_EAX(%esp) LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $_TIF_ALLWORK_MASK, %ecx - jnz sysexit_audit + movl TI_flags(%ebp), %ecx + testl $_TIF_ALLWORK_MASK, %ecx + jnz sysexit_audit sysenter_exit: /* if something modifies registers it must also disable sysexit */ - movl PT_EIP(%esp), %edx - movl PT_OLDESP(%esp), %ecx - xorl %ebp,%ebp + movl PT_EIP(%esp), %edx + movl PT_OLDESP(%esp), %ecx + xorl %ebp, %ebp TRACE_IRQS_ON -1: mov PT_FS(%esp), %fs +1: mov PT_FS(%esp), %fs PTGS_TO_GS ENABLE_INTERRUPTS_SYSEXIT #ifdef CONFIG_AUDITSYSCALL sysenter_audit: - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) - jnz syscall_trace_entry - /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */ - movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */ - /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */ - pushl PT_ESI(%esp) /* a3: 5th arg */ - pushl PT_EDX+4(%esp) /* a2: 4th arg */ - call __audit_syscall_entry - popl %ecx /* get that remapped edx off the stack */ - popl %ecx /* get that remapped esi off the stack */ - movl PT_EAX(%esp),%eax /* reload syscall number */ - jmp sysenter_do_call + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp) + jnz syscall_trace_entry + /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */ + movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */ + /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */ + pushl PT_ESI(%esp) /* a3: 5th arg */ + pushl PT_EDX+4(%esp) /* a2: 4th arg */ + call __audit_syscall_entry + popl %ecx /* get that remapped edx off the stack */ + popl %ecx /* get that remapped esi off the stack */ + movl PT_EAX(%esp), %eax /* reload syscall number */ + jmp sysenter_do_call sysexit_audit: - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx - jnz syscall_exit_work + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx + jnz syscall_exit_work TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) - movl %eax,%edx /* second arg, syscall return value */ - cmpl $-MAX_ERRNO,%eax /* is it an error ? */ - setbe %al /* 1 if so, 0 if not */ - movzbl %al,%eax /* zero-extend that */ - call __audit_syscall_exit + movl %eax, %edx /* second arg, syscall return value */ + cmpl $-MAX_ERRNO, %eax /* is it an error ? 
*/ + setbe %al /* 1 if so, 0 if not */ + movzbl %al, %eax /* zero-extend that */ + call __audit_syscall_exit DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx - jnz syscall_exit_work - movl PT_EAX(%esp),%eax /* reload syscall return value */ - jmp sysenter_exit + movl TI_flags(%ebp), %ecx + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx + jnz syscall_exit_work + movl PT_EAX(%esp), %eax /* reload syscall return value */ + jmp sysenter_exit #endif -.pushsection .fixup,"ax" -2: movl $0,PT_FS(%esp) - jmp 1b +.pushsection .fixup, "ax" +2: movl $0, PT_FS(%esp) + jmp 1b .popsection - _ASM_EXTABLE(1b,2b) + _ASM_EXTABLE(1b, 2b) PTGS_TO_GS_EX ENDPROC(entry_SYSENTER_32) # system call handler stub ENTRY(entry_INT80_32) ASM_CLAC - pushl %eax # save orig_eax + pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) - # system call tracing in operation / emulation - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) - jnz syscall_trace_entry - cmpl $(NR_syscalls), %eax - jae syscall_badsys + # system call tracing in operation / emulation + testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) + jnz syscall_trace_entry + cmpl $(NR_syscalls), %eax + jae syscall_badsys syscall_call: - call *sys_call_table(,%eax,4) + call *sys_call_table(, %eax, 4) syscall_after_call: - movl %eax,PT_EAX(%esp) # store the return value + movl %eax, PT_EAX(%esp) # store the return value syscall_exit: LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $_TIF_ALLWORK_MASK, %ecx # current->work - jnz syscall_exit_work + movl TI_flags(%ebp), %ecx + testl $_TIF_ALLWORK_MASK, %ecx # current->work + jnz syscall_exit_work restore_all: TRACE_IRQS_IRET restore_all_notrace: #ifdef CONFIG_X86_ESPFIX32 - movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS - # Warning: PT_OLDSS(%esp) contains the wrong/random values if we - # are returning to the kernel. - # See comments in process.c:copy_thread() for details. - movb PT_OLDSS(%esp), %ah - movb PT_CS(%esp), %al - andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax - cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax - je ldt_ss # returning to user-space with LDT SS + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS + /* + * Warning: PT_OLDSS(%esp) contains the wrong/random values if we + * are returning to the kernel. + * See comments in process.c:copy_thread() for details. + */ + movb PT_OLDSS(%esp), %ah + movb PT_CS(%esp), %al + andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax + je ldt_ss # returning to user-space with LDT SS #endif restore_nocheck: - RESTORE_REGS 4 # skip orig_eax/error_code + RESTORE_REGS 4 # skip orig_eax/error_code irq_return: INTERRUPT_RETURN -.section .fixup,"ax" -ENTRY(iret_exc) - pushl $0 # no error code - pushl $do_iret_error - jmp error_code +.section .fixup, "ax" +ENTRY(iret_exc ) + pushl $0 # no error code + pushl $do_iret_error + jmp error_code .previous - _ASM_EXTABLE(irq_return,iret_exc) + _ASM_EXTABLE(irq_return, iret_exc) #ifdef CONFIG_X86_ESPFIX32 ldt_ss: @@ -476,8 +468,8 @@ ldt_ss: * is still available to implement the setting of the high * 16-bits in the INTERRUPT_RETURN paravirt-op. 
*/ - cmpl $0, pv_info+PARAVIRT_enabled - jne restore_nocheck + cmpl $0, pv_info+PARAVIRT_enabled + jne restore_nocheck #endif /* @@ -492,21 +484,23 @@ ldt_ss: * a base address that matches for the difference. */ #define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) - mov %esp, %edx /* load kernel esp */ - mov PT_OLDESP(%esp), %eax /* load userspace esp */ - mov %dx, %ax /* eax: new kernel esp */ - sub %eax, %edx /* offset (low word is 0) */ + mov %esp, %edx /* load kernel esp */ + mov PT_OLDESP(%esp), %eax /* load userspace esp */ + mov %dx, %ax /* eax: new kernel esp */ + sub %eax, %edx /* offset (low word is 0) */ shr $16, %edx - mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ - mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ - pushl $__ESPFIX_SS - pushl %eax /* new kernel esp */ - /* Disable interrupts, but do not irqtrace this section: we + mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ + mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ + pushl $__ESPFIX_SS + pushl %eax /* new kernel esp */ + /* + * Disable interrupts, but do not irqtrace this section: we * will soon execute iret and the tracer was already set to - * the irqstate after the iret */ + * the irqstate after the IRET: + */ DISABLE_INTERRUPTS(CLBR_EAX) - lss (%esp), %esp /* switch to espfix segment */ - jmp restore_nocheck + lss (%esp), %esp /* switch to espfix segment */ + jmp restore_nocheck #endif ENDPROC(entry_INT80_32) @@ -514,93 +508,93 @@ ENDPROC(entry_INT80_32) ALIGN work_pending: testb $_TIF_NEED_RESCHED, %cl - jz work_notifysig + jz work_notifysig work_resched: - call schedule + call schedule LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done other - # than syscall tracing? - jz restore_all + movl TI_flags(%ebp), %ecx + andl $_TIF_WORK_MASK, %ecx # is there any work to be done other + # than syscall tracing? 
+ jz restore_all testb $_TIF_NEED_RESCHED, %cl - jnz work_resched + jnz work_resched -work_notifysig: # deal with pending signals and - # notify-resume requests +work_notifysig: # deal with pending signals and + # notify-resume requests #ifdef CONFIG_VM86 - testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) - movl %esp, %eax - jnz work_notifysig_v86 # returning to kernel-space or - # vm86-space + testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) + movl %esp, %eax + jnz work_notifysig_v86 # returning to kernel-space or + # vm86-space 1: #else - movl %esp, %eax + movl %esp, %eax #endif TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - movb PT_CS(%esp), %bl + movb PT_CS(%esp), %bl andb $SEGMENT_RPL_MASK, %bl cmpb $USER_RPL, %bl - jb resume_kernel - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace + jb resume_kernel + xorl %edx, %edx + call do_notify_resume + jmp resume_userspace #ifdef CONFIG_VM86 ALIGN work_notifysig_v86: - pushl %ecx # save ti_flags for do_notify_resume - call save_v86_state # %eax contains pt_regs pointer - popl %ecx - movl %eax, %esp - jmp 1b + pushl %ecx # save ti_flags for do_notify_resume + call save_v86_state # %eax contains pt_regs pointer + popl %ecx + movl %eax, %esp + jmp 1b #endif END(work_pending) # perform syscall exit tracing ALIGN syscall_trace_entry: - movl $-ENOSYS,PT_EAX(%esp) - movl %esp, %eax - call syscall_trace_enter + movl $-ENOSYS, PT_EAX(%esp) + movl %esp, %eax + call syscall_trace_enter /* What it returned is what we'll actually use. */ - cmpl $(NR_syscalls), %eax - jnae syscall_call - jmp syscall_exit + cmpl $(NR_syscalls), %eax + jnae syscall_call + jmp syscall_exit END(syscall_trace_entry) # perform syscall exit tracing ALIGN syscall_exit_work: - testl $_TIF_WORK_SYSCALL_EXIT, %ecx - jz work_pending + testl $_TIF_WORK_SYSCALL_EXIT, %ecx + jz work_pending TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call - # schedule() instead - movl %esp, %eax - call syscall_trace_leave - jmp resume_userspace + ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call + # schedule() instead + movl %esp, %eax + call syscall_trace_leave + jmp resume_userspace END(syscall_exit_work) syscall_fault: ASM_CLAC GET_THREAD_INFO(%ebp) - movl $-EFAULT,PT_EAX(%esp) - jmp resume_userspace + movl $-EFAULT, PT_EAX(%esp) + jmp resume_userspace END(syscall_fault) syscall_badsys: - movl $-ENOSYS,%eax - jmp syscall_after_call + movl $-ENOSYS, %eax + jmp syscall_after_call END(syscall_badsys) sysenter_badsys: - movl $-ENOSYS,%eax - jmp sysenter_after_call + movl $-ENOSYS, %eax + jmp sysenter_after_call END(sysenter_badsys) .macro FIXUP_ESPFIX_STACK @@ -613,24 +607,24 @@ END(sysenter_badsys) */ #ifdef CONFIG_X86_ESPFIX32 /* fixup the stack */ - mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ - mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ + mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ + mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ shl $16, %eax - addl %esp, %eax /* the adjusted stack pointer */ - pushl $__KERNEL_DS - pushl %eax - lss (%esp), %esp /* switch to the normal stack segment */ + addl %esp, %eax /* the adjusted stack pointer */ + pushl $__KERNEL_DS + pushl %eax + lss (%esp), %esp /* switch to the normal stack segment */ #endif .endm .macro UNWIND_ESPFIX_STACK #ifdef CONFIG_X86_ESPFIX32 - movl %ss, %eax + movl %ss, %eax /* see if on espfix stack */ - cmpw $__ESPFIX_SS, %ax - jne 27f - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es + cmpw $__ESPFIX_SS, %ax + jne 27f + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es /* switch to 
normal stack */ FIXUP_ESPFIX_STACK 27: @@ -645,7 +639,7 @@ END(sysenter_badsys) ENTRY(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) - pushl $(~vector+0x80) /* Note: always in signed byte range */ + pushl $(~vector+0x80) /* Note: always in signed byte range */ vector=vector+1 jmp common_interrupt .align 8 @@ -659,35 +653,34 @@ END(irq_entries_start) .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: ASM_CLAC - addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ + addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ SAVE_ALL TRACE_IRQS_OFF - movl %esp,%eax - call do_IRQ - jmp ret_from_intr + movl %esp, %eax + call do_IRQ + jmp ret_from_intr ENDPROC(common_interrupt) #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ ASM_CLAC; \ - pushl $~(nr); \ + pushl $~(nr); \ SAVE_ALL; \ TRACE_IRQS_OFF \ - movl %esp,%eax; \ - call fn; \ - jmp ret_from_intr; \ + movl %esp, %eax; \ + call fn; \ + jmp ret_from_intr; \ ENDPROC(name) #ifdef CONFIG_TRACING -#define TRACE_BUILD_INTERRUPT(name, nr) \ - BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name) +# define TRACE_BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name) #else -#define TRACE_BUILD_INTERRUPT(name, nr) +# define TRACE_BUILD_INTERRUPT(name, nr) #endif -#define BUILD_INTERRUPT(name, nr) \ - BUILD_INTERRUPT3(name, nr, smp_##name); \ +#define BUILD_INTERRUPT(name, nr) \ + BUILD_INTERRUPT3(name, nr, smp_##name); \ TRACE_BUILD_INTERRUPT(name, nr) /* The include is where all of the SMP etc. interrupts come from */ @@ -695,30 +688,30 @@ ENDPROC(name) ENTRY(coprocessor_error) ASM_CLAC - pushl $0 - pushl $do_coprocessor_error - jmp error_code + pushl $0 + pushl $do_coprocessor_error + jmp error_code END(coprocessor_error) ENTRY(simd_coprocessor_error) ASM_CLAC - pushl $0 + pushl $0 #ifdef CONFIG_X86_INVD_BUG /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ - ALTERNATIVE "pushl $do_general_protection", \ - "pushl $do_simd_coprocessor_error", \ + ALTERNATIVE "pushl $do_general_protection", \ + "pushl $do_simd_coprocessor_error", \ X86_FEATURE_XMM #else - pushl $do_simd_coprocessor_error + pushl $do_simd_coprocessor_error #endif - jmp error_code + jmp error_code END(simd_coprocessor_error) ENTRY(device_not_available) ASM_CLAC - pushl $-1 # mark this as an int - pushl $do_device_not_available - jmp error_code + pushl $-1 # mark this as an int + pushl $do_device_not_available + jmp error_code END(device_not_available) #ifdef CONFIG_PARAVIRT @@ -735,165 +728,171 @@ END(native_irq_enable_sysexit) ENTRY(overflow) ASM_CLAC - pushl $0 - pushl $do_overflow - jmp error_code + pushl $0 + pushl $do_overflow + jmp error_code END(overflow) ENTRY(bounds) ASM_CLAC - pushl $0 - pushl $do_bounds - jmp error_code + pushl $0 + pushl $do_bounds + jmp error_code END(bounds) ENTRY(invalid_op) ASM_CLAC - pushl $0 - pushl $do_invalid_op - jmp error_code + pushl $0 + pushl $do_invalid_op + jmp error_code END(invalid_op) ENTRY(coprocessor_segment_overrun) ASM_CLAC - pushl $0 - pushl $do_coprocessor_segment_overrun - jmp error_code + pushl $0 + pushl $do_coprocessor_segment_overrun + jmp error_code END(coprocessor_segment_overrun) ENTRY(invalid_TSS) ASM_CLAC - pushl $do_invalid_TSS - jmp error_code + pushl $do_invalid_TSS + jmp error_code END(invalid_TSS) ENTRY(segment_not_present) ASM_CLAC - pushl $do_segment_not_present - jmp error_code + pushl $do_segment_not_present + jmp error_code END(segment_not_present) ENTRY(stack_segment) ASM_CLAC - pushl 
$do_stack_segment - jmp error_code + pushl $do_stack_segment + jmp error_code END(stack_segment) ENTRY(alignment_check) ASM_CLAC - pushl $do_alignment_check - jmp error_code + pushl $do_alignment_check + jmp error_code END(alignment_check) ENTRY(divide_error) ASM_CLAC - pushl $0 # no error code - pushl $do_divide_error - jmp error_code + pushl $0 # no error code + pushl $do_divide_error + jmp error_code END(divide_error) #ifdef CONFIG_X86_MCE ENTRY(machine_check) ASM_CLAC - pushl $0 - pushl machine_check_vector - jmp error_code + pushl $0 + pushl machine_check_vector + jmp error_code END(machine_check) #endif ENTRY(spurious_interrupt_bug) ASM_CLAC - pushl $0 - pushl $do_spurious_interrupt_bug - jmp error_code + pushl $0 + pushl $do_spurious_interrupt_bug + jmp error_code END(spurious_interrupt_bug) #ifdef CONFIG_XEN -/* Xen doesn't set %esp to be precisely what the normal sysenter - entrypoint expects, so fix it up before using the normal path. */ +/* + * Xen doesn't set %esp to be precisely what the normal SYSENTER + * entry point expects, so fix it up before using the normal path. + */ ENTRY(xen_sysenter_target) - addl $5*4, %esp /* remove xen-provided frame */ - jmp sysenter_past_esp + addl $5*4, %esp /* remove xen-provided frame */ + jmp sysenter_past_esp ENTRY(xen_hypervisor_callback) - pushl $-1 /* orig_ax = -1 => not a system call */ + pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL TRACE_IRQS_OFF - /* Check to see if we got the event in the critical - region in xen_iret_direct, after we've reenabled - events and checked for pending events. This simulates - iret instruction's behaviour where it delivers a - pending interrupt when enabling interrupts. */ - movl PT_EIP(%esp),%eax - cmpl $xen_iret_start_crit,%eax - jb 1f - cmpl $xen_iret_end_crit,%eax - jae 1f + /* + * Check to see if we got the event in the critical + * region in xen_iret_direct, after we've reenabled + * events and checked for pending events. This simulates + * iret instruction's behaviour where it delivers a + * pending interrupt when enabling interrupts: + */ + movl PT_EIP(%esp), %eax + cmpl $xen_iret_start_crit, %eax + jb 1f + cmpl $xen_iret_end_crit, %eax + jae 1f - jmp xen_iret_crit_fixup + jmp xen_iret_crit_fixup ENTRY(xen_do_upcall) -1: mov %esp, %eax - call xen_evtchn_do_upcall +1: mov %esp, %eax + call xen_evtchn_do_upcall #ifndef CONFIG_PREEMPT - call xen_maybe_preempt_hcall + call xen_maybe_preempt_hcall #endif - jmp ret_from_intr + jmp ret_from_intr ENDPROC(xen_hypervisor_callback) -# Hypervisor uses this for application faults while it executes. -# We get here for two reasons: -# 1. Fault while reloading DS, ES, FS or GS -# 2. Fault while executing IRET -# Category 1 we fix up by reattempting the load, and zeroing the segment -# register if the load fails. -# Category 2 we fix up by jumping to do_iret_error. We cannot use the -# normal Linux return path in this case because if we use the IRET hypercall -# to pop the stack frame we end up in an infinite loop of failsafe callbacks. -# We distinguish between categories by maintaining a status value in EAX. +/* + * Hypervisor uses this for application faults while it executes. + * We get here for two reasons: + * 1. Fault while reloading DS, ES, FS or GS + * 2. Fault while executing IRET + * Category 1 we fix up by reattempting the load, and zeroing the segment + * register if the load fails. + * Category 2 we fix up by jumping to do_iret_error. 
We cannot use the + * normal Linux return path in this case because if we use the IRET hypercall + * to pop the stack frame we end up in an infinite loop of failsafe callbacks. + * We distinguish between categories by maintaining a status value in EAX. + */ ENTRY(xen_failsafe_callback) - pushl %eax - movl $1,%eax -1: mov 4(%esp),%ds -2: mov 8(%esp),%es -3: mov 12(%esp),%fs -4: mov 16(%esp),%gs + pushl %eax + movl $1, %eax +1: mov 4(%esp), %ds +2: mov 8(%esp), %es +3: mov 12(%esp), %fs +4: mov 16(%esp), %gs /* EAX == 0 => Category 1 (Bad segment) EAX != 0 => Category 2 (Bad IRET) */ - testl %eax,%eax - popl %eax - lea 16(%esp),%esp - jz 5f - jmp iret_exc -5: pushl $-1 /* orig_ax = -1 => not a system call */ + testl %eax, %eax + popl %eax + lea 16(%esp), %esp + jz 5f + jmp iret_exc +5: pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL - jmp ret_from_exception + jmp ret_from_exception -.section .fixup,"ax" -6: xorl %eax,%eax - movl %eax,4(%esp) - jmp 1b -7: xorl %eax,%eax - movl %eax,8(%esp) - jmp 2b -8: xorl %eax,%eax - movl %eax,12(%esp) - jmp 3b -9: xorl %eax,%eax - movl %eax,16(%esp) - jmp 4b +.section .fixup, "ax" +6: xorl %eax, %eax + movl %eax, 4(%esp) + jmp 1b +7: xorl %eax, %eax + movl %eax, 8(%esp) + jmp 2b +8: xorl %eax, %eax + movl %eax, 12(%esp) + jmp 3b +9: xorl %eax, %eax + movl %eax, 16(%esp) + jmp 4b .previous - _ASM_EXTABLE(1b,6b) - _ASM_EXTABLE(2b,7b) - _ASM_EXTABLE(3b,8b) - _ASM_EXTABLE(4b,9b) + _ASM_EXTABLE(1b, 6b) + _ASM_EXTABLE(2b, 7b) + _ASM_EXTABLE(3b, 8b) + _ASM_EXTABLE(4b, 9b) ENDPROC(xen_failsafe_callback) BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, xen_evtchn_do_upcall) -#endif /* CONFIG_XEN */ +#endif /* CONFIG_XEN */ #if IS_ENABLED(CONFIG_HYPERV) @@ -910,28 +909,28 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) - pushl %eax - pushl %ecx - pushl %edx - pushl $0 /* Pass NULL as regs pointer */ - movl 4*4(%esp), %eax - movl 0x4(%ebp), %edx - movl function_trace_op, %ecx - subl $MCOUNT_INSN_SIZE, %eax + pushl %eax + pushl %ecx + pushl %edx + pushl $0 /* Pass NULL as regs pointer */ + movl 4*4(%esp), %eax + movl 0x4(%ebp), %edx + movl function_trace_op, %ecx + subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call ftrace_call: - call ftrace_stub + call ftrace_stub - addl $4,%esp /* skip NULL pointer */ - popl %edx - popl %ecx - popl %eax + addl $4, %esp /* skip NULL pointer */ + popl %edx + popl %ecx + popl %eax ftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: - jmp ftrace_stub + jmp ftrace_stub #endif .globl ftrace_stub @@ -949,72 +948,72 @@ ENTRY(ftrace_regs_caller) * as the current return ip is. We move the return ip into the * ip location, and move flags into the return ip location. 
*/ - pushl 4(%esp) /* save return ip into ip slot */ + pushl 4(%esp) /* save return ip into ip slot */ - pushl $0 /* Load 0 into orig_ax */ - pushl %gs - pushl %fs - pushl %es - pushl %ds - pushl %eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx + pushl $0 /* Load 0 into orig_ax */ + pushl %gs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx - movl 13*4(%esp), %eax /* Get the saved flags */ - movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ - /* clobbering return ip */ - movl $__KERNEL_CS,13*4(%esp) + movl 13*4(%esp), %eax /* Get the saved flags */ + movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ + /* clobbering return ip */ + movl $__KERNEL_CS, 13*4(%esp) - movl 12*4(%esp), %eax /* Load ip (1st parameter) */ - subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ - movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ - movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ - pushl %esp /* Save pt_regs as 4th parameter */ + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ + movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ + movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + pushl %esp /* Save pt_regs as 4th parameter */ GLOBAL(ftrace_regs_call) - call ftrace_stub + call ftrace_stub - addl $4, %esp /* Skip pt_regs */ - movl 14*4(%esp), %eax /* Move flags back into cs */ - movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ - movl 12*4(%esp), %eax /* Get return ip from regs->ip */ - movl %eax, 14*4(%esp) /* Put return ip back for ret */ + addl $4, %esp /* Skip pt_regs */ + movl 14*4(%esp), %eax /* Move flags back into cs */ + movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ + movl 12*4(%esp), %eax /* Get return ip from regs->ip */ + movl %eax, 14*4(%esp) /* Put return ip back for ret */ - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax - popl %ds - popl %es - popl %fs - popl %gs - addl $8, %esp /* Skip orig_ax and ip */ - popf /* Pop flags at end (no addl to corrupt flags) */ - jmp ftrace_ret + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $8, %esp /* Skip orig_ax and ip */ + popf /* Pop flags at end (no addl to corrupt flags) */ + jmp ftrace_ret popf - jmp ftrace_stub + jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) - cmpl $__PAGE_OFFSET, %esp - jb ftrace_stub /* Paging not enabled yet? */ + cmpl $__PAGE_OFFSET, %esp + jb ftrace_stub /* Paging not enabled yet? 
*/ - cmpl $ftrace_stub, ftrace_trace_function - jnz trace + cmpl $ftrace_stub, ftrace_trace_function + jnz trace #ifdef CONFIG_FUNCTION_GRAPH_TRACER - cmpl $ftrace_stub, ftrace_graph_return - jnz ftrace_graph_caller + cmpl $ftrace_stub, ftrace_graph_return + jnz ftrace_graph_caller - cmpl $ftrace_graph_entry_stub, ftrace_graph_entry - jnz ftrace_graph_caller + cmpl $ftrace_graph_entry_stub, ftrace_graph_entry + jnz ftrace_graph_caller #endif .globl ftrace_stub ftrace_stub: @@ -1022,92 +1021,92 @@ ftrace_stub: /* taken from glibc */ trace: - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %eax - movl 0x4(%ebp), %edx - subl $MCOUNT_INSN_SIZE, %eax + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + movl 0x4(%ebp), %edx + subl $MCOUNT_INSN_SIZE, %eax - call *ftrace_trace_function + call *ftrace_trace_function - popl %edx - popl %ecx - popl %eax - jmp ftrace_stub + popl %edx + popl %ecx + popl %eax + jmp ftrace_stub END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %eax - lea 0x4(%ebp), %edx - movl (%ebp), %ecx - subl $MCOUNT_INSN_SIZE, %eax - call prepare_ftrace_return - popl %edx - popl %ecx - popl %eax + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + lea 0x4(%ebp), %edx + movl (%ebp), %ecx + subl $MCOUNT_INSN_SIZE, %eax + call prepare_ftrace_return + popl %edx + popl %ecx + popl %eax ret END(ftrace_graph_caller) .globl return_to_handler return_to_handler: - pushl %eax - pushl %edx - movl %ebp, %eax - call ftrace_return_to_handler - movl %eax, %ecx - popl %edx - popl %eax - jmp *%ecx + pushl %eax + pushl %edx + movl %ebp, %eax + call ftrace_return_to_handler + movl %eax, %ecx + popl %edx + popl %eax + jmp *%ecx #endif #ifdef CONFIG_TRACING ENTRY(trace_page_fault) ASM_CLAC - pushl $trace_do_page_fault - jmp error_code + pushl $trace_do_page_fault + jmp error_code END(trace_page_fault) #endif ENTRY(page_fault) ASM_CLAC - pushl $do_page_fault + pushl $do_page_fault ALIGN error_code: /* the function address is in %gs's slot on the stack */ - pushl %fs - pushl %es - pushl %ds - pushl %eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx cld - movl $(__KERNEL_PERCPU), %ecx - movl %ecx, %fs + movl $(__KERNEL_PERCPU), %ecx + movl %ecx, %fs UNWIND_ESPFIX_STACK GS_TO_REG %ecx - movl PT_GS(%esp), %edi # get the function address - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + movl PT_GS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart REG_TO_PTGS %ecx SET_KERNEL_GS %ecx - movl $(__USER_DS), %ecx - movl %ecx, %ds - movl %ecx, %es + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es TRACE_IRQS_OFF - movl %esp,%eax # pt_regs pointer - call *%edi - jmp ret_from_exception + movl %esp, %eax # pt_regs pointer + call *%edi + jmp ret_from_exception END(page_fault) /* @@ -1124,28 +1123,28 @@ END(page_fault) * the instruction that would have done it for sysenter. 
*/ .macro FIX_STACK offset ok label - cmpw $__KERNEL_CS, 4(%esp) - jne \ok + cmpw $__KERNEL_CS, 4(%esp) + jne \ok \label: - movl TSS_sysenter_sp0 + \offset(%esp), %esp + movl TSS_sysenter_sp0 + \offset(%esp), %esp pushfl - pushl $__KERNEL_CS - pushl $sysenter_past_esp + pushl $__KERNEL_CS + pushl $sysenter_past_esp .endm ENTRY(debug) ASM_CLAC - cmpl $entry_SYSENTER_32,(%esp) - jne debug_stack_correct + cmpl $entry_SYSENTER_32, (%esp) + jne debug_stack_correct FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn debug_stack_correct: - pushl $-1 # mark this as an int + pushl $-1 # mark this as an int SAVE_ALL TRACE_IRQS_OFF - xorl %edx,%edx # error code 0 - movl %esp,%eax # pt_regs pointer - call do_debug - jmp ret_from_exception + xorl %edx, %edx # error code 0 + movl %esp, %eax # pt_regs pointer + call do_debug + jmp ret_from_exception END(debug) /* @@ -1159,91 +1158,91 @@ END(debug) ENTRY(nmi) ASM_CLAC #ifdef CONFIG_X86_ESPFIX32 - pushl %eax - movl %ss, %eax - cmpw $__ESPFIX_SS, %ax - popl %eax - je nmi_espfix_stack + pushl %eax + movl %ss, %eax + cmpw $__ESPFIX_SS, %ax + popl %eax + je nmi_espfix_stack #endif - cmpl $entry_SYSENTER_32,(%esp) - je nmi_stack_fixup - pushl %eax - movl %esp,%eax - /* Do not access memory above the end of our stack page, + cmpl $entry_SYSENTER_32, (%esp) + je nmi_stack_fixup + pushl %eax + movl %esp, %eax + /* + * Do not access memory above the end of our stack page, * it might not exist. */ - andl $(THREAD_SIZE-1),%eax - cmpl $(THREAD_SIZE-20),%eax - popl %eax - jae nmi_stack_correct - cmpl $entry_SYSENTER_32,12(%esp) - je nmi_debug_stack_check + andl $(THREAD_SIZE-1), %eax + cmpl $(THREAD_SIZE-20), %eax + popl %eax + jae nmi_stack_correct + cmpl $entry_SYSENTER_32, 12(%esp) + je nmi_debug_stack_check nmi_stack_correct: - pushl %eax + pushl %eax SAVE_ALL - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_nmi - jmp restore_all_notrace + xorl %edx, %edx # zero error code + movl %esp, %eax # pt_regs pointer + call do_nmi + jmp restore_all_notrace nmi_stack_fixup: FIX_STACK 12, nmi_stack_correct, 1 - jmp nmi_stack_correct + jmp nmi_stack_correct nmi_debug_stack_check: - cmpw $__KERNEL_CS,16(%esp) - jne nmi_stack_correct - cmpl $debug,(%esp) - jb nmi_stack_correct - cmpl $debug_esp_fix_insn,(%esp) - ja nmi_stack_correct + cmpw $__KERNEL_CS, 16(%esp) + jne nmi_stack_correct + cmpl $debug, (%esp) + jb nmi_stack_correct + cmpl $debug_esp_fix_insn, (%esp) + ja nmi_stack_correct FIX_STACK 24, nmi_stack_correct, 1 - jmp nmi_stack_correct + jmp nmi_stack_correct #ifdef CONFIG_X86_ESPFIX32 nmi_espfix_stack: /* * create the pointer to lss back */ - pushl %ss - pushl %esp - addl $4, (%esp) + pushl %ss + pushl %esp + addl $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 - pushl 16(%esp) + pushl 16(%esp) .endr - pushl %eax + pushl %eax SAVE_ALL - FIXUP_ESPFIX_STACK # %eax == %esp - xorl %edx,%edx # zero error code - call do_nmi + FIXUP_ESPFIX_STACK # %eax == %esp + xorl %edx, %edx # zero error code + call do_nmi RESTORE_REGS - lss 12+4(%esp), %esp # back to espfix stack - jmp irq_return + lss 12+4(%esp), %esp # back to espfix stack + jmp irq_return #endif END(nmi) ENTRY(int3) ASM_CLAC - pushl $-1 # mark this as an int + pushl $-1 # mark this as an int SAVE_ALL TRACE_IRQS_OFF - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_int3 - jmp ret_from_exception + xorl %edx, %edx # zero error code + movl %esp, %eax # pt_regs pointer + call do_int3 + jmp ret_from_exception END(int3) ENTRY(general_protection) - pushl 
$do_general_protection - jmp error_code + pushl $do_general_protection + jmp error_code END(general_protection) #ifdef CONFIG_KVM_GUEST ENTRY(async_page_fault) ASM_CLAC - pushl $do_async_page_fault - jmp error_code + pushl $do_async_page_fault + jmp error_code END(async_page_fault) #endif - From 4d7321381e5c7102a3d3faf0a0a0035a09619612 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2015 20:43:07 +0200 Subject: [PATCH 471/481] x86/asm/entry/64: Clean up entry_64.S Make the 64-bit syscall entry code a bit more readable: - use consistent assembly coding style similar to the other entry_*.S files - remove old comments that are not true anymore - eliminate whitespace noise - use consistent vertical spacing - fix various comments - reorganize entry point generation tables to be more readable No code changed: # arch/x86/entry/entry_64.o: text data bss dec hex filename 12282 0 0 12282 2ffa entry_64.o.before 12282 0 0 12282 2ffa entry_64.o.after md5: cbab1f2d727a2a8a87618eeb79f391b7 entry_64.o.before.asm cbab1f2d727a2a8a87618eeb79f391b7 entry_64.o.after.asm Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 814 +++++++++++++++++++------------------- 1 file changed, 401 insertions(+), 413 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d2a0ed211bed..bd97161f90cb 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -4,26 +4,20 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs * Copyright (C) 2000 Pavel Machek - */ - -/* + * * entry.S contains the system-call and fault low-level handling routines. * * Some of this is documented in Documentation/x86/entry_64.txt * - * NOTE: This code handles signal-recognition, which happens every time - * after an interrupt and after each system call. - * * A note on terminology: - * - iret frame: Architecture defined interrupt frame from SS to RIP - * at the top of the kernel process stack. + * - iret frame: Architecture defined interrupt frame from SS to RIP + * at the top of the kernel process stack. * * Some macro usage: - * - ENTRY/END Define functions in the symbol table. - * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. - * - idtentry - Define exception entry points. + * - ENTRY/END: Define functions in the symbol table. + * - TRACE_IRQ_*: Trace hardirq state for lock debugging. + * - idtentry: Define exception entry points. */ - #include #include #include @@ -46,13 +40,12 @@ /* Avoid __ASSEMBLER__'ifying just for this. */ #include -#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) -#define __AUDIT_ARCH_64BIT 0x80000000 -#define __AUDIT_ARCH_LE 0x40000000 - - .code64 - .section .entry.text, "ax" +#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) +#define __AUDIT_ARCH_64BIT 0x80000000 +#define __AUDIT_ARCH_LE 0x40000000 +.code64 +.section .entry.text, "ax" #ifdef CONFIG_PARAVIRT ENTRY(native_usergs_sysret64) @@ -61,11 +54,10 @@ ENTRY(native_usergs_sysret64) ENDPROC(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ - .macro TRACE_IRQS_IRETQ #ifdef CONFIG_TRACE_IRQFLAGS - bt $9,EFLAGS(%rsp) /* interrupts off? */ - jnc 1f + bt $9, EFLAGS(%rsp) /* interrupts off? 
*/ + jnc 1f TRACE_IRQS_ON 1: #endif @@ -85,34 +77,34 @@ ENDPROC(native_usergs_sysret64) #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) .macro TRACE_IRQS_OFF_DEBUG - call debug_stack_set_zero + call debug_stack_set_zero TRACE_IRQS_OFF - call debug_stack_reset + call debug_stack_reset .endm .macro TRACE_IRQS_ON_DEBUG - call debug_stack_set_zero + call debug_stack_set_zero TRACE_IRQS_ON - call debug_stack_reset + call debug_stack_reset .endm .macro TRACE_IRQS_IRETQ_DEBUG - bt $9,EFLAGS(%rsp) /* interrupts off? */ - jnc 1f + bt $9, EFLAGS(%rsp) /* interrupts off? */ + jnc 1f TRACE_IRQS_ON_DEBUG 1: .endm #else -# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF -# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON -# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ +# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF +# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON +# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ #endif /* - * 64bit SYSCALL instruction entry. Up to 6 arguments in registers. + * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. * - * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, + * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, * then loads new ss, cs, and rip from previously programmed MSRs. * rflags gets masked by a value from another MSR (so CLD and CLAC * are not needed). SYSCALL does not save anything on the stack @@ -128,7 +120,7 @@ ENDPROC(native_usergs_sysret64) * r10 arg3 (needs to be moved to rcx to conform to C ABI) * r8 arg4 * r9 arg5 - * (note: r12-r15,rbp,rbx are callee-preserved in C ABI) + * (note: r12-r15, rbp, rbx are callee-preserved in C ABI) * * Only called from user space. * @@ -151,12 +143,12 @@ ENTRY(entry_SYSCALL_64) */ GLOBAL(entry_SYSCALL_64_after_swapgs) - movq %rsp,PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp + movq %rsp, PER_CPU_VAR(rsp_scratch) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* Construct struct pt_regs on stack */ - pushq $__USER_DS /* pt_regs->ss */ - pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ + pushq $__USER_DS /* pt_regs->ss */ + pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ /* * Re-enable interrupts. 
* We use 'rsp_scratch' as a scratch space, hence irq-off block above @@ -165,34 +157,34 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) * with using rsp_scratch: */ ENABLE_INTERRUPTS(CLBR_NONE) - pushq %r11 /* pt_regs->flags */ - pushq $__USER_CS /* pt_regs->cs */ - pushq %rcx /* pt_regs->ip */ - pushq %rax /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - pushq %rdx /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 */ - pushq %r9 /* pt_regs->r9 */ - pushq %r10 /* pt_regs->r10 */ - pushq %r11 /* pt_regs->r11 */ - sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ + pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq $-ENOSYS /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz tracesys + testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz tracesys entry_SYSCALL_64_fastpath: #if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max,%rax + cmpq $__NR_syscall_max, %rax #else - andl $__SYSCALL_MASK,%eax - cmpl $__NR_syscall_max,%eax + andl $__SYSCALL_MASK, %eax + cmpl $__NR_syscall_max, %eax #endif - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ - movq %r10,%rcx - call *sys_call_table(,%rax,8) - movq %rax,RAX(%rsp) + ja 1f /* return -ENOSYS (already in pt_regs->ax) */ + movq %r10, %rcx + call *sys_call_table(, %rax, 8) + movq %rax, RAX(%rsp) 1: /* * Syscall return path ending with SYSRET (fast path). @@ -213,15 +205,15 @@ entry_SYSCALL_64_fastpath: * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is * very bad. */ - testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ + testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ RESTORE_C_REGS_EXCEPT_RCX_R11 - movq RIP(%rsp),%rcx - movq EFLAGS(%rsp),%r11 - movq RSP(%rsp),%rsp + movq RIP(%rsp), %rcx + movq EFLAGS(%rsp), %r11 + movq RSP(%rsp), %rsp /* - * 64bit SYSRET restores rip from rcx, + * 64-bit SYSRET restores rip from rcx, * rflags from r11 (but RF and VM bits are forced to 0), * cs and ss are loaded from MSRs. * Restoration of rflags re-enables interrupts. 
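The hunk above rewrites the SYSCALL entry so that the explicit pushq sequence builds a struct pt_regs frame on the kernel stack, with each push annotated by the pt_regs field it fills. The user-space sketch below is only an aid for following that layout: struct ptregs_sketch, its field order (r15 at the lowest offset, ss — pushed first — at the highest), and the printed offsets are illustrative assumptions meant to mirror the pt_regs->... comments in the diff, not the kernel's own header.

/*
 * Minimal stand-alone sketch (not the kernel definition) of the frame the
 * entry_SYSCALL_64 push sequence constructs.  Field order is assumed to
 * follow the pt_regs->... annotations in the hunk above.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct ptregs_sketch {
	/* "pt_regs->bp, bx, r12-15 not saved": room only, via sub $(6*8), %rsp */
	uint64_t r15, r14, r13, r12, bp, bx;
	/* saved by the pushq sequence, last push at the lowest address */
	uint64_t r11, r10, r9, r8;
	uint64_t ax;		/* pre-loaded with -ENOSYS */
	uint64_t cx, dx, si, di;
	uint64_t orig_ax;	/* syscall number, pushed from %rax */
	uint64_t ip;		/* user RIP; SYSCALL left it in %rcx */
	uint64_t cs;		/* $__USER_CS */
	uint64_t flags;		/* user RFLAGS; SYSCALL left them in %r11 */
	uint64_t sp;		/* user RSP, saved in rsp_scratch */
	uint64_t ss;		/* $__USER_DS, pushed first (highest address) */
};

int main(void)
{
	/* The final %rsp points at the start of the frame, i.e. the r15 slot. */
	printf("frame size      = %zu bytes\n", sizeof(struct ptregs_sketch));
	printf("offsetof(r15)   = %zu\n", offsetof(struct ptregs_sketch, r15));
	printf("offsetof(ip)    = %zu\n", offsetof(struct ptregs_sketch, ip));
	printf("offsetof(ss)    = %zu\n", offsetof(struct ptregs_sketch, ss));
	return 0;
}

Compiled on its own this just prints sizeof/offsetof values; the point it illustrates is why the fast path can get away with "sub $(6*8), %rsp" — the bp/bx/r12-r15 slots exist in the frame but are never written unless the slow path's SAVE_EXTRA_REGS runs.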
@@ -239,21 +231,21 @@ entry_SYSCALL_64_fastpath: /* Do syscall entry tracing */ tracesys: - movq %rsp, %rdi - movl $AUDIT_ARCH_X86_64, %esi - call syscall_trace_enter_phase1 - test %rax, %rax - jnz tracesys_phase2 /* if needed, run the slow path */ - RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ - movq ORIG_RAX(%rsp), %rax - jmp entry_SYSCALL_64_fastpath /* and return to the fast path */ + movq %rsp, %rdi + movl $AUDIT_ARCH_X86_64, %esi + call syscall_trace_enter_phase1 + test %rax, %rax + jnz tracesys_phase2 /* if needed, run the slow path */ + RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ + movq ORIG_RAX(%rsp), %rax + jmp entry_SYSCALL_64_fastpath /* and return to the fast path */ tracesys_phase2: SAVE_EXTRA_REGS - movq %rsp, %rdi - movl $AUDIT_ARCH_X86_64, %esi - movq %rax,%rdx - call syscall_trace_enter_phase2 + movq %rsp, %rdi + movl $AUDIT_ARCH_X86_64, %esi + movq %rax, %rdx + call syscall_trace_enter_phase2 /* * Reload registers from stack in case ptrace changed them. @@ -263,15 +255,15 @@ tracesys_phase2: RESTORE_C_REGS_EXCEPT_RAX RESTORE_EXTRA_REGS #if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max,%rax + cmpq $__NR_syscall_max, %rax #else - andl $__SYSCALL_MASK,%eax - cmpl $__NR_syscall_max,%eax + andl $__SYSCALL_MASK, %eax + cmpl $__NR_syscall_max, %eax #endif - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ - movq %r10,%rcx /* fixup for C */ - call *sys_call_table(,%rax,8) - movq %rax,RAX(%rsp) + ja 1f /* return -ENOSYS (already in pt_regs->ax) */ + movq %r10, %rcx /* fixup for C */ + call *sys_call_table(, %rax, 8) + movq %rax, RAX(%rsp) 1: /* Use IRET because user could have changed pt_regs->foo */ @@ -283,31 +275,33 @@ GLOBAL(int_ret_from_sys_call) DISABLE_INTERRUPTS(CLBR_NONE) int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */ TRACE_IRQS_OFF - movl $_TIF_ALLWORK_MASK,%edi + movl $_TIF_ALLWORK_MASK, %edi /* edi: mask to check */ GLOBAL(int_with_check) LOCKDEP_SYS_EXIT_IRQ GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx),%edx - andl %edi,%edx - jnz int_careful - andl $~TS_COMPAT,TI_status(%rcx) + movl TI_flags(%rcx), %edx + andl %edi, %edx + jnz int_careful + andl $~TS_COMPAT, TI_status(%rcx) jmp syscall_return - /* Either reschedule or signal or syscall exit tracking needed. */ - /* First do a reschedule test. */ - /* edx: work, edi: workmask */ + /* + * Either reschedule or signal or syscall exit tracking needed. + * First do a reschedule test. 
+ * edx: work, edi: workmask + */ int_careful: - bt $TIF_NEED_RESCHED,%edx - jnc int_very_careful + bt $TIF_NEED_RESCHED, %edx + jnc int_very_careful TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - pushq %rdi + pushq %rdi SCHEDULE_USER - popq %rdi + popq %rdi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - jmp int_with_check + jmp int_with_check /* handle signals and tracing -- both require a full pt_regs */ int_very_careful: @@ -315,27 +309,27 @@ int_very_careful: ENABLE_INTERRUPTS(CLBR_NONE) SAVE_EXTRA_REGS /* Check for syscall exit trace */ - testl $_TIF_WORK_SYSCALL_EXIT,%edx - jz int_signal - pushq %rdi - leaq 8(%rsp),%rdi # &ptregs -> arg1 - call syscall_trace_leave - popq %rdi - andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi - jmp int_restore_rest + testl $_TIF_WORK_SYSCALL_EXIT, %edx + jz int_signal + pushq %rdi + leaq 8(%rsp), %rdi /* &ptregs -> arg1 */ + call syscall_trace_leave + popq %rdi + andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU), %edi + jmp int_restore_rest int_signal: - testl $_TIF_DO_NOTIFY_MASK,%edx - jz 1f - movq %rsp,%rdi # &ptregs -> arg1 - xorl %esi,%esi # oldset -> arg2 - call do_notify_resume -1: movl $_TIF_WORK_MASK,%edi + testl $_TIF_DO_NOTIFY_MASK, %edx + jz 1f + movq %rsp, %rdi /* &ptregs -> arg1 */ + xorl %esi, %esi /* oldset -> arg2 */ + call do_notify_resume +1: movl $_TIF_WORK_MASK, %edi int_restore_rest: RESTORE_EXTRA_REGS DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - jmp int_with_check + jmp int_with_check syscall_return: /* The IRETQ could re-enable interrupts: */ @@ -346,10 +340,10 @@ syscall_return: * Try to use SYSRET instead of IRET if we're returning to * a completely clean 64-bit userspace context. */ - movq RCX(%rsp),%rcx - movq RIP(%rsp),%r11 - cmpq %rcx,%r11 /* RCX == RIP */ - jne opportunistic_sysret_failed + movq RCX(%rsp), %rcx + movq RIP(%rsp), %r11 + cmpq %rcx, %r11 /* RCX == RIP */ + jne opportunistic_sysret_failed /* * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP @@ -362,19 +356,21 @@ syscall_return: .ifne __VIRTUAL_MASK_SHIFT - 47 .error "virtual address width changed -- SYSRET checks need update" .endif + /* Change top 16 bits to be the sign-extension of 47th bit */ shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + /* If this changed %rcx, it was not canonical */ cmpq %rcx, %r11 jne opportunistic_sysret_failed - cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ - jne opportunistic_sysret_failed + cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */ + jne opportunistic_sysret_failed - movq R11(%rsp),%r11 - cmpq %r11,EFLAGS(%rsp) /* R11 == RFLAGS */ - jne opportunistic_sysret_failed + movq R11(%rsp), %r11 + cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */ + jne opportunistic_sysret_failed /* * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET, @@ -383,29 +379,29 @@ syscall_return: * with register state that satisfies the opportunistic SYSRET * conditions. For example, single-stepping this user code: * - * movq $stuck_here,%rcx + * movq $stuck_here, %rcx * pushfq * popq %r11 * stuck_here: * * would never get past 'stuck_here'. */ - testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 - jnz opportunistic_sysret_failed + testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 + jnz opportunistic_sysret_failed /* nothing to check for RSP */ - cmpq $__USER_DS,SS(%rsp) /* SS must match SYSRET */ - jne opportunistic_sysret_failed + cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */ + jne opportunistic_sysret_failed /* - * We win! 
This label is here just for ease of understanding - * perf profiles. Nothing jumps here. + * We win! This label is here just for ease of understanding + * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 - movq RSP(%rsp),%rsp + movq RSP(%rsp), %rsp USERGS_SYSRET64 opportunistic_sysret_failed: @@ -417,7 +413,7 @@ END(entry_SYSCALL_64) .macro FORK_LIKE func ENTRY(stub_\func) SAVE_EXTRA_REGS 8 - jmp sys_\func + jmp sys_\func END(stub_\func) .endm @@ -436,7 +432,7 @@ return_from_execve: /* must use IRET code path (pt_regs->cs may have changed) */ addq $8, %rsp ZERO_EXTRA_REGS - movq %rax,RAX(%rsp) + movq %rax, RAX(%rsp) jmp int_ret_from_sys_call END(stub_execve) /* @@ -479,19 +475,19 @@ ENTRY(stub_rt_sigreturn) * we SAVE_EXTRA_REGS here. */ SAVE_EXTRA_REGS 8 - call sys_rt_sigreturn + call sys_rt_sigreturn return_from_stub: addq $8, %rsp RESTORE_EXTRA_REGS - movq %rax,RAX(%rsp) - jmp int_ret_from_sys_call + movq %rax, RAX(%rsp) + jmp int_ret_from_sys_call END(stub_rt_sigreturn) #ifdef CONFIG_X86_X32_ABI ENTRY(stub_x32_rt_sigreturn) SAVE_EXTRA_REGS 8 - call sys32_x32_rt_sigreturn - jmp return_from_stub + call sys32_x32_rt_sigreturn + jmp return_from_stub END(stub_x32_rt_sigreturn) #endif @@ -502,16 +498,16 @@ END(stub_x32_rt_sigreturn) */ ENTRY(ret_from_fork) - LOCK ; btr $TIF_FORK,TI_flags(%r8) + LOCK ; btr $TIF_FORK, TI_flags(%r8) - pushq $0x0002 - popfq # reset kernel eflags + pushq $0x0002 + popfq /* reset kernel eflags */ - call schedule_tail # rdi: 'prev' task parameter + call schedule_tail /* rdi: 'prev' task parameter */ RESTORE_EXTRA_REGS - testb $3, CS(%rsp) # from kernel_thread? + testb $3, CS(%rsp) /* from kernel_thread? */ /* * By the time we get here, we have no idea whether our pt_regs, @@ -522,13 +518,15 @@ ENTRY(ret_from_fork) */ jnz int_ret_from_sys_call - /* We came from kernel_thread */ - /* nb: we depend on RESTORE_EXTRA_REGS above */ - movq %rbp, %rdi - call *%rbx - movl $0, RAX(%rsp) + /* + * We came from kernel_thread + * nb: we depend on RESTORE_EXTRA_REGS above + */ + movq %rbp, %rdi + call *%rbx + movl $0, RAX(%rsp) RESTORE_EXTRA_REGS - jmp int_ret_from_sys_call + jmp int_ret_from_sys_call END(ret_from_fork) /* @@ -539,7 +537,7 @@ END(ret_from_fork) ENTRY(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) - pushq $(~vector+0x80) /* Note: always in signed byte range */ + pushq $(~vector+0x80) /* Note: always in signed byte range */ vector=vector+1 jmp common_interrupt .align 8 @@ -569,7 +567,7 @@ END(irq_entries_start) /* this goes to 0(%rsp) for unwinder, not for saving the value: */ SAVE_EXTRA_REGS_RBP -RBP - leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ + leaq -RBP(%rsp), %rdi /* arg1 for \func (pointer to pt_regs) */ testb $3, CS-RBP(%rsp) jz 1f @@ -582,14 +580,14 @@ END(irq_entries_start) * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ - movq %rsp, %rsi - incl PER_CPU_VAR(irq_count) - cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp - pushq %rsi + movq %rsp, %rsi + incl PER_CPU_VAR(irq_count) + cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp + pushq %rsi /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF - call \func + call \func .endm /* @@ -599,36 +597,35 @@ END(irq_entries_start) .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: ASM_CLAC - addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ + addq $-0x80, 
(%rsp) /* Adjust vector to [-256, -1] range */ interrupt do_IRQ /* 0(%rsp): old RSP */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - decl PER_CPU_VAR(irq_count) + decl PER_CPU_VAR(irq_count) /* Restore saved previous stack */ - popq %rsi + popq %rsi /* return code expects complete pt_regs - adjust rsp accordingly: */ - leaq -RBP(%rsi),%rsp + leaq -RBP(%rsi), %rsp testb $3, CS(%rsp) jz retint_kernel /* Interrupt came from user space */ retint_user: GET_THREAD_INFO(%rcx) - /* - * %rcx: thread info. Interrupts off. - */ + + /* %rcx: thread info. Interrupts are off. */ retint_with_reschedule: - movl $_TIF_WORK_MASK,%edi + movl $_TIF_WORK_MASK, %edi retint_check: LOCKDEP_SYS_EXIT_IRQ - movl TI_flags(%rcx),%edx - andl %edi,%edx - jnz retint_careful + movl TI_flags(%rcx), %edx + andl %edi, %edx + jnz retint_careful -retint_swapgs: /* return to user-space */ +retint_swapgs: /* return to user-space */ /* * The iretq could re-enable interrupts: */ @@ -643,9 +640,9 @@ retint_kernel: #ifdef CONFIG_PREEMPT /* Interrupts are off */ /* Check if we need preemption */ - bt $9,EFLAGS(%rsp) /* interrupts were off? */ + bt $9, EFLAGS(%rsp) /* were interrupts off? */ jnc 1f -0: cmpl $0,PER_CPU_VAR(__preempt_count) +0: cmpl $0, PER_CPU_VAR(__preempt_count) jnz 1f call preempt_schedule_irq jmp 0b @@ -671,8 +668,8 @@ ENTRY(native_iret) * 64-bit mode SS:RSP on the exception stack is always valid. */ #ifdef CONFIG_X86_ESPFIX64 - testb $4,(SS-RIP)(%rsp) - jnz native_irq_return_ldt + testb $4, (SS-RIP)(%rsp) + jnz native_irq_return_ldt #endif .global native_irq_return_iret @@ -687,59 +684,59 @@ native_irq_return_iret: #ifdef CONFIG_X86_ESPFIX64 native_irq_return_ldt: - pushq %rax - pushq %rdi + pushq %rax + pushq %rdi SWAPGS - movq PER_CPU_VAR(espfix_waddr),%rdi - movq %rax,(0*8)(%rdi) /* RAX */ - movq (2*8)(%rsp),%rax /* RIP */ - movq %rax,(1*8)(%rdi) - movq (3*8)(%rsp),%rax /* CS */ - movq %rax,(2*8)(%rdi) - movq (4*8)(%rsp),%rax /* RFLAGS */ - movq %rax,(3*8)(%rdi) - movq (6*8)(%rsp),%rax /* SS */ - movq %rax,(5*8)(%rdi) - movq (5*8)(%rsp),%rax /* RSP */ - movq %rax,(4*8)(%rdi) - andl $0xffff0000,%eax - popq %rdi - orq PER_CPU_VAR(espfix_stack),%rax + movq PER_CPU_VAR(espfix_waddr), %rdi + movq %rax, (0*8)(%rdi) /* RAX */ + movq (2*8)(%rsp), %rax /* RIP */ + movq %rax, (1*8)(%rdi) + movq (3*8)(%rsp), %rax /* CS */ + movq %rax, (2*8)(%rdi) + movq (4*8)(%rsp), %rax /* RFLAGS */ + movq %rax, (3*8)(%rdi) + movq (6*8)(%rsp), %rax /* SS */ + movq %rax, (5*8)(%rdi) + movq (5*8)(%rsp), %rax /* RSP */ + movq %rax, (4*8)(%rdi) + andl $0xffff0000, %eax + popq %rdi + orq PER_CPU_VAR(espfix_stack), %rax SWAPGS - movq %rax,%rsp - popq %rax - jmp native_irq_return_iret + movq %rax, %rsp + popq %rax + jmp native_irq_return_iret #endif /* edi: workmask, edx: work */ retint_careful: - bt $TIF_NEED_RESCHED,%edx - jnc retint_signal + bt $TIF_NEED_RESCHED, %edx + jnc retint_signal TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - pushq %rdi + pushq %rdi SCHEDULE_USER - popq %rdi + popq %rdi GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - jmp retint_check + jmp retint_check retint_signal: - testl $_TIF_DO_NOTIFY_MASK,%edx - jz retint_swapgs + testl $_TIF_DO_NOTIFY_MASK, %edx + jz retint_swapgs TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) SAVE_EXTRA_REGS - movq $-1,ORIG_RAX(%rsp) - xorl %esi,%esi # oldset - movq %rsp,%rdi # &pt_regs - call do_notify_resume + movq $-1, ORIG_RAX(%rsp) + xorl %esi, %esi /* oldset */ + movq %rsp, %rdi /* &pt_regs */ + call do_notify_resume RESTORE_EXTRA_REGS 
DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) - jmp retint_with_reschedule + jmp retint_with_reschedule END(common_interrupt) @@ -749,10 +746,10 @@ END(common_interrupt) .macro apicinterrupt3 num sym do_sym ENTRY(\sym) ASM_CLAC - pushq $~(\num) + pushq $~(\num) .Lcommon_\sym: interrupt \do_sym - jmp ret_from_intr + jmp ret_from_intr END(\sym) .endm @@ -774,60 +771,45 @@ trace_apicinterrupt \num \sym .endm #ifdef CONFIG_SMP -apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \ - irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt -apicinterrupt3 REBOOT_VECTOR \ - reboot_interrupt smp_reboot_interrupt +apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt +apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt #endif #ifdef CONFIG_X86_UV -apicinterrupt3 UV_BAU_MESSAGE \ - uv_bau_message_intr1 uv_bau_message_interrupt +apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt #endif -apicinterrupt LOCAL_TIMER_VECTOR \ - apic_timer_interrupt smp_apic_timer_interrupt -apicinterrupt X86_PLATFORM_IPI_VECTOR \ - x86_platform_ipi smp_x86_platform_ipi + +apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt +apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi #ifdef CONFIG_HAVE_KVM -apicinterrupt3 POSTED_INTR_VECTOR \ - kvm_posted_intr_ipi smp_kvm_posted_intr_ipi -apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \ - kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi +apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi +apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi #endif #ifdef CONFIG_X86_MCE_THRESHOLD -apicinterrupt THRESHOLD_APIC_VECTOR \ - threshold_interrupt smp_threshold_interrupt +apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt #endif #ifdef CONFIG_X86_MCE_AMD -apicinterrupt DEFERRED_ERROR_VECTOR \ - deferred_error_interrupt smp_deferred_error_interrupt +apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt #endif #ifdef CONFIG_X86_THERMAL_VECTOR -apicinterrupt THERMAL_APIC_VECTOR \ - thermal_interrupt smp_thermal_interrupt +apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt #endif #ifdef CONFIG_SMP -apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ - call_function_single_interrupt smp_call_function_single_interrupt -apicinterrupt CALL_FUNCTION_VECTOR \ - call_function_interrupt smp_call_function_interrupt -apicinterrupt RESCHEDULE_VECTOR \ - reschedule_interrupt smp_reschedule_interrupt +apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt +apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt +apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt #endif -apicinterrupt ERROR_APIC_VECTOR \ - error_interrupt smp_error_interrupt -apicinterrupt SPURIOUS_APIC_VECTOR \ - spurious_interrupt smp_spurious_interrupt +apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt +apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt #ifdef CONFIG_IRQ_WORK -apicinterrupt IRQ_WORK_VECTOR \ - irq_work_interrupt smp_irq_work_interrupt +apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt #endif /* @@ -846,54 +828,54 @@ ENTRY(\sym) PARAVIRT_ADJUST_EXCEPTION_FRAME .ifeq \has_error_code - pushq $-1 /* ORIG_RAX: no syscall to restart */ + pushq $-1 /* 
ORIG_RAX: no syscall to restart */ .endif ALLOC_PT_GPREGS_ON_STACK .if \paranoid .if \paranoid == 1 - testb $3, CS(%rsp) /* If coming from userspace, switch */ - jnz 1f /* stacks. */ + testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ + jnz 1f .endif - call paranoid_entry + call paranoid_entry .else - call error_entry + call error_entry .endif /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ .if \paranoid .if \shift_ist != -1 - TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ + TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ .else TRACE_IRQS_OFF .endif .endif - movq %rsp,%rdi /* pt_regs pointer */ + movq %rsp, %rdi /* pt_regs pointer */ .if \has_error_code - movq ORIG_RAX(%rsp),%rsi /* get error code */ - movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ + movq ORIG_RAX(%rsp), %rsi /* get error code */ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ .else - xorl %esi,%esi /* no error code */ + xorl %esi, %esi /* no error code */ .endif .if \shift_ist != -1 - subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) + subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) .endif - call \do_sym + call \do_sym .if \shift_ist != -1 - addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) + addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) .endif /* these procedures expect "no swapgs" flag in ebx */ .if \paranoid - jmp paranoid_exit + jmp paranoid_exit .else - jmp error_exit + jmp error_exit .endif .if \paranoid == 1 @@ -903,25 +885,25 @@ ENTRY(\sym) * run in real process context if user_mode(regs). */ 1: - call error_entry + call error_entry - movq %rsp,%rdi /* pt_regs pointer */ - call sync_regs - movq %rax,%rsp /* switch stack */ + movq %rsp, %rdi /* pt_regs pointer */ + call sync_regs + movq %rax, %rsp /* switch stack */ - movq %rsp,%rdi /* pt_regs pointer */ + movq %rsp, %rdi /* pt_regs pointer */ .if \has_error_code - movq ORIG_RAX(%rsp),%rsi /* get error code */ - movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ + movq ORIG_RAX(%rsp), %rsi /* get error code */ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ .else - xorl %esi,%esi /* no error code */ + xorl %esi, %esi /* no error code */ .endif - call \do_sym + call \do_sym - jmp error_exit /* %ebx: no swapgs flag */ + jmp error_exit /* %ebx: no swapgs flag */ .endif END(\sym) .endm @@ -937,55 +919,57 @@ idtentry \sym \do_sym has_error_code=\has_error_code .endm #endif -idtentry divide_error do_divide_error has_error_code=0 -idtentry overflow do_overflow has_error_code=0 -idtentry bounds do_bounds has_error_code=0 -idtentry invalid_op do_invalid_op has_error_code=0 -idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault do_double_fault has_error_code=1 paranoid=2 -idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 -idtentry invalid_TSS do_invalid_TSS has_error_code=1 -idtentry segment_not_present do_segment_not_present has_error_code=1 -idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 -idtentry coprocessor_error do_coprocessor_error has_error_code=0 -idtentry alignment_check do_alignment_check has_error_code=1 -idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 +idtentry divide_error do_divide_error has_error_code=0 +idtentry overflow do_overflow has_error_code=0 +idtentry bounds do_bounds has_error_code=0 +idtentry invalid_op do_invalid_op has_error_code=0 +idtentry device_not_available do_device_not_available has_error_code=0 +idtentry double_fault do_double_fault 
has_error_code=1 paranoid=2 +idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 +idtentry invalid_TSS do_invalid_TSS has_error_code=1 +idtentry segment_not_present do_segment_not_present has_error_code=1 +idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 +idtentry coprocessor_error do_coprocessor_error has_error_code=0 +idtentry alignment_check do_alignment_check has_error_code=1 +idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 - /* Reload gs selector with exception handling */ - /* edi: new selector */ + /* + * Reload gs selector with exception handling + * edi: new selector + */ ENTRY(native_load_gs_index) pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS gs_change: - movl %edi,%gs -2: mfence /* workaround */ + movl %edi, %gs +2: mfence /* workaround */ SWAPGS popfq ret END(native_load_gs_index) - _ASM_EXTABLE(gs_change,bad_gs) - .section .fixup,"ax" + _ASM_EXTABLE(gs_change, bad_gs) + .section .fixup, "ax" /* running with kernelgs */ bad_gs: - SWAPGS /* switch back to user gs */ - xorl %eax,%eax - movl %eax,%gs - jmp 2b + SWAPGS /* switch back to user gs */ + xorl %eax, %eax + movl %eax, %gs + jmp 2b .previous /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(do_softirq_own_stack) - pushq %rbp - mov %rsp,%rbp - incl PER_CPU_VAR(irq_count) - cmove PER_CPU_VAR(irq_stack_ptr),%rsp - push %rbp # backlink for old unwinder - call __do_softirq + pushq %rbp + mov %rsp, %rbp + incl PER_CPU_VAR(irq_count) + cmove PER_CPU_VAR(irq_stack_ptr), %rsp + push %rbp /* frame pointer backlink */ + call __do_softirq leaveq - decl PER_CPU_VAR(irq_count) + decl PER_CPU_VAR(irq_count) ret END(do_softirq_own_stack) @@ -1005,23 +989,24 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 * existing activation in its critical region -- if so, we pop the current * activation and restart the handler using the previous one. */ -ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) +ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ + /* * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will * see the correct pointer to the pt_regs */ - movq %rdi, %rsp # we don't return, adjust the stack frame -11: incl PER_CPU_VAR(irq_count) - movq %rsp,%rbp - cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp - pushq %rbp # backlink for old unwinder - call xen_evtchn_do_upcall - popq %rsp - decl PER_CPU_VAR(irq_count) + movq %rdi, %rsp /* we don't return, adjust the stack frame */ +11: incl PER_CPU_VAR(irq_count) + movq %rsp, %rbp + cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp + pushq %rbp /* frame pointer backlink */ + call xen_evtchn_do_upcall + popq %rsp + decl PER_CPU_VAR(irq_count) #ifndef CONFIG_PREEMPT - call xen_maybe_preempt_hcall + call xen_maybe_preempt_hcall #endif - jmp error_exit + jmp error_exit END(xen_do_hypervisor_callback) /* @@ -1038,35 +1023,35 @@ END(xen_do_hypervisor_callback) * with its current contents: any discrepancy means we in category 1. */ ENTRY(xen_failsafe_callback) - movl %ds,%ecx - cmpw %cx,0x10(%rsp) - jne 1f - movl %es,%ecx - cmpw %cx,0x18(%rsp) - jne 1f - movl %fs,%ecx - cmpw %cx,0x20(%rsp) - jne 1f - movl %gs,%ecx - cmpw %cx,0x28(%rsp) - jne 1f + movl %ds, %ecx + cmpw %cx, 0x10(%rsp) + jne 1f + movl %es, %ecx + cmpw %cx, 0x18(%rsp) + jne 1f + movl %fs, %ecx + cmpw %cx, 0x20(%rsp) + jne 1f + movl %gs, %ecx + cmpw %cx, 0x28(%rsp) + jne 1f /* All segments match their saved values => Category 2 (Bad IRET). 
*/ - movq (%rsp),%rcx - movq 8(%rsp),%r11 - addq $0x30,%rsp - pushq $0 /* RIP */ - pushq %r11 - pushq %rcx - jmp general_protection + movq (%rsp), %rcx + movq 8(%rsp), %r11 + addq $0x30, %rsp + pushq $0 /* RIP */ + pushq %r11 + pushq %rcx + jmp general_protection 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ - movq (%rsp),%rcx - movq 8(%rsp),%r11 - addq $0x30,%rsp - pushq $-1 /* orig_ax = -1 => not a system call */ + movq (%rsp), %rcx + movq 8(%rsp), %r11 + addq $0x30, %rsp + pushq $-1 /* orig_ax = -1 => not a system call */ ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS - jmp error_exit + jmp error_exit END(xen_failsafe_callback) apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ @@ -1079,21 +1064,25 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ hyperv_callback_vector hyperv_vector_handler #endif /* CONFIG_HYPERV */ -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry stack_segment do_stack_segment has_error_code=1 +idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +idtentry stack_segment do_stack_segment has_error_code=1 + #ifdef CONFIG_XEN -idtentry xen_debug do_debug has_error_code=0 -idtentry xen_int3 do_int3 has_error_code=0 -idtentry xen_stack_segment do_stack_segment has_error_code=1 +idtentry xen_debug do_debug has_error_code=0 +idtentry xen_int3 do_int3 has_error_code=0 +idtentry xen_stack_segment do_stack_segment has_error_code=1 #endif -idtentry general_protection do_general_protection has_error_code=1 -trace_idtentry page_fault do_page_fault has_error_code=1 + +idtentry general_protection do_general_protection has_error_code=1 +trace_idtentry page_fault do_page_fault has_error_code=1 + #ifdef CONFIG_KVM_GUEST -idtentry async_page_fault do_async_page_fault has_error_code=1 +idtentry async_page_fault do_async_page_fault has_error_code=1 #endif + #ifdef CONFIG_X86_MCE -idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) +idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) #endif /* @@ -1105,13 +1094,13 @@ ENTRY(paranoid_entry) cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 - movl $1,%ebx - movl $MSR_GS_BASE,%ecx + movl $1, %ebx + movl $MSR_GS_BASE, %ecx rdmsr - testl %edx,%edx - js 1f /* negative -> in kernel */ + testl %edx, %edx + js 1f /* negative -> in kernel */ SWAPGS - xorl %ebx,%ebx + xorl %ebx, %ebx 1: ret END(paranoid_entry) @@ -1124,16 +1113,17 @@ END(paranoid_entry) * in syscall entry), so checking for preemption here would * be complicated. Fortunately, we there's no good reason * to try to handle preemption here. + * + * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ -/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(paranoid_exit) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF_DEBUG - testl %ebx,%ebx /* swapgs needed? */ - jnz paranoid_exit_no_swapgs + testl %ebx, %ebx /* swapgs needed? */ + jnz paranoid_exit_no_swapgs TRACE_IRQS_IRETQ SWAPGS_UNSAFE_STACK - jmp paranoid_exit_restore + jmp paranoid_exit_restore paranoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG paranoid_exit_restore: @@ -1151,7 +1141,7 @@ ENTRY(error_entry) cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 - xorl %ebx,%ebx + xorl %ebx, %ebx testb $3, CS+8(%rsp) jz error_kernelspace error_swapgs: @@ -1167,41 +1157,41 @@ error_sti: * for these here too. 
*/ error_kernelspace: - incl %ebx - leaq native_irq_return_iret(%rip),%rcx - cmpq %rcx,RIP+8(%rsp) - je error_bad_iret - movl %ecx,%eax /* zero extend */ - cmpq %rax,RIP+8(%rsp) - je bstep_iret - cmpq $gs_change,RIP+8(%rsp) - je error_swapgs - jmp error_sti + incl %ebx + leaq native_irq_return_iret(%rip), %rcx + cmpq %rcx, RIP+8(%rsp) + je error_bad_iret + movl %ecx, %eax /* zero extend */ + cmpq %rax, RIP+8(%rsp) + je bstep_iret + cmpq $gs_change, RIP+8(%rsp) + je error_swapgs + jmp error_sti bstep_iret: /* Fix truncated RIP */ - movq %rcx,RIP+8(%rsp) + movq %rcx, RIP+8(%rsp) /* fall through */ error_bad_iret: SWAPGS - mov %rsp,%rdi - call fixup_bad_iret - mov %rax,%rsp - decl %ebx /* Return to usergs */ - jmp error_sti + mov %rsp, %rdi + call fixup_bad_iret + mov %rax, %rsp + decl %ebx /* Return to usergs */ + jmp error_sti END(error_entry) /* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(error_exit) - movl %ebx,%eax + movl %ebx, %eax RESTORE_EXTRA_REGS DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl %eax,%eax - jnz retint_kernel - jmp retint_user + testl %eax, %eax + jnz retint_kernel + jmp retint_user END(error_exit) /* Runs on exception stack */ @@ -1240,21 +1230,21 @@ ENTRY(nmi) */ /* Use %rdx as our temp variable throughout */ - pushq %rdx + pushq %rdx /* * If %cs was not the kernel segment, then the NMI triggered in user * space, which means it is definitely not nested. */ - cmpl $__KERNEL_CS, 16(%rsp) - jne first_nmi + cmpl $__KERNEL_CS, 16(%rsp) + jne first_nmi /* * Check the special variable on the stack to see if NMIs are * executing. */ - cmpl $1, -8(%rsp) - je nested_nmi + cmpl $1, -8(%rsp) + je nested_nmi /* * Now test if the previous stack was an NMI stack. @@ -1268,6 +1258,7 @@ ENTRY(nmi) cmpq %rdx, 4*8(%rsp) /* If the stack pointer is above the NMI stack, this is a normal NMI */ ja first_nmi + subq $EXCEPTION_STKSZ, %rdx cmpq %rdx, 4*8(%rsp) /* If it is below the NMI stack, it is a normal NMI */ @@ -1280,29 +1271,29 @@ nested_nmi: * It's about to repeat the NMI handler, so we are fine * with ignoring this one. */ - movq $repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja 1f - movq $end_repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja nested_nmi_out + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out 1: /* Set up the interrupted NMIs stack to jump to repeat_nmi */ - leaq -1*8(%rsp), %rdx - movq %rdx, %rsp - leaq -10*8(%rsp), %rdx - pushq $__KERNEL_DS - pushq %rdx + leaq -1*8(%rsp), %rdx + movq %rdx, %rsp + leaq -10*8(%rsp), %rdx + pushq $__KERNEL_DS + pushq %rdx pushfq - pushq $__KERNEL_CS - pushq $repeat_nmi + pushq $__KERNEL_CS + pushq $repeat_nmi /* Put stack back */ - addq $(6*8), %rsp + addq $(6*8), %rsp nested_nmi_out: - popq %rdx + popq %rdx /* No need to check faults here */ INTERRUPT_RETURN @@ -1344,19 +1335,17 @@ first_nmi: * is also used by nested NMIs and can not be trusted on exit. */ /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ - movq (%rsp), %rdx + movq (%rsp), %rdx /* Set the NMI executing variable on the stack. 
*/ - pushq $1 + pushq $1 - /* - * Leave room for the "copied" frame - */ - subq $(5*8), %rsp + /* Leave room for the "copied" frame */ + subq $(5*8), %rsp /* Copy the stack frame to the Saved frame */ .rept 5 - pushq 11*8(%rsp) + pushq 11*8(%rsp) .endr /* Everything up to here is safe from nested NMIs */ @@ -1376,14 +1365,14 @@ repeat_nmi: * is benign for the non-repeat case, where 1 was pushed just above * to this very stack slot). */ - movq $1, 10*8(%rsp) + movq $1, 10*8(%rsp) /* Make another copy, this one may be modified by nested NMIs */ - addq $(10*8), %rsp + addq $(10*8), %rsp .rept 5 - pushq -6*8(%rsp) + pushq -6*8(%rsp) .endr - subq $(5*8), %rsp + subq $(5*8), %rsp end_repeat_nmi: /* @@ -1391,7 +1380,7 @@ end_repeat_nmi: * NMI if the first NMI took an exception and reset our iret stack * so that we repeat another NMI. */ - pushq $-1 /* ORIG_RAX: no syscall to restart */ + pushq $-1 /* ORIG_RAX: no syscall to restart */ ALLOC_PT_GPREGS_ON_STACK /* @@ -1401,7 +1390,7 @@ end_repeat_nmi: * setting NEED_RESCHED or anything that normal interrupts and * exceptions might do. */ - call paranoid_entry + call paranoid_entry /* * Save off the CR2 register. If we take a page fault in the NMI then @@ -1412,21 +1401,21 @@ end_repeat_nmi: * origin fault. Save it off and restore it if it changes. * Use the r12 callee-saved register. */ - movq %cr2, %r12 + movq %cr2, %r12 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ - movq %rsp,%rdi - movq $-1,%rsi - call do_nmi + movq %rsp, %rdi + movq $-1, %rsi + call do_nmi /* Did the NMI take a page fault? Restore cr2 if it did */ - movq %cr2, %rcx - cmpq %rcx, %r12 - je 1f - movq %r12, %cr2 + movq %cr2, %rcx + cmpq %rcx, %r12 + je 1f + movq %r12, %cr2 1: - testl %ebx,%ebx /* swapgs needed? */ - jnz nmi_restore + testl %ebx, %ebx /* swapgs needed? */ + jnz nmi_restore nmi_swapgs: SWAPGS_UNSAFE_STACK nmi_restore: @@ -1436,12 +1425,11 @@ nmi_restore: REMOVE_PT_GPREGS_FROM_STACK 6*8 /* Clear the NMI executing stack variable */ - movq $0, 5*8(%rsp) + movq $0, 5*8(%rsp) INTERRUPT_RETURN END(nmi) ENTRY(ignore_sysret) - mov $-ENOSYS,%eax + mov $-ENOSYS, %eax sysret END(ignore_sysret) - From eb47854415825a69b1c578e7487da571227ba25a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 7 Jun 2015 20:24:30 +0200 Subject: [PATCH 472/481] x86/asm/entry/32: Reinstate clearing of pt_regs->r8..r11 on EFAULT path I broke this recently when I changed pt_regs->r8..r11 clearing logic in INT 80 code path. There is a branch from SYSENTER/SYSCALL code to INT 80 code: if we fail to retrieve arg6, we return EFAULT. Before this patch, in this case we don't clear pt_regs->r8..r11. This patch fixes this. The resulting code is smaller and simpler. While at it, remove incorrect comment about syscall dispatching CALL insn: it does not use RIP-relative addressing form (the comment was meant to be "TODO: make this rip-relative", and morphed since then, dropping "TODO"). Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433701470-28800-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 59840e33d203..5757ddec35d6 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -413,9 +413,7 @@ END(entry_SYSCALL_compat) ia32_badarg: ASM_CLAC - movq $-EFAULT, %rax - jmp ia32_sysret - + movq $-EFAULT, RAX(%rsp) ia32_ret_from_sys_call: xorl %eax, %eax /* Do not leak kernel information */ movq %rax, R11(%rsp) @@ -486,9 +484,7 @@ ia32_do_call: cmpq $(IA32_NR_syscalls-1), %rax ja 1f - call *ia32_sys_call_table(, %rax, 8) /* RIP relative */ - -ia32_sysret: + call *ia32_sys_call_table(, %rax, 8) movq %rax, RAX(%rsp) 1: jmp int_ret_from_sys_call From bace7117d3fb59a6ed7ea1aa6c8994df6a28a72a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2015 21:20:26 +0200 Subject: [PATCH 473/481] x86/asm/entry: (Re-)rename __NR_entry_INT80_compat_max to __NR_syscall_compat_max Brian Gerst noticed that I did a weird rename in the following commit: b2502b418e63 ("x86/asm/entry: Untangle 'system_call' into two entry points: entry_SYSCALL_64 and entry_INT80_32") which renamed __NR_ia32_syscall_max to __NR_entry_INT80_compat_max. Now the original name was a misnomer, but the new one is a misnomer as well, as all the 32-bit compat syscall entry points (sysenter, syscall) share the system call table, not just the INT80 based one. Rename it to __NR_syscall_compat_max. Reported-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/syscall_32.c | 6 +++--- arch/x86/kernel/asm-offsets_64.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index e398d033673f..8ea34f94e973 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -10,7 +10,7 @@ #else #define SYM(sym, compat) sym #define ia32_sys_call_table sys_call_table -#define __NR_entry_INT80_compat_max __NR_syscall_max +#define __NR_syscall_compat_max __NR_syscall_max #endif #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ; @@ -23,11 +23,11 @@ typedef asmlinkage void (*sys_call_ptr_t)(void); extern asmlinkage void sys_ni_syscall(void); -__visible const sys_call_ptr_t ia32_sys_call_table[__NR_entry_INT80_compat_max+1] = { +__visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_entry_INT80_compat_max] = &sys_ni_syscall, + [0 ... 
__NR_syscall_compat_max] = &sys_ni_syscall, #include }; diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 599afcf0005f..d8f42f902a0f 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -66,7 +66,7 @@ int main(void) DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); DEFINE(NR_syscalls, sizeof(syscalls_64)); - DEFINE(__NR_entry_INT80_compat_max, sizeof(syscalls_ia32) - 1); + DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1); DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); return 0; From 9b47feb708e50e6114b4b4193eee67f2e5af9d05 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 8 Jun 2015 22:35:33 +0200 Subject: [PATCH 474/481] x86/asm/entry: Clean up entry*.S style, final bits A few bits were missed. Signed-off-by: Denys Vlasenko Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 18 +++++++++--------- arch/x86/entry/entry_64_compat.S | 14 +++++++------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index edd7aadfacfa..21dc60a60b5f 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -12,7 +12,7 @@ * 0(%esp) - %ebx * 4(%esp) - %ecx * 8(%esp) - %edx - * C(%esp) - %esi + * C(%esp) - %esi * 10(%esp) - %edi * 14(%esp) - %ebp * 18(%esp) - %eax @@ -128,7 +128,7 @@ .macro POP_GS pop=0 98: popl %gs .if \pop <> 0 - add $\pop, %esp + add $\pop, %esp .endif .endm .macro POP_GS_EX @@ -487,8 +487,8 @@ ldt_ss: mov %esp, %edx /* load kernel esp */ mov PT_OLDESP(%esp), %eax /* load userspace esp */ mov %dx, %ax /* eax: new kernel esp */ - sub %eax, %edx /* offset (low word is 0) */ - shr $16, %edx + sub %eax, %edx /* offset (low word is 0) */ + shr $16, %edx mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ pushl $__ESPFIX_SS @@ -507,7 +507,7 @@ ENDPROC(entry_INT80_32) # perform work that needs to be done immediately before resumption ALIGN work_pending: - testb $_TIF_NEED_RESCHED, %cl + testb $_TIF_NEED_RESCHED, %cl jz work_notifysig work_resched: call schedule @@ -520,7 +520,7 @@ work_resched: andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? 
jz restore_all - testb $_TIF_NEED_RESCHED, %cl + testb $_TIF_NEED_RESCHED, %cl jnz work_resched work_notifysig: # deal with pending signals and @@ -537,8 +537,8 @@ work_notifysig: # deal with pending signals and TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) movb PT_CS(%esp), %bl - andb $SEGMENT_RPL_MASK, %bl - cmpb $USER_RPL, %bl + andb $SEGMENT_RPL_MASK, %bl + cmpb $USER_RPL, %bl jb resume_kernel xorl %edx, %edx call do_notify_resume @@ -609,7 +609,7 @@ END(sysenter_badsys) /* fixup the stack */ mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ - shl $16, %eax + shl $16, %eax addl %esp, %eax /* the adjusted stack pointer */ pushl $__KERNEL_DS pushl %eax diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 5757ddec35d6..2093ce6be203 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -106,8 +106,8 @@ ENTRY(entry_SYSENTER_compat) jnz sysenter_fix_flags sysenter_flags_fixed: - orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz sysenter_tracesys sysenter_do_call: @@ -138,7 +138,7 @@ sysexit_from_sys_call: * This code path is still called 'sysexit' because it pairs * with 'sysenter' and it uses the SYSENTER calling convention. */ - andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) movl RIP(%rsp), %ecx /* User %eip */ RESTORE_RSI_RDI xorl %edx, %edx /* Do not leak kernel information */ @@ -229,7 +229,7 @@ sysexit_audit: #endif sysenter_fix_flags: - pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) + pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) popfq jmp sysenter_flags_fixed @@ -325,9 +325,9 @@ ENTRY(entry_SYSCALL_compat) 1: movl (%r8), %ebp _ASM_EXTABLE(1b, ia32_badarg) ASM_CLAC - orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz cstar_tracesys + orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz cstar_tracesys cstar_do_call: /* 32-bit syscall -> 64-bit C ABI argument conversion */ From aee4b013a71666f11ffeac11ab45bb7c6e0e394d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 9 Jun 2015 20:54:07 +0200 Subject: [PATCH 475/481] x86/asm/entry/32: Fix fallout from the R9 trick removal in the SYSCALL code I put %ebp restoration code too late. Under strace, it is not reached and %ebp is not restored upon return to userspace. This is the fix. Run-tested. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433876051-26604-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 2093ce6be203..2c44180d2fac 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -344,6 +344,7 @@ cstar_dispatch: call *ia32_sys_call_table(, %rax, 8) movq %rax, RAX(%rsp) 1: + movl RCX(%rsp), %ebp DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -351,7 +352,6 @@ cstar_dispatch: sysretl_from_sys_call: andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - movl RCX(%rsp), %ebp RESTORE_RSI_RDI_RDX movl RIP(%rsp), %ecx movl EFLAGS(%rsp), %r11d From 1536bb46fac7672ef04aaaa6a3b07848314263bc Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 9 Jun 2015 20:54:08 +0200 Subject: [PATCH 476/481] x86/asm/entry/32: Explain reloading of registers after __audit_syscall_entry() Here it is not obvious why we load pt_regs->cx to %esi etc. Lets improve comments. Explain that here we combine two things: first, we reload registers since some of them are clobbered by the C function we just called; and we also convert 32-bit syscall params to 64-bit C ABI, because we are going to jump back to syscall dispatch code. Move reloading of 6th argument into the macro instead of having it after each of two macro invocations. No actual code changes here. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433876051-26604-2-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 2c44180d2fac..0fa108cdf182 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -185,12 +185,18 @@ sysexit_from_sys_call: movl %ebx, %esi /* 2nd arg: 1st syscall arg */ movl %eax, %edi /* 1st arg: syscall number */ call __audit_syscall_entry - movl ORIG_RAX(%rsp), %eax /* reload syscall number */ - movl %ebx, %edi /* reload 1st syscall arg */ - movl RCX(%rsp), %esi /* reload 2nd syscall arg */ - movl RDX(%rsp), %edx /* reload 3rd syscall arg */ - movl RSI(%rsp), %ecx /* reload 4th syscall arg */ - movl RDI(%rsp), %r8d /* reload 5th syscall arg */ + /* + * We are going to jump back to syscall dispatch. + * Prepare syscall args as required by 64-bit C ABI. + * Clobbered registers are loaded from pt_regs on stack. 
+ */ + movl ORIG_RAX(%rsp), %eax /* syscall number */ + movl %ebx, %edi /* arg1 */ + movl RCX(%rsp), %esi /* arg2 */ + movl RDX(%rsp), %edx /* arg3 */ + movl RSI(%rsp), %ecx /* arg4 */ + movl RDI(%rsp), %r8d /* arg5 */ + movl %ebp, %r9d /* arg6 */ .endm .macro auditsys_exit exit @@ -221,7 +227,6 @@ sysexit_from_sys_call: sysenter_auditsys: auditsys_entry_common - movl %ebp, %r9d /* reload 6th syscall arg */ jmp sysenter_dispatch sysexit_audit: @@ -379,7 +384,6 @@ sysretl_from_sys_call: #ifdef CONFIG_AUDITSYSCALL cstar_auditsys: auditsys_entry_common - movl %ebp, %r9d /* reload 6th syscall arg */ jmp cstar_dispatch sysretl_audit: From a92fde25231a89d7d10895482556260c1b63767d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 9 Jun 2015 20:54:09 +0200 Subject: [PATCH 477/481] x86/asm/entry/32: Shorten __audit_syscall_entry() args preparation We use three MOVs to swap edx and ecx. We can use one XCHG instead. Expand the comments. It's difficult to keep track which arg# every register corresponds to, so spell it out. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1433876051-26604-3-git-send-email-dvlasenk@redhat.com [ Expanded the comments some more. ] Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0fa108cdf182..bb187a6a877c 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -178,17 +178,26 @@ sysexit_from_sys_call: #ifdef CONFIG_AUDITSYSCALL .macro auditsys_entry_common - movl %esi, %r8d /* 5th arg: 4th syscall arg */ - movl %ecx, %r9d /* swap with edx */ - movl %edx, %ecx /* 4th arg: 3rd syscall arg */ - movl %r9d, %edx /* 3rd arg: 2nd syscall arg */ - movl %ebx, %esi /* 2nd arg: 1st syscall arg */ - movl %eax, %edi /* 1st arg: syscall number */ - call __audit_syscall_entry /* - * We are going to jump back to syscall dispatch. - * Prepare syscall args as required by 64-bit C ABI. - * Clobbered registers are loaded from pt_regs on stack. + * At this point, registers hold syscall args in the 32-bit syscall ABI: + * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,EBP. + * + * We want to pass them to __audit_syscall_entry(), which is a 64-bit + * C function with 5 parameters, so shuffle them to match what + * the function expects: RDI,RSI,RDX,RCX,R8. + */ + movl %esi, %r8d /* arg5 (R8 ) <= 4th syscall arg (ESI) */ + xchg %ecx, %edx /* arg4 (RCX) <= 3rd syscall arg (EDX) */ + /* arg3 (RDX) <= 2nd syscall arg (ECX) */ + movl %ebx, %esi /* arg2 (RSI) <= 1st syscall arg (EBX) */ + movl %eax, %edi /* arg1 (RDI) <= syscall number (EAX) */ + call __audit_syscall_entry + + /* + * We are going to jump back to the syscall dispatch code. + * Prepare syscall args as required by the 64-bit C ABI. 
+ * Registers clobbered by __audit_syscall_entry() are + * loaded from pt_regs on stack: */ movl ORIG_RAX(%rsp), %eax /* syscall number */ movl %ebx, %edi /* arg1 */ From 539f5113650068ba221197f190267ab727296ef5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 9 Jun 2015 12:36:01 -0700 Subject: [PATCH 478/481] x86/asm/entry/64: Disentangle error_entry/exit gsbase/ebx/usermode code The error_entry/error_exit code to handle gsbase and whether we return to user mdoe was a mess: - error_sti was misnamed. In particular, it did not enable interrupts. - Error handling for gs_change was hopelessly tangled the normal usermode path. Separate it out. This saves a branch in normal entries from kernel mode. - The comments were bad. Fix it up. As a nice side effect, there's now a code path that happens on error entries from user mode. We'll use it soon. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/f1be898ab93360169fb845ab85185948832209ee.1433878454.git.luto@kernel.org [ Prettified it, clarified comments some more. ] Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index bd97161f90cb..3bb2c4302df1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1135,7 +1135,7 @@ END(paranoid_exit) /* * Save all registers in pt_regs, and switch gs if needed. - * Return: ebx=0: need swapgs on exit, ebx=1: otherwise + * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) cld @@ -1144,9 +1144,11 @@ ENTRY(error_entry) xorl %ebx, %ebx testb $3, CS+8(%rsp) jz error_kernelspace -error_swapgs: + + /* We entered from user mode */ SWAPGS -error_sti: + +error_entry_done: TRACE_IRQS_OFF ret @@ -1165,8 +1167,15 @@ error_kernelspace: cmpq %rax, RIP+8(%rsp) je bstep_iret cmpq $gs_change, RIP+8(%rsp) - je error_swapgs - jmp error_sti + jne error_entry_done + + /* + * hack: gs_change can fail with user gsbase. If this happens, fix up + * gsbase and proceed. We'll fix up the exception and land in + * gs_change's error handler with kernel gsbase. + */ + SWAPGS + jmp error_entry_done bstep_iret: /* Fix truncated RIP */ @@ -1174,16 +1183,30 @@ bstep_iret: /* fall through */ error_bad_iret: + /* + * We came from an IRET to user mode, so we have user gsbase. + * Switch to kernel gsbase: + */ SWAPGS + + /* + * Pretend that the exception came from user mode: set up pt_regs + * as if we faulted immediately after IRET and clear EBX so that + * error_exit knows that we will be returning to user mode. 
+ */ mov %rsp, %rdi call fixup_bad_iret mov %rax, %rsp - decl %ebx /* Return to usergs */ - jmp error_sti + decl %ebx + jmp error_entry_done END(error_entry) -/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ +/* + * On entry, EBS is a "return to kernel mode" flag: + * 1: already in kernel mode, don't need SWAPGS + * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode + */ ENTRY(error_exit) movl %ebx, %eax RESTORE_EXTRA_REGS From b58d930750135d6c5b8e5aa084c0e9303c78c286 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 15 Jun 2015 17:40:01 +0800 Subject: [PATCH 479/481] x86/platform/intel/baytrail: Add comments about why we disabled HPET on Baytrail This question has been asked many times, and finally I found the official document which explains the problem of HPET on Baytrail, that it will halt in deep idle states. Signed-off-by: Feng Tang Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: john.stultz@linaro.org Cc: len.brown@intel.com Cc: matthew.lee@intel.com Link: http://lkml.kernel.org/r/1434361201-31743-1-git-send-email-feng.tang@intel.com [ Prettified things a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index fe9f0b79a18b..5cb9a4d6f623 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -627,8 +627,12 @@ static struct chipset early_qrk[] __initdata = { { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, QFLAG_APPLY_ONCE, intel_graphics_stolen }, /* - * HPET on current version of Baytrail platform has accuracy - * problems, disable it for now: + * HPET on the current version of the Baytrail platform has accuracy + * problems: it will halt in deep idle state - so we disable it. + * + * More details can be found in section 18.10.1.3 of the datasheet: + * + * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/atom-z8000-datasheet-vol-1.pdf */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, From 1bf1735b478008c30acaff18ec6f4a3ff211c28a Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 15 Jun 2015 10:28:16 +0200 Subject: [PATCH 480/481] x86/mm/pat, drivers/media/ivtv: Use arch_phys_wc_add() and require PAT disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are burrying direct access to MTRR code support on x86 in order to take advantage of PAT. In the future, we also want to make the default behavior of ioremap_nocache() to use strong UC, at which point the use of mtrr_add() on those systems would make write-combining void. In order to help both enable us to later make strong UC default and in order to phase out direct MTRR access code, port the driver over to the arch_phys_wc_add() API and annotate that the device driver requires systems to boot with PAT disabled, with the 'nopat' kernel parameter. This is a workable compromise given that the hardware is really rare these days, and perhaps only some lost souls stuck with obsolete hardware are expected to be using this feature of the device driver. Signed-off-by: Luis R. 
Rodriguez Signed-off-by: Borislav Petkov Acked-by: Andy Walls Acked-by: Mauro Carvalho Chehab Cc: Andrew Morton Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Vetter Cc: Dave Airlie Cc: Dave Hansen Cc: Denys Vlasenko Cc: Doug Ledford Cc: H. Peter Anvin Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Roger Pau Monné Cc: Stefan Bader Cc: Suresh Siddha Cc: Thomas Gleixner Cc: Ville Syrjälä Cc: bhelgaas@google.com Cc: konrad.wilk@oracle.com Cc: linux-media@vger.kernel.org Cc: tomi.valkeinen@ti.com Cc: toshi.kani@hp.com Link: http://lkml.kernel.org/r/1434053994-2196-2-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar --- drivers/media/pci/ivtv/Kconfig | 3 ++ drivers/media/pci/ivtv/ivtvfb.c | 58 +++++++++++++-------------------- 2 files changed, 26 insertions(+), 35 deletions(-) diff --git a/drivers/media/pci/ivtv/Kconfig b/drivers/media/pci/ivtv/Kconfig index dd6ee57e3a4c..6e5867c57305 100644 --- a/drivers/media/pci/ivtv/Kconfig +++ b/drivers/media/pci/ivtv/Kconfig @@ -57,5 +57,8 @@ config VIDEO_FB_IVTV This is used in the Hauppauge PVR-350 card. There is a driver homepage at . + In order to use this module, you will need to boot with PAT disabled + on x86 systems, using the nopat kernel parameter. + To compile this driver as a module, choose M here: the module will be called ivtvfb. diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c index 9ff1230192e8..4cb365d4ffdc 100644 --- a/drivers/media/pci/ivtv/ivtvfb.c +++ b/drivers/media/pci/ivtv/ivtvfb.c @@ -44,8 +44,8 @@ #include #include -#ifdef CONFIG_MTRR -#include +#ifdef CONFIG_X86_64 +#include #endif #include "ivtv-driver.h" @@ -155,12 +155,11 @@ struct osd_info { /* Buffer size */ u32 video_buffer_size; -#ifdef CONFIG_MTRR /* video_base rounded down as required by hardware MTRRs */ unsigned long fb_start_aligned_physaddr; /* video_base rounded up as required by hardware MTRRs */ unsigned long fb_end_aligned_physaddr; -#endif + int wc_cookie; /* Store the buffer offset */ int set_osd_coords_x; @@ -1099,6 +1098,8 @@ static int ivtvfb_init_vidmode(struct ivtv *itv) static int ivtvfb_init_io(struct ivtv *itv) { struct osd_info *oi = itv->osd_info; + /* Find the largest power of two that maps the whole buffer */ + int size_shift = 31; mutex_lock(&itv->serialize_lock); if (ivtv_init_on_first_open(itv)) { @@ -1132,29 +1133,16 @@ static int ivtvfb_init_io(struct ivtv *itv) oi->video_pbase, oi->video_vbase, oi->video_buffer_size / 1024); -#ifdef CONFIG_MTRR - { - /* Find the largest power of two that maps the whole buffer */ - int size_shift = 31; - - while (!(oi->video_buffer_size & (1 << size_shift))) { - size_shift--; - } - size_shift++; - oi->fb_start_aligned_physaddr = oi->video_pbase & ~((1 << size_shift) - 1); - oi->fb_end_aligned_physaddr = oi->video_pbase + oi->video_buffer_size; - oi->fb_end_aligned_physaddr += (1 << size_shift) - 1; - oi->fb_end_aligned_physaddr &= ~((1 << size_shift) - 1); - if (mtrr_add(oi->fb_start_aligned_physaddr, - oi->fb_end_aligned_physaddr - oi->fb_start_aligned_physaddr, - MTRR_TYPE_WRCOMB, 1) < 0) { - IVTVFB_INFO("disabled mttr\n"); - oi->fb_start_aligned_physaddr = 0; - oi->fb_end_aligned_physaddr = 0; - } - } -#endif - + while (!(oi->video_buffer_size & (1 << size_shift))) + size_shift--; + size_shift++; + oi->fb_start_aligned_physaddr = oi->video_pbase & ~((1 << size_shift) - 1); + oi->fb_end_aligned_physaddr = 
oi->video_pbase + oi->video_buffer_size; + oi->fb_end_aligned_physaddr += (1 << size_shift) - 1; + oi->fb_end_aligned_physaddr &= ~((1 << size_shift) - 1); + oi->wc_cookie = arch_phys_wc_add(oi->fb_start_aligned_physaddr, + oi->fb_end_aligned_physaddr - + oi->fb_start_aligned_physaddr); /* Blank the entire osd. */ memset_io(oi->video_vbase, 0, oi->video_buffer_size); @@ -1172,14 +1160,7 @@ static void ivtvfb_release_buffers (struct ivtv *itv) /* Release pseudo palette */ kfree(oi->ivtvfb_info.pseudo_palette); - -#ifdef CONFIG_MTRR - if (oi->fb_end_aligned_physaddr) { - mtrr_del(-1, oi->fb_start_aligned_physaddr, - oi->fb_end_aligned_physaddr - oi->fb_start_aligned_physaddr); - } -#endif - + arch_phys_wc_del(oi->wc_cookie); kfree(oi); itv->osd_info = NULL; } @@ -1284,6 +1265,13 @@ static int __init ivtvfb_init(void) int registered = 0; int err; +#ifdef CONFIG_X86_64 + if (WARN(pat_enabled(), + "ivtvfb needs PAT disabled, boot with nopat kernel parameter\n")) { + return -ENODEV; + } +#endif + if (ivtvfb_card_id < -1 || ivtvfb_card_id >= IVTV_MAX_CARDS) { printk(KERN_ERR "ivtvfb: ivtvfb_card_id parameter is out of range (valid range: -1 - %d)\n", IVTV_MAX_CARDS - 1); From 7ea402d01cb68224972dde3ae68bd41131b1c3a1 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 15 Jun 2015 10:28:18 +0200 Subject: [PATCH 481/481] x86/mm/pat, drivers/infiniband/ipath: Use arch_phys_wc_add() and require PAT disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are burrying direct access to MTRR code support on x86 in order to take advantage of PAT. In the future, we also want to make the default behaviour of ioremap_nocache() to use strong UC, use of mtrr_add() on those systems would make write-combining void. In order to help both enable us to later make strong UC default and in order to phase out direct MTRR access code port the driver over to arch_phys_wc_add() and annotate that the device driver requires systems to boot with PAT disabled, with the 'nopat' kernel parameter. This is a workable compromise given that the ipath device driver powers the old HTX bus cards that only work in AMD systems, while the newer IB/qib device driver powers all PCI-e cards. The ipath device driver is obsolete, hardware is hard to find and because of this its a reasonable compromise to require users of ipath to boot with 'nopat'. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Acked-by: Doug Ledford Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Walls Cc: Antonino Daplas Cc: Arnd Bergmann Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Vetter Cc: Dave Airlie Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Hal Rosenstock Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Michael S. 
Tsirkin Cc: Mike Marciniszyn Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rickard Strandqvist Cc: Roger Pau Monné Cc: Roland Dreier Cc: Sean Hefty Cc: Stefan Bader Cc: Suresh Siddha Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Ville Syrjälä Cc: infinipath@intel.com Cc: jbeulich@suse.com Cc: konrad.wilk@oracle.com Cc: linux-rdma@vger.kernel.org Cc: mchehab@osg.samsung.com Cc: toshi.kani@hp.com Link: http://lkml.kernel.org/r/1434053994-2196-4-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1434356898-25135-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- drivers/infiniband/hw/ipath/Kconfig | 3 ++ drivers/infiniband/hw/ipath/ipath_driver.c | 18 +++++--- drivers/infiniband/hw/ipath/ipath_kernel.h | 4 +- drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | 43 ++++--------------- 4 files changed, 26 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 1d9bb115cbf6..8fe54ff00580 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -9,3 +9,6 @@ config INFINIBAND_IPATH as IP-over-InfiniBand as well as with userspace applications (in conjunction with InfiniBand userspace access). For QLogic PCIe QLE based cards, use the QIB driver instead. + + If you have this hardware you will need to boot with PAT disabled + on your x86-64 systems, use the nopat kernel parameter. diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index bd0caedafe99..2d7e503d13cb 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -42,6 +42,9 @@ #include #include #include +#ifdef CONFIG_X86_64 +#include +#endif #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -395,6 +398,14 @@ static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) unsigned long long addr; u32 bar0 = 0, bar1 = 0; +#ifdef CONFIG_X86_64 + if (WARN(pat_enabled(), + "ipath needs PAT disabled, boot with nopat kernel parameter\n")) { + ret = -ENODEV; + goto bail; + } +#endif + dd = ipath_alloc_devdata(pdev); if (IS_ERR(dd)) { ret = PTR_ERR(dd); @@ -542,6 +553,7 @@ static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dd->ipath_kregbase = __ioremap(addr, len, (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); #else + /* XXX: split this properly to enable on PAT */ dd->ipath_kregbase = ioremap_nocache(addr, len); #endif @@ -587,12 +599,8 @@ static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ret = ipath_enable_wc(dd); - if (ret) { - ipath_dev_err(dd, "Write combining not enabled " - "(err %d): performance may be poor\n", - -ret); + if (ret) ret = 0; - } ipath_verify_pioperf(dd); diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index e08db7020cd4..f0f947122779 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -463,9 +463,7 @@ struct ipath_devdata { /* offset in HT config space of slave/primary interface block */ u8 ipath_ht_slave_off; /* for write combining settings */ - unsigned long ipath_wc_cookie; - unsigned long ipath_wc_base; - unsigned long ipath_wc_len; + int wc_cookie; /* ref count for each pkey */ atomic_t ipath_pkeyrefs[4]; /* shadow copy of struct page *'s for exp tid pages */ diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c index 4ad0b932df1f..7b6e4c843e19 100644 --- 
a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c +++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c @@ -37,7 +37,6 @@ */ #include -#include #include #include "ipath_kernel.h" @@ -122,27 +121,14 @@ int ipath_enable_wc(struct ipath_devdata *dd) } if (!ret) { - int cookie; - ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC " - "(addr %llx, len=0x%llx)\n", - (unsigned long long) pioaddr, - (unsigned long long) piolen); - cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); - if (cookie < 0) { - { - dev_info(&dd->pcidev->dev, - "mtrr_add() WC for PIO bufs " - "failed (%d)\n", - cookie); - ret = -EINVAL; - } - } else { - ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, " - "cookie is %d\n", cookie); - dd->ipath_wc_cookie = cookie; - dd->ipath_wc_base = (unsigned long) pioaddr; - dd->ipath_wc_len = (unsigned long) piolen; - } + dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen); + if (dd->wc_cookie < 0) { + ipath_dev_err(dd, "Seting mtrr failed on PIO buffers\n"); + ret = -ENODEV; + } else if (dd->wc_cookie == 0) + ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n"); + else + ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n"); } return ret; @@ -154,16 +140,5 @@ int ipath_enable_wc(struct ipath_devdata *dd) */ void ipath_disable_wc(struct ipath_devdata *dd) { - if (dd->ipath_wc_cookie) { - int r; - ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n"); - r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base, - dd->ipath_wc_len); - if (r < 0) - dev_info(&dd->pcidev->dev, - "mtrr_del(%lx, %lx, %lx) failed: %d\n", - dd->ipath_wc_cookie, dd->ipath_wc_base, - dd->ipath_wc_len, r); - dd->ipath_wc_cookie = 0; /* even on failure */ - } + arch_phys_wc_del(dd->wc_cookie); }
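
The final two patches above convert the ivtvfb and ipath drivers from the raw mtrr_add()/mtrr_del() interface to the cookie-based arch_phys_wc_add()/arch_phys_wc_del() helpers. A minimal sketch of that pattern follows; the structure and function names here are hypothetical, not taken from either driver, and only the two arch_phys_wc_*() calls are real kernel API. It is an illustration of the conversion, not a drop-in implementation:

    /*
     * Sketch of the MTRR -> arch_phys_wc_add() conversion pattern used by
     * the ivtvfb and ipath patches. Device/struct/function names are made
     * up for illustration; only the cookie handling mirrors the patches.
     */
    #include <linux/io.h>        /* arch_phys_wc_add(), arch_phys_wc_del() */

    struct example_dev {
            unsigned long fb_phys;     /* physical base of the WC region */
            unsigned long fb_len;      /* length of the WC region */
            int wc_cookie;             /* handle from arch_phys_wc_add() */
    };

    static int example_enable_wc(struct example_dev *dev)
    {
            /*
             * On PAT systems this returns 0 and does nothing (write
             * combining comes from the page tables instead); on MTRR-only
             * systems it adds a write-combining MTRR and returns a
             * positive handle. A negative value means WC setup failed.
             */
            dev->wc_cookie = arch_phys_wc_add(dev->fb_phys, dev->fb_len);
            return dev->wc_cookie < 0 ? dev->wc_cookie : 0;
    }

    static void example_disable_wc(struct example_dev *dev)
    {
            /* Harmless for zero or negative cookies, so no checks needed. */
            arch_phys_wc_del(dev->wc_cookie);
    }

This is why both patches can drop the per-driver bookkeeping of the aligned start/end addresses for mtrr_del(): the single int cookie is all that has to be stored, and the teardown path collapses to one unconditional call.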