OpenCloudOS-Kernel/arch/s390/kernel/topology.c

646 lines
14 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2007, 2011
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/workqueue.h>
mm: remove include/linux/bootmem.h Move remaining definitions and declarations from include/linux/bootmem.h into include/linux/memblock.h and remove the redundant header. The includes were replaced with the semantic patch below and then semi-automated removal of duplicated '#include <linux/memblock.h> @@ @@ - #include <linux/bootmem.h> + #include <linux/memblock.h> [sfr@canb.auug.org.au: dma-direct: fix up for the removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181002185342.133d1680@canb.auug.org.au [sfr@canb.auug.org.au: powerpc: fix up for removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181005161406.73ef8727@canb.auug.org.au [sfr@canb.auug.org.au: x86/kaslr, ACPI/NUMA: fix for linux/bootmem.h removal] Link: http://lkml.kernel.org/r/20181008190341.5e396491@canb.auug.org.au Link: http://lkml.kernel.org/r/1536927045-23536-30-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Acked-by: Michal Hocko <mhocko@suse.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: "David S. Miller" <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Ley Foon Tan <lftan@altera.com> Cc: Mark Salter <msalter@redhat.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Paul Burton <paul.burton@mips.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Serge Semin <fancer.lancer@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-31 06:09:49 +08:00
#include <linux/memblock.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
#define PTF_CHECK (2UL)
enum {
TOPOLOGY_MODE_HW,
TOPOLOGY_MODE_SINGLE,
TOPOLOGY_MODE_PACKAGE,
TOPOLOGY_MODE_UNINITIALIZED
};
struct mask_info {
struct mask_info *next;
unsigned char id;
cpumask_t mask;
};
static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
static DECLARE_WORK(topology_work, topology_work_fn);
/*
* Socket/Book linked lists and cpu_topology updates are
* protected by "sched_domains_mutex".
*/
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;
struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
cpumask_t cpus_with_topology;
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
cpumask_t mask;
cpumask_copy(&mask, cpumask_of(cpu));
switch (topology_mode) {
case TOPOLOGY_MODE_HW:
while (info) {
if (cpumask_test_cpu(cpu, &info->mask)) {
mask = info->mask;
break;
}
info = info->next;
}
if (cpumask_empty(&mask))
cpumask_copy(&mask, cpumask_of(cpu));
break;
case TOPOLOGY_MODE_PACKAGE:
cpumask_copy(&mask, cpu_present_mask);
break;
default:
/* fallthrough */
case TOPOLOGY_MODE_SINGLE:
cpumask_copy(&mask, cpumask_of(cpu));
break;
}
return mask;
}
static cpumask_t cpu_thread_map(unsigned int cpu)
{
cpumask_t mask;
int i;
cpumask_copy(&mask, cpumask_of(cpu));
if (topology_mode != TOPOLOGY_MODE_HW)
return mask;
cpu -= cpu % (smp_cpu_mtid + 1);
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_present(cpu + i))
cpumask_set_cpu(cpu + i, &mask);
return mask;
}
#define TOPOLOGY_CORE_BITS 64
static void add_cpus_to_mask(struct topology_core *tl_core,
struct mask_info *drawer,
struct mask_info *book,
struct mask_info *socket)
{
struct cpu_topology_s390 *topo;
unsigned int core;
for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
unsigned int rcore;
int lcpu, i;
rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
if (lcpu < 0)
continue;
for (i = 0; i <= smp_cpu_mtid; i++) {
topo = &cpu_topology[lcpu + i];
topo->drawer_id = drawer->id;
topo->book_id = book->id;
topo->socket_id = socket->id;
topo->core_id = rcore;
topo->thread_id = lcpu + i;
topo->dedicated = tl_core->d;
cpumask_set_cpu(lcpu + i, &drawer->mask);
cpumask_set_cpu(lcpu + i, &book->mask);
cpumask_set_cpu(lcpu + i, &socket->mask);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
cpumask_set_cpu(lcpu + i, &cpus_with_topology);
smp_cpu_set_polarization(lcpu + i, tl_core->pp);
}
}
}
static void clear_masks(void)
{
struct mask_info *info;
info = &socket_info;
while (info) {
cpumask_clear(&info->mask);
info = info->next;
}
info = &book_info;
while (info) {
cpumask_clear(&info->mask);
info = info->next;
}
info = &drawer_info;
while (info) {
cpumask_clear(&info->mask);
info = info->next;
}
}
static union topology_entry *next_tle(union topology_entry *tle)
{
if (!tle->nl)
return (union topology_entry *)((struct topology_core *)tle + 1);
return (union topology_entry *)((struct topology_container *)tle + 1);
}
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
struct mask_info *socket = &socket_info;
struct mask_info *book = &book_info;
struct mask_info *drawer = &drawer_info;
union topology_entry *tle, *end;
clear_masks();
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
tle = info->tle;
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
case 3:
drawer = drawer->next;
drawer->id = tle->container.id;
break;
case 2:
book = book->next;
book->id = tle->container.id;
break;
case 1:
socket = socket->next;
socket->id = tle->container.id;
break;
case 0:
add_cpus_to_mask(&tle->cpu, drawer, book, socket);
break;
default:
clear_masks();
return;
}
tle = next_tle(tle);
}
}
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
static void topology_update_polarization_simple(void)
{
int cpu;
for_each_possible_cpu(cpu)
smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
}
static int ptf(unsigned long fc)
{
int rc;
asm volatile(
" .insn rre,0xb9a20000,%1,%1\n"
" ipm %0\n"
" srl %0,28\n"
: "=d" (rc)
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
: "d" (fc) : "cc");
return rc;
}
int topology_set_cpu_management(int fc)
{
int cpu, rc;
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
if (!MACHINE_HAS_TOPOLOGY)
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
return -EOPNOTSUPP;
if (fc)
rc = ptf(PTF_VERTICAL);
else
rc = ptf(PTF_HORIZONTAL);
if (rc)
return -EBUSY;
for_each_possible_cpu(cpu)
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
return rc;
}
static void update_cpu_masks(void)
{
struct cpu_topology_s390 *topo;
int cpu, id;
for_each_possible_cpu(cpu) {
topo = &cpu_topology[cpu];
topo->thread_mask = cpu_thread_map(cpu);
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
if (topology_mode != TOPOLOGY_MODE_HW) {
id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
topo->thread_id = cpu;
topo->core_id = cpu;
topo->socket_id = id;
topo->book_id = id;
topo->drawer_id = id;
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
if (cpu_present(cpu))
cpumask_set_cpu(cpu, &cpus_with_topology);
}
}
numa_update_cpu_topology();
}
void store_topology(struct sysinfo_15_1_x *info)
{
stsi(info, 15, 1, topology_mnest_limit());
}
static void __arch_update_dedicated_flag(void *arg)
{
if (topology_cpu_dedicated(smp_processor_id()))
set_cpu_flag(CIF_DEDICATED_CPU);
else
clear_cpu_flag(CIF_DEDICATED_CPU);
}
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
static int __arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
int rc = 0;
mutex_lock(&smp_cpu_state_mutex);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
cpumask_clear(&cpus_with_topology);
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
store_topology(info);
tl_to_masks(info);
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
}
update_cpu_masks();
if (!MACHINE_HAS_TOPOLOGY)
topology_update_polarization_simple();
mutex_unlock(&smp_cpu_state_mutex);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
return rc;
}
int arch_update_cpu_topology(void)
{
struct device *dev;
int cpu, rc;
rc = __arch_update_cpu_topology();
on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
for_each_online_cpu(cpu) {
cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular subsystem This moves the 'cpu sysdev_class' over to a regular 'cpu' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Userspace relies on events and generic sysfs subsystem infrastructure from sysdev devices, which are made available with this conversion. Cc: Haavard Skinnemoen <hskinnemoen@gmail.com> Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no> Cc: Tony Luck <tony.luck@intel.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Borislav Petkov <bp@amd64.org> Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk> Cc: Len Brown <lenb@kernel.org> Cc: Zhang Rui <rui.zhang@intel.com> Cc: Dave Jones <davej@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Russell King <rmk+kernel@arm.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com> Signed-off-by: Kay Sievers <kay.sievers@vrfy.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2011-12-22 06:29:42 +08:00
dev = get_cpu_device(cpu);
if (dev)
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
}
return rc;
}
[S390] cpu topology: Fix possible deadlock. When we get a notification that cpu topology changed, we schedule a work struct which just calls arch_reinit_sched_domains. This function in turn calls get_online_cpus() which results int the lockdep warning below. After all it turnded out that it's not legal to call get_online_cpus() from the context of a multi-threaded work queue. It could deadlock this way: process 0 (events/cpu-x): -> run_workqueue -> removes my work_struct from the work queue -> calls work_struct->fn -> get_online_cpus() -> locks on cpu_hotplug.lock since process 1 below is doing cpu hotplug process 1: -> cpu_down (for cpu-x) -> cpu_hotplug_begin (holds cpu_hotplug.lock now) -> cpu-x dead -> notifier_call_chain with CPU_DEAD -> cleanup_workqueue_thread -> flush_cpu_workqueue (succeeds) -> kthread_stop for events/cpu-x -> now kthread_stop waits for my work_struct to complete from within process 0. -> dead. A single threaded workqueue wouldn't have such problems, however there is no such common queue available and it's not worth to create one for the very rare calls to arch_reinit_sched_domains. So we just create a kernel thread from our work struct which calls arch_reinit_sched_domains and are done with it. Thanks to Oleg Nesterov and Peter Zijlstra for helping me figuring out that this isn't a false positive lockdep warning: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.25-03562-g3dc5063-dirty #12 ------------------------------------------------------- events/3/14 is trying to acquire lock: (&cpu_hotplug.lock){--..}, at: [<0000000000076094>] get_online_cpus+0x50/0x78 but task is already holding lock: (topology_work){--..}, at: [<0000000000059cde>] run_workqueue+0x106/0x278 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (topology_work){--..}: [<000000000006fc74>] __lock_acquire+0x1010/0x111c [<000000000006fe40>] lock_acquire+0xc0/0xf8 [<0000000000059d48>] run_workqueue+0x170/0x278 [<0000000000059edc>] worker_thread+0x8c/0xf0 [<000000000005f5bc>] kthread+0x68/0xa0 [<000000000001a33e>] kernel_thread_starter+0x6/0xc [<000000000001a338>] kernel_thread_starter+0x0/0xc -> #1 (events){--..}: [<000000000006fc74>] __lock_acquire+0x1010/0x111c [<000000000006fe40>] lock_acquire+0xc0/0xf8 [<000000000005a23c>] cleanup_workqueue_thread+0x60/0xa8 [<00000000003b2ab8>] workqueue_cpu_callback+0xbc/0x170 [<00000000003bba80>] notifier_call_chain+0x5c/0xa4 [<00000000000655a2>] __raw_notifier_call_chain+0x26/0x38 [<00000000000655e2>] raw_notifier_call_chain+0x2e/0x40 [<0000000000075e00>] cpu_down+0x228/0x31c [<00000000003b1dd8>] store_online+0x64/0xb8 [<00000000001e7128>] sysdev_store+0x48/0x58 [<0000000000121cd2>] sysfs_write_file+0x126/0x1c0 [<00000000000c1944>] vfs_write+0xb0/0x15c [<00000000000c20e6>] sys_write+0x56/0x88 [<0000000000027a68>] sys32_write+0x34/0x4c [<0000000000023f70>] sysc_noemu+0x10/0x16 [<0000000077f3f186>] 0x77f3f186 -> #0 (&cpu_hotplug.lock){--..}: [<000000000006fa84>] __lock_acquire+0xe20/0x111c [<000000000006fe40>] lock_acquire+0xc0/0xf8 [<00000000003b701c>] mutex_lock_nested+0xd0/0x364 [<0000000000076094>] get_online_cpus+0x50/0x78 [<000000000003a03e>] arch_reinit_sched_domains+0x26/0x58 [<000000000002700e>] topology_work_fn+0x26/0x34 [<0000000000059d4e>] run_workqueue+0x176/0x278 [<0000000000059edc>] worker_thread+0x8c/0xf0 [<000000000005f5bc>] kthread+0x68/0xa0 [<000000000001a33e>] kernel_thread_starter+0x6/0xc [<000000000001a338>] kernel_thread_starter+0x0/0xc other info that might help us debug this: 2 locks held by events/3/14: #0: (events){--..}, at: [<0000000000059cde>] run_workqueue+0x106/0x278 #1: (topology_work){--..}, at: [<0000000000059cde>] run_workqueue+0x106/0x278 stack backtrace: CPU: 3 Not tainted 2.6.25-03562-g3dc5063-dirty #12 Process events/3 (pid: 14, task: 000000002fb04038, ksp: 000000002fb0bd70) 0400000000000000 000000002fb0ba40 0000000000000002 0000000000000000 000000002fb0bae0 000000002fb0ba58 000000002fb0ba58 0000000000016488 0000000000000000 000000002fb0bd70 0000000000000000 0000000000000000 000000002fb0ba40 000000000000000c 000000002fb0ba40 000000002fb0bab0 00000000003c99e0 0000000000016488 000000002fb0ba40 000000002fb0ba90 Call Trace: ([<00000000000163fc>] show_trace+0x138/0x158) [<00000000000164e2>] show_stack+0xc6/0xf8 [<0000000000016624>] dump_stack+0xb0/0xc0 [<000000000006cd36>] print_circular_bug_tail+0xa2/0xb4 [<000000000006fa84>] __lock_acquire+0xe20/0x111c [<000000000006fe40>] lock_acquire+0xc0/0xf8 [<00000000003b701c>] mutex_lock_nested+0xd0/0x364 [<0000000000076094>] get_online_cpus+0x50/0x78 [<000000000003a03e>] arch_reinit_sched_domains+0x26/0x58 [<000000000002700e>] topology_work_fn+0x26/0x34 [<0000000000059d4e>] run_workqueue+0x176/0x278 [<0000000000059edc>] worker_thread+0x8c/0xf0 [<000000000005f5bc>] kthread+0x68/0xa0 [<000000000001a33e>] kernel_thread_starter+0x6/0xc [<000000000001a338>] kernel_thread_starter+0x0/0xc INFO: lockdep is turned off. Cc: Oleg Nesterov <oleg@tv-sign.ru> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2008-04-30 19:38:41 +08:00
static void topology_work_fn(struct work_struct *work)
{
rebuild_sched_domains();
}
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
void topology_schedule_update(void)
{
schedule_work(&topology_work);
}
static void topology_flush_work(void)
{
flush_work(&topology_work);
}
timer: Remove users of TIMER_DEFERRED_INITIALIZER This removes uses of TIMER_DEFERRED_INITIALIZER and chooses a location to call timer_setup() from before add_timer() or mod_timer() is called. Adjusts callbacks to use from_timer() as needed. Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: linux-mips@linux-mips.org Cc: Petr Mladek <pmladek@suse.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Lai Jiangshan <jiangshanlai@gmail.com> Cc: Sebastian Reichel <sre@kernel.org> Cc: Kalle Valo <kvalo@qca.qualcomm.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Pavel Machek <pavel@ucw.cz> Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com> Cc: Wim Van Sebroeck <wim@iguana.be> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Ursula Braun <ubraun@linux.vnet.ibm.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: Harish Patil <harish.patil@cavium.com> Cc: Stephen Boyd <sboyd@codeaurora.org> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Manish Chopra <manish.chopra@cavium.com> Cc: Len Brown <len.brown@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Julian Wiedmann <jwi@linux.vnet.ibm.com> Cc: John Stultz <john.stultz@linaro.org> Cc: Mark Gross <mark.gross@intel.com> Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" <martin.petersen@oracle.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Stefan Richter <stefanr@s5r6.in-berlin.de> Cc: Michael Reed <mdr@sgi.com> Cc: netdev@vger.kernel.org Cc: Tejun Heo <tj@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com> Link: https://lkml.kernel.org/r/1507159627-127660-7-git-send-email-keescook@chromium.org
2017-10-05 07:27:00 +08:00
static void topology_timer_fn(struct timer_list *unused)
{
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 13:46:13 +08:00
if (ptf(PTF_CHECK))
topology_schedule_update();
set_topology_timer();
}
timer: Remove users of TIMER_DEFERRED_INITIALIZER This removes uses of TIMER_DEFERRED_INITIALIZER and chooses a location to call timer_setup() from before add_timer() or mod_timer() is called. Adjusts callbacks to use from_timer() as needed. Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: linux-mips@linux-mips.org Cc: Petr Mladek <pmladek@suse.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Lai Jiangshan <jiangshanlai@gmail.com> Cc: Sebastian Reichel <sre@kernel.org> Cc: Kalle Valo <kvalo@qca.qualcomm.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Pavel Machek <pavel@ucw.cz> Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com> Cc: Wim Van Sebroeck <wim@iguana.be> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Ursula Braun <ubraun@linux.vnet.ibm.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: Harish Patil <harish.patil@cavium.com> Cc: Stephen Boyd <sboyd@codeaurora.org> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Manish Chopra <manish.chopra@cavium.com> Cc: Len Brown <len.brown@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Julian Wiedmann <jwi@linux.vnet.ibm.com> Cc: John Stultz <john.stultz@linaro.org> Cc: Mark Gross <mark.gross@intel.com> Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" <martin.petersen@oracle.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Stefan Richter <stefanr@s5r6.in-berlin.de> Cc: Michael Reed <mdr@sgi.com> Cc: netdev@vger.kernel.org Cc: Tejun Heo <tj@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com> Link: https://lkml.kernel.org/r/1507159627-127660-7-git-send-email-keescook@chromium.org
2017-10-05 07:27:00 +08:00
static struct timer_list topology_timer;
static atomic_t topology_poll = ATOMIC_INIT(0);
static void set_topology_timer(void)
{
if (atomic_add_unless(&topology_poll, -1, 0))
mod_timer(&topology_timer, jiffies + HZ / 10);
else
mod_timer(&topology_timer, jiffies + HZ * 60);
}
void topology_expect_change(void)
{
if (!MACHINE_HAS_TOPOLOGY)
return;
/* This is racy, but it doesn't matter since it is just a heuristic.
* Worst case is that we poll in a higher frequency for a bit longer.
*/
if (atomic_read(&topology_poll) > 60)
return;
atomic_add(60, &topology_poll);
set_topology_timer();
}
static int cpu_management;
static ssize_t dispatching_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
count = sprintf(buf, "%d\n", cpu_management);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
static ssize_t dispatching_store(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
int val, rc;
char delim;
if (sscanf(buf, "%d %c", &val, &delim) != 1)
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
rc = 0;
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
if (cpu_management == val)
goto out;
rc = topology_set_cpu_management(val);
if (rc)
goto out;
cpu_management = val;
topology_expect_change();
out:
mutex_unlock(&smp_cpu_state_mutex);
put_online_cpus();
return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);
static ssize_t cpu_polarization_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int cpu = dev->id;
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
switch (smp_cpu_get_polarization(cpu)) {
case POLARIZATION_HRZ:
count = sprintf(buf, "horizontal\n");
break;
case POLARIZATION_VL:
count = sprintf(buf, "vertical:low\n");
break;
case POLARIZATION_VM:
count = sprintf(buf, "vertical:medium\n");
break;
case POLARIZATION_VH:
count = sprintf(buf, "vertical:high\n");
break;
default:
count = sprintf(buf, "unknown\n");
break;
}
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
static struct attribute *topology_cpu_attrs[] = {
&dev_attr_polarization.attr,
NULL,
};
static struct attribute_group topology_cpu_attr_group = {
.attrs = topology_cpu_attrs,
};
static ssize_t cpu_dedicated_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int cpu = dev->id;
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);
static struct attribute *topology_extra_cpu_attrs[] = {
&dev_attr_dedicated.attr,
NULL,
};
static struct attribute_group topology_extra_cpu_attr_group = {
.attrs = topology_extra_cpu_attrs,
};
int topology_cpu_init(struct cpu *cpu)
{
int rc;
rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
if (rc || !MACHINE_HAS_TOPOLOGY)
return rc;
rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
if (rc)
sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
return rc;
}
static const struct cpumask *cpu_thread_mask(int cpu)
{
return &cpu_topology[cpu].thread_mask;
}
const struct cpumask *cpu_coregroup_mask(int cpu)
{
return &cpu_topology[cpu].core_mask;
}
static const struct cpumask *cpu_book_mask(int cpu)
{
return &cpu_topology[cpu].book_mask;
}
static const struct cpumask *cpu_drawer_mask(int cpu)
{
return &cpu_topology[cpu].drawer_mask;
}
static struct sched_domain_topology_level s390_topology[] = {
{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_book_mask, SD_INIT_NAME(BOOK) },
{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};
static void __init alloc_masks(struct sysinfo_15_1_x *info,
struct mask_info *mask, int offset)
{
int i, nr_masks;
nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
for (i = 0; i < info->mnest - offset; i++)
nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
memblock: remove _virt from APIs returning virtual address The conversion is done using sed -i 's@memblock_virt_alloc@memblock_alloc@g' \ $(git grep -l memblock_virt_alloc) Link: http://lkml.kernel.org/r/1536927045-23536-8-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: "David S. Miller" <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Ley Foon Tan <lftan@altera.com> Cc: Mark Salter <msalter@redhat.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Hocko <mhocko@suse.com> Cc: Michal Simek <monstr@monstr.eu> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Paul Burton <paul.burton@mips.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Serge Semin <fancer.lancer@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-31 06:08:04 +08:00
mask->next = memblock_alloc(sizeof(*mask->next), 8);
treewide: add checks for the return value of memblock_alloc*() Add check for the return value of memblock_alloc*() functions and call panic() in case of error. The panic message repeats the one used by panicing memblock allocators with adjustment of parameters to include only relevant ones. The replacement was mostly automated with semantic patches like the one below with manual massaging of format strings. @@ expression ptr, size, align; @@ ptr = memblock_alloc(size, align); + if (!ptr) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__, size, align); [anders.roxell@linaro.org: use '%pa' with 'phys_addr_t' type] Link: http://lkml.kernel.org/r/20190131161046.21886-1-anders.roxell@linaro.org [rppt@linux.ibm.com: fix format strings for panics after memblock_alloc] Link: http://lkml.kernel.org/r/1548950940-15145-1-git-send-email-rppt@linux.ibm.com [rppt@linux.ibm.com: don't panic if the allocation in sparse_buffer_init fails] Link: http://lkml.kernel.org/r/20190131074018.GD28876@rapoport-lnx [akpm@linux-foundation.org: fix xtensa printk warning] Link: http://lkml.kernel.org/r/1548057848-15136-20-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Anders Roxell <anders.roxell@linaro.org> Reviewed-by: Guo Ren <ren_guo@c-sky.com> [c-sky] Acked-by: Paul Burton <paul.burton@mips.com> [MIPS] Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> [s390] Reviewed-by: Juergen Gross <jgross@suse.com> [Xen] Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org> [m68k] Acked-by: Max Filippov <jcmvbkbc@gmail.com> [xtensa] Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Dennis Zhou <dennis@kernel.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Mark Salter <msalter@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Petr Mladek <pmladek@suse.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Rob Herring <robh+dt@kernel.org> Cc: Rob Herring <robh@kernel.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-12 14:30:31 +08:00
if (!mask->next)
panic("%s: Failed to allocate %zu bytes align=0x%x\n",
__func__, sizeof(*mask->next), 8);
mask = mask->next;
}
}
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
void __init topology_init_early(void)
{
struct sysinfo_15_1_x *info;
set_sched_topology(s390_topology);
if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
if (MACHINE_HAS_TOPOLOGY)
topology_mode = TOPOLOGY_MODE_HW;
else
topology_mode = TOPOLOGY_MODE_SINGLE;
}
if (!MACHINE_HAS_TOPOLOGY)
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
goto out;
memblock: remove _virt from APIs returning virtual address The conversion is done using sed -i 's@memblock_virt_alloc@memblock_alloc@g' \ $(git grep -l memblock_virt_alloc) Link: http://lkml.kernel.org/r/1536927045-23536-8-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: "David S. Miller" <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Ley Foon Tan <lftan@altera.com> Cc: Mark Salter <msalter@redhat.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Hocko <mhocko@suse.com> Cc: Michal Simek <monstr@monstr.eu> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Paul Burton <paul.burton@mips.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Serge Semin <fancer.lancer@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-31 06:08:04 +08:00
tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
treewide: add checks for the return value of memblock_alloc*() Add check for the return value of memblock_alloc*() functions and call panic() in case of error. The panic message repeats the one used by panicing memblock allocators with adjustment of parameters to include only relevant ones. The replacement was mostly automated with semantic patches like the one below with manual massaging of format strings. @@ expression ptr, size, align; @@ ptr = memblock_alloc(size, align); + if (!ptr) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__, size, align); [anders.roxell@linaro.org: use '%pa' with 'phys_addr_t' type] Link: http://lkml.kernel.org/r/20190131161046.21886-1-anders.roxell@linaro.org [rppt@linux.ibm.com: fix format strings for panics after memblock_alloc] Link: http://lkml.kernel.org/r/1548950940-15145-1-git-send-email-rppt@linux.ibm.com [rppt@linux.ibm.com: don't panic if the allocation in sparse_buffer_init fails] Link: http://lkml.kernel.org/r/20190131074018.GD28876@rapoport-lnx [akpm@linux-foundation.org: fix xtensa printk warning] Link: http://lkml.kernel.org/r/1548057848-15136-20-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Anders Roxell <anders.roxell@linaro.org> Reviewed-by: Guo Ren <ren_guo@c-sky.com> [c-sky] Acked-by: Paul Burton <paul.burton@mips.com> [MIPS] Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> [s390] Reviewed-by: Juergen Gross <jgross@suse.com> [Xen] Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org> [m68k] Acked-by: Max Filippov <jcmvbkbc@gmail.com> [xtensa] Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Dennis Zhou <dennis@kernel.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Mark Salter <msalter@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Petr Mladek <pmladek@suse.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Rob Herring <robh+dt@kernel.org> Cc: Rob Herring <robh@kernel.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-12 14:30:31 +08:00
if (!tl_info)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
__func__, PAGE_SIZE, PAGE_SIZE);
info = tl_info;
store_topology(info);
pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
info->mag[0], info->mag[1], info->mag[2], info->mag[3],
info->mag[4], info->mag[5], info->mnest);
alloc_masks(info, &socket_info, 1);
alloc_masks(info, &book_info, 2);
alloc_masks(info, &drawer_info, 3);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 16:50:21 +08:00
out:
__arch_update_cpu_topology();
__arch_update_dedicated_flag(NULL);
}
static inline int topology_get_mode(int enabled)
{
if (!enabled)
return TOPOLOGY_MODE_SINGLE;
return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}
static inline int topology_is_enabled(void)
{
return topology_mode != TOPOLOGY_MODE_SINGLE;
}
static int __init topology_setup(char *str)
{
bool enabled;
int rc;
rc = kstrtobool(str, &enabled);
if (rc)
return rc;
topology_mode = topology_get_mode(enabled);
return 0;
}
early_param("topology", topology_setup);
static int topology_ctl_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int enabled = topology_is_enabled();
int new_mode;
int rc;
struct ctl_table ctl_entry = {
.procname = ctl->procname,
.data = &enabled,
.maxlen = sizeof(int),
proc/sysctl: add shared variables for range check In the sysctl code the proc_dointvec_minmax() function is often used to validate the user supplied value between an allowed range. This function uses the extra1 and extra2 members from struct ctl_table as minimum and maximum allowed value. On sysctl handler declaration, in every source file there are some readonly variables containing just an integer which address is assigned to the extra1 and extra2 members, so the sysctl range is enforced. The special values 0, 1 and INT_MAX are very often used as range boundary, leading duplication of variables like zero=0, one=1, int_max=INT_MAX in different source files: $ git grep -E '\.extra[12].*&(zero|one|int_max)' |wc -l 248 Add a const int array containing the most commonly used values, some macros to refer more easily to the correct array member, and use them instead of creating a local one for every object file. This is the bloat-o-meter output comparing the old and new binary compiled with the default Fedora config: # scripts/bloat-o-meter -d vmlinux.o.old vmlinux.o add/remove: 2/2 grow/shrink: 0/2 up/down: 24/-188 (-164) Data old new delta sysctl_vals - 12 +12 __kstrtab_sysctl_vals - 12 +12 max 14 10 -4 int_max 16 - -16 one 68 - -68 zero 128 28 -100 Total: Before=20583249, After=20583085, chg -0.00% [mcroce@redhat.com: tipc: remove two unused variables] Link: http://lkml.kernel.org/r/20190530091952.4108-1-mcroce@redhat.com [akpm@linux-foundation.org: fix net/ipv6/sysctl_net_ipv6.c] [arnd@arndb.de: proc/sysctl: make firmware loader table conditional] Link: http://lkml.kernel.org/r/20190617130014.1713870-1-arnd@arndb.de [akpm@linux-foundation.org: fix fs/eventpoll.c] Link: http://lkml.kernel.org/r/20190430180111.10688-1-mcroce@redhat.com Signed-off-by: Matteo Croce <mcroce@redhat.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Kees Cook <keescook@chromium.org> Reviewed-by: Aaron Tomlin <atomlin@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-19 06:58:50 +08:00
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
};
rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
if (rc < 0 || !write)
return rc;
mutex_lock(&smp_cpu_state_mutex);
new_mode = topology_get_mode(enabled);
if (topology_mode != new_mode) {
topology_mode = new_mode;
topology_schedule_update();
}
mutex_unlock(&smp_cpu_state_mutex);
topology_flush_work();
return rc;
}
static struct ctl_table topology_ctl_table[] = {
{
.procname = "topology",
.mode = 0644,
.proc_handler = topology_ctl_handler,
},
{ },
};
static struct ctl_table topology_dir_table[] = {
{
.procname = "s390",
.maxlen = 0,
.mode = 0555,
.child = topology_ctl_table,
},
{ },
};
static int __init topology_init(void)
{
timer: Remove users of TIMER_DEFERRED_INITIALIZER This removes uses of TIMER_DEFERRED_INITIALIZER and chooses a location to call timer_setup() from before add_timer() or mod_timer() is called. Adjusts callbacks to use from_timer() as needed. Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: linux-mips@linux-mips.org Cc: Petr Mladek <pmladek@suse.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Lai Jiangshan <jiangshanlai@gmail.com> Cc: Sebastian Reichel <sre@kernel.org> Cc: Kalle Valo <kvalo@qca.qualcomm.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Pavel Machek <pavel@ucw.cz> Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com> Cc: Wim Van Sebroeck <wim@iguana.be> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Ursula Braun <ubraun@linux.vnet.ibm.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: Harish Patil <harish.patil@cavium.com> Cc: Stephen Boyd <sboyd@codeaurora.org> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Manish Chopra <manish.chopra@cavium.com> Cc: Len Brown <len.brown@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Julian Wiedmann <jwi@linux.vnet.ibm.com> Cc: John Stultz <john.stultz@linaro.org> Cc: Mark Gross <mark.gross@intel.com> Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" <martin.petersen@oracle.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Stefan Richter <stefanr@s5r6.in-berlin.de> Cc: Michael Reed <mdr@sgi.com> Cc: netdev@vger.kernel.org Cc: Tejun Heo <tj@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com> Link: https://lkml.kernel.org/r/1507159627-127660-7-git-send-email-keescook@chromium.org
2017-10-05 07:27:00 +08:00
timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
if (MACHINE_HAS_TOPOLOGY)
set_topology_timer();
else
topology_update_polarization_simple();
register_sysctl_table(topology_dir_table);
return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);