diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 1699d18bd154..bf1144286ca7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -55,6 +55,7 @@
 #include
 #include
 #include
+#include <linux/cpuset.h>
 
 #include
 #include
@@ -119,6 +120,13 @@ int arch_update_cpu_topology(void)
 	return retval;
 }
 
+void arch_rebuild_cpu_topology(void)
+{
+	x86_topology_update = true;
+	rebuild_sched_domains();
+	x86_topology_update = false;
+}
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
 	unsigned long flags;
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index bf336aa4deaf..dbbe6fa6a42f 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -75,6 +75,7 @@ static int register_cpu_capacity_sysctl(void)
 }
 subsys_initcall(register_cpu_capacity_sysctl);
 
+static u32 capacity_scale;
 static int update_topology;
 
 int topology_update_cpu_topology(void)
@@ -94,7 +95,14 @@ static void update_topology_flags_workfn(struct work_struct *work)
 	update_topology = 0;
 }
 
-static u32 capacity_scale;
+void __weak arch_rebuild_cpu_topology(void)
+{
+	update_topology = 1;
+	rebuild_sched_domains();
+	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
+	update_topology = 0;
+}
+
 static u32 *raw_capacity;
 
 static int free_raw_capacity(void)
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 6a67c16b436b..96b072b2a74c 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -128,4 +128,10 @@ extern int sched_energy_aware_handler(struct ctl_table *table, int write,
 		loff_t *ppos);
 #endif
 
+#ifdef CONFIG_SCHED_CLUSTER
+extern unsigned int sysctl_sched_cluster;
+int sched_cluster_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
 #endif /* _LINUX_SCHED_SYSCTL_H */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 587814e357fc..3d68447a7577 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -44,6 +44,7 @@
 		if (nr_cpus_node(node))
 
 int arch_update_cpu_topology(void);
+void arch_rebuild_cpu_topology(void);
 
 /* Conform to ACPI 2.0 SLIT distance definitions */
 #define LOCAL_DISTANCE		10
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 48d796f02d4a..dc5f92ac8111 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7773,6 +7773,7 @@ int sched_cpu_dying(unsigned int cpu)
 void __init sched_init_smp(void)
 {
 	sched_init_numa();
+	set_sched_cluster();
 
 	/*
 	 * There's no userspace yet to cause hotplug operations; hence all the
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index bd6f1f042c4d..eb6a4ffb8103 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1821,6 +1821,12 @@ this_rq_lock_irq(struct rq_flags *rf)
 	return rq;
 }
 
+#ifdef CONFIG_SCHED_CLUSTER
+extern void set_sched_cluster(void);
+#else
+static inline void set_sched_cluster(void) { }
+#endif
+
 #ifdef CONFIG_NUMA
 enum numa_topology_type {
 	NUMA_DIRECT,
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index e538e23ab60b..2d14bd5e6f4d 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1455,6 +1455,73 @@ static struct sched_domain_topology_level default_topology[] = {
 static struct sched_domain_topology_level *sched_domain_topology =
 	default_topology;
 
+#ifdef CONFIG_SCHED_CLUSTER
+void set_sched_cluster(void)
+{
+	struct sched_domain_topology_level *tl;
+
+	for (tl = sched_domain_topology; tl->mask; tl++) {
+		if (tl->sd_flags && (tl->sd_flags() & SD_CLUSTER)) {
+			if (!sysctl_sched_cluster)
+				tl->flags |= SDTL_SKIP;
+			else
+				tl->flags &= ~SDTL_SKIP;
+			break;
+		}
+	}
+}
+
+/* set via /proc/sys/kernel/sched_cluster */
+unsigned int __read_mostly sysctl_sched_cluster = 1;
+
+static DEFINE_MUTEX(sched_cluster_mutex);
+int sched_cluster_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+	unsigned int oldval;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&sched_cluster_mutex);
+	oldval = sysctl_sched_cluster;
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (!ret && write) {
+		if (oldval != sysctl_sched_cluster) {
+			set_sched_cluster();
+			arch_rebuild_cpu_topology();
+		}
+	}
+	mutex_unlock(&sched_cluster_mutex);
+
+	return ret;
+}
+
+static int zero;
+static int one = 1;
+
+static struct ctl_table sched_cluster_sysctls[] = {
+	{
+		.procname	= "sched_cluster",
+		.data		= &sysctl_sched_cluster,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_cluster_handler,
+		.extra1		= (void *)&zero,
+		.extra2		= (void *)&one,
+	},
+	{}
+};
+
+static int __init sched_cluster_sysctl_init(void)
+{
+	register_sysctl_init("kernel", sched_cluster_sysctls);
+	return 0;
+}
+late_initcall(sched_cluster_sysctl_init);
+#endif
+
 static struct sched_domain_topology_level *
 next_tl(struct sched_domain_topology_level *tl)
 {
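
For reference, a minimal userspace sketch of exercising the new knob once the series is applied. The path /proc/sys/kernel/sched_cluster follows from the ctl_table registered under "kernel" above; the test program itself is hypothetical, not part of the patch. A write flows through sched_cluster_handler() -> set_sched_cluster() -> arch_rebuild_cpu_topology(), rebuilding the sched_domain hierarchy with the cluster level skipped or restored:

/*
 * Hypothetical test sketch, assuming CONFIG_SCHED_CLUSTER=y and this
 * series applied: toggle kernel.sched_cluster and read it back.
 * Needs CAP_SYS_ADMIN; the handler returns -EPERM otherwise.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static const char *knob = "/proc/sys/kernel/sched_cluster";

/* Read the current 0/1 value of the sysctl. */
static int read_knob(void)
{
	char buf[8] = "";
	int fd = open(knob, O_RDONLY);

	if (fd < 0 || read(fd, buf, sizeof(buf) - 1) < 0) {
		perror(knob);
		exit(1);
	}
	close(fd);
	return atoi(buf);
}

/* Write a new value; a changed value triggers the domain rebuild. */
static void write_knob(int val)
{
	char buf[8];
	int fd = open(knob, O_WRONLY);

	snprintf(buf, sizeof(buf), "%d\n", val);
	if (fd < 0 || write(fd, buf, strlen(buf)) < 0) {
		perror(knob);
		exit(1);
	}
	close(fd);
}

int main(void)
{
	int old = read_knob();

	write_knob(!old);	/* flips SDTL_SKIP and rebuilds domains */
	printf("sched_cluster: %d -> %d\n", old, read_knob());
	write_knob(old);	/* restore the original setting */
	return 0;
}

Only 0 and 1 are accepted, since extra1/extra2 clamp the range passed to proc_dointvec_minmax(). On a kernel with scheduler debugging enabled, the effect should be visible as the cluster domain level appearing or disappearing in the per-CPU sched_domain hierarchy (exposed via procfs or debugfs, depending on kernel version).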