diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bada636d1065..25950f0ccc33 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -939,6 +939,15 @@ config SCHED_MC making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_ITMT + bool "Intel Turbo Boost Max Technology (ITMT) scheduler support" + depends on SCHED_MC && CPU_SUP_INTEL && X86_INTEL_PSTATE + ---help--- + ITMT enabled scheduler support improves the CPU scheduler's decision + to move tasks to cpu core that can be boosted to a higher frequency + than others. It will have better performance at a cost of slightly + increased overhead in task migrations. If unsure say N here. + source "kernel/Kconfig.preempt" config UP_LATE_INIT diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index a5ca88a22ca3..8ace9511347c 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -147,4 +147,32 @@ int x86_pci_root_bus_node(int bus); void x86_pci_root_bus_resources(int bus, struct list_head *resources); extern bool x86_topology_update; + +#ifdef CONFIG_SCHED_ITMT +#include + +DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority); + +/* Interface to set priority of a cpu */ +void sched_set_itmt_core_prio(int prio, int core_cpu); + +/* Interface to notify scheduler that system supports ITMT */ +void sched_set_itmt_support(void); + +/* Interface to notify scheduler that system revokes ITMT support */ +void sched_clear_itmt_support(void); + +#else /* CONFIG_SCHED_ITMT */ + +static inline void sched_set_itmt_core_prio(int prio, int core_cpu) +{ +} +static inline void sched_set_itmt_support(void) +{ +} +static inline void sched_clear_itmt_support(void) +{ +} +#endif /* CONFIG_SCHED_ITMT */ + #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 79076d75bdbf..bbd0ebcfcc2a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -123,6 +123,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o +obj-$(CONFIG_SCHED_ITMT) += itmt.o ifdef CONFIG_FRAME_POINTER obj-y += unwind_frame.o diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c new file mode 100644 index 000000000000..63c9b3e3509d --- /dev/null +++ b/arch/x86/kernel/itmt.c @@ -0,0 +1,109 @@ +/* + * itmt.c: Support Intel Turbo Boost Max Technology 3.0 + * + * (C) Copyright 2016 Intel Corporation + * Author: Tim Chen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), + * the maximum turbo frequencies of some cores in a CPU package may be + * higher than for the other cores in the same package. In that case, + * better performance can be achieved by making the scheduler prefer + * to run tasks on the CPUs with higher max turbo frequencies. + * + * This file provides functions and data structures for enabling the + * scheduler to favor scheduling on cores can be boosted to a higher + * frequency under ITMT. + */ + +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(itmt_update_mutex); +DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); + +/* Boolean to track if system has ITMT capabilities */ +static bool __read_mostly sched_itmt_capable; + +/** + * sched_set_itmt_support() - Indicate platform supports ITMT + * + * This function is used by the OS to indicate to scheduler that the platform + * is capable of supporting the ITMT feature. + * + * The current scheme has the pstate driver detects if the system + * is ITMT capable and call sched_set_itmt_support. + * + * This must be done only after sched_set_itmt_core_prio + * has been called to set the cpus' priorities. + */ +void sched_set_itmt_support(void) +{ + mutex_lock(&itmt_update_mutex); + + sched_itmt_capable = true; + + mutex_unlock(&itmt_update_mutex); +} + +/** + * sched_clear_itmt_support() - Revoke platform's support of ITMT + * + * This function is used by the OS to indicate that it has + * revoked the platform's support of ITMT feature. + * + */ +void sched_clear_itmt_support(void) +{ + mutex_lock(&itmt_update_mutex); + + sched_itmt_capable = false; + + mutex_unlock(&itmt_update_mutex); +} + +int arch_asym_cpu_priority(int cpu) +{ + return per_cpu(sched_core_priority, cpu); +} + +/** + * sched_set_itmt_core_prio() - Set CPU priority based on ITMT + * @prio: Priority of cpu core + * @core_cpu: The cpu number associated with the core + * + * The pstate driver will find out the max boost frequency + * and call this function to set a priority proportional + * to the max boost frequency. CPU with higher boost + * frequency will receive higher priority. + * + * No need to rebuild sched domain after updating + * the CPU priorities. The sched domains have no + * dependency on CPU priorities. + */ +void sched_set_itmt_core_prio(int prio, int core_cpu) +{ + int cpu, i = 1; + + for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { + int smt_prio; + + /* + * Ensure that the siblings are moved to the end + * of the priority chain and only used when + * all other high priority cpus are out of capacity. + */ + smt_prio = prio * smp_num_siblings / i; + per_cpu(sched_core_priority, cpu) = smt_prio; + i++; + } +}