Merge tag 'sched_urgent_for_v5.16_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Borislav Petkov:

 - Avoid touching ~100 config files in order to be able to select the
   preemption model

 - Clear cluster CPU masks too, on the CPU unplug path

 - Prevent a use-after-free in cfs

 - Prevent a race condition when updating CPU cache domains

 - Factor out the common part of smp_prepare_cpus() into a shared helper
   that can be called by both bare metal and Xen, in order to fix booting
   of Xen PV guests

* tag 'sched_urgent_for_v5.16_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  preempt: Restore preemption model selection configs
  arch_topology: Fix missing clear cluster_cpumask in remove_cpu_topology()
  sched/fair: Prevent dead task groups from regaining cfs_rq's
  sched/core: Mitigate race cpus_share_cache()/update_top_cache_domain()
  x86/smp: Factor out parts of native_smp_prepare_cpus()
commit fc661f2dcb
@@ -126,6 +126,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void cpu_disable_common(void);
 void native_smp_prepare_boot_cpu(void);
+void smp_prepare_cpus_common(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void calculate_max_logical_packages(void);
 void native_smp_cpus_done(unsigned int max_cpus);
@@ -1350,12 +1350,7 @@ static void __init smp_get_logical_apicid(void)
 		cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
 }
 
-/*
- * Prepare for SMP bootup.
- * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
- * for common interface support.
- */
-void __init native_smp_prepare_cpus(unsigned int max_cpus)
+void __init smp_prepare_cpus_common(void)
 {
 	unsigned int i;
 
@@ -1386,6 +1381,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	set_sched_topology(x86_topology);
 
 	set_cpu_sibling_map(0);
+}
+
+/*
+ * Prepare for SMP bootup.
+ * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
+ * for common interface support.
+ */
+void __init native_smp_prepare_cpus(unsigned int max_cpus)
+{
+	smp_prepare_cpus_common();
+
 	init_freq_invariance(false, false);
 	smp_sanity_check();
 
@@ -225,7 +225,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
 static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned cpu;
-	unsigned int i;
 
 	if (skip_ioapic_setup) {
 		char *m = (max_cpus == 0) ?
@@ -238,16 +237,9 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
 	}
 	xen_init_lock_cpu(0);
 
-	smp_store_boot_cpu_info();
-	cpu_data(0).x86_max_cores = 1;
+	smp_prepare_cpus_common();
 
-	for_each_possible_cpu(i) {
-		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
-		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
-		zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
-		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
-	}
-	set_cpu_sibling_map(0);
+	cpu_data(0).x86_max_cores = 1;
 
 	speculative_store_bypass_ht_init();
 
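For context: the two x86 hunks above move the per-CPU setup that native_smp_prepare_cpus() and the Xen PV path used to duplicate into one smp_prepare_cpus_common() helper, which is what the merge log credits with fixing Xen PV boot. Below is a minimal standalone sketch (plain C, illustrative names, not kernel code) of why a shared helper matters here: any mask or table added later only needs to be initialized in one place instead of in every boot path.

#include <stdio.h>
#include <string.h>

#define NR_CPUS 8

/* Illustrative stand-ins for per-CPU topology masks. */
static unsigned long sibling_map[NR_CPUS];
static unsigned long llc_shared_map[NR_CPUS];
static unsigned long l2c_shared_map[NR_CPUS];  /* a newer mask a duplicated path could forget */

/* Shared setup used by every boot front end. */
static void prepare_cpus_common_sketch(void)
{
	memset(sibling_map, 0, sizeof(sibling_map));
	memset(llc_shared_map, 0, sizeof(llc_shared_map));
	memset(l2c_shared_map, 0, sizeof(l2c_shared_map));
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sibling_map[cpu] |= 1UL << cpu;  /* each CPU is at least its own sibling */
}

/* Two front ends, two platforms, one common helper. */
static void native_prepare_sketch(void) { prepare_cpus_common_sketch(); puts("native ready"); }
static void pv_prepare_sketch(void)     { prepare_cpus_common_sketch(); puts("pv ready"); }

int main(void)
{
	native_prepare_sketch();
	pv_prepare_sketch();
	return 0;
}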
@@ -677,6 +677,8 @@ void remove_cpu_topology(unsigned int cpu)
 		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
 	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
 		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
+	for_each_cpu(sibling, topology_cluster_cpumask(cpu))
+		cpumask_clear_cpu(cpu, topology_cluster_cpumask(sibling));
 	for_each_cpu(sibling, topology_llc_cpumask(cpu))
 		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
 
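The arch_topology hunk adds the cluster level to remove_cpu_topology() so the unplugged CPU is cleared from its cluster siblings' masks just like the core, thread and LLC levels. A small standalone sketch of that symmetric clearing (plain C bitmasks with illustrative names, not the kernel cpumask API):

#include <stdio.h>

#define NR_CPUS 8

/* One bitmask per CPU and per topology level in this toy model. */
static unsigned long core_mask[NR_CPUS];
static unsigned long cluster_mask[NR_CPUS];
static unsigned long llc_mask[NR_CPUS];

/* Removing a CPU must clear its bit in every sibling's mask at every
 * level; skipping one level (the cluster level, before this fix)
 * leaves stale bits behind after the CPU is gone. */
static void remove_cpu_sketch(int cpu)
{
	for (int sibling = 0; sibling < NR_CPUS; sibling++) {
		core_mask[sibling]    &= ~(1UL << cpu);
		cluster_mask[sibling] &= ~(1UL << cpu);
		llc_mask[sibling]     &= ~(1UL << cpu);
	}
}

int main(void)
{
	for (int i = 0; i < NR_CPUS; i++)
		core_mask[i] = cluster_mask[i] = llc_mask[i] = 0xffUL;

	remove_cpu_sketch(3);
	printf("cluster_mask[0] = 0x%lx\n", cluster_mask[0]); /* 0xf7: bit 3 cleared */
	return 0;
}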
@@ -85,7 +85,7 @@
 struct completion;
 struct user;
 
-#ifdef CONFIG_PREEMPT_VOLUNTARY
+#ifdef CONFIG_PREEMPT_VOLUNTARY_BUILD
 
 extern int __cond_resched(void);
 # define might_resched() __cond_resched()
@@ -15,7 +15,7 @@
 #else
 #define MODULE_VERMAGIC_SMP ""
 #endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPT_BUILD
 #define MODULE_VERMAGIC_PREEMPT "preempt "
 #elif defined(CONFIG_PREEMPT_RT)
 #define MODULE_VERMAGIC_PREEMPT "preempt_rt "
@@ -30,7 +30,7 @@ $(obj)/version.o: include/generated/compile.h
 quiet_cmd_compile.h = CHK     $@
       cmd_compile.h = \
 	$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@	\
-	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)"	\
+	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)"	\
 	"$(CONFIG_PREEMPT_RT)" $(CONFIG_CC_VERSION_TEXT) "$(LD)"
 
 include/generated/compile.h: FORCE
@@ -1,12 +1,23 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
+config PREEMPT_NONE_BUILD
+	bool
+
+config PREEMPT_VOLUNTARY_BUILD
+	bool
+
+config PREEMPT_BUILD
+	bool
+	select PREEMPTION
+	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
+
 choice
 	prompt "Preemption Model"
-	default PREEMPT_NONE_BEHAVIOUR
+	default PREEMPT_NONE
 
-config PREEMPT_NONE_BEHAVIOUR
+config PREEMPT_NONE
 	bool "No Forced Preemption (Server)"
-	select PREEMPT_NONE if !PREEMPT_DYNAMIC
+	select PREEMPT_NONE_BUILD if !PREEMPT_DYNAMIC
 	help
 	  This is the traditional Linux preemption model, geared towards
 	  throughput. It will still provide good latencies most of the
@@ -18,10 +29,10 @@ config PREEMPT_NONE_BEHAVIOUR
 	  raw processing power of the kernel, irrespective of scheduling
 	  latencies.
 
-config PREEMPT_VOLUNTARY_BEHAVIOUR
+config PREEMPT_VOLUNTARY
 	bool "Voluntary Kernel Preemption (Desktop)"
 	depends on !ARCH_NO_PREEMPT
-	select PREEMPT_VOLUNTARY if !PREEMPT_DYNAMIC
+	select PREEMPT_VOLUNTARY_BUILD if !PREEMPT_DYNAMIC
 	help
 	  This option reduces the latency of the kernel by adding more
 	  "explicit preemption points" to the kernel code.  These new
@@ -37,10 +48,10 @@ config PREEMPT_VOLUNTARY_BEHAVIOUR
 
 	  Select this if you are building a kernel for a desktop system.
 
-config PREEMPT_BEHAVIOUR
+config PREEMPT
 	bool "Preemptible Kernel (Low-Latency Desktop)"
 	depends on !ARCH_NO_PREEMPT
-	select PREEMPT
+	select PREEMPT_BUILD
 	help
 	  This option reduces the latency of the kernel by making
 	  all kernel code (that is not executing in a critical section)
@@ -58,7 +69,7 @@ config PREEMPT_BEHAVIOUR
 
 config PREEMPT_RT
 	bool "Fully Preemptible Kernel (Real-Time)"
-	depends on EXPERT && ARCH_SUPPORTS_RT && !PREEMPT_DYNAMIC
+	depends on EXPERT && ARCH_SUPPORTS_RT
 	select PREEMPTION
 	help
 	  This option turns the kernel into a real-time kernel by replacing
@@ -75,17 +86,6 @@ config PREEMPT_RT
 
 endchoice
 
-config PREEMPT_NONE
-	bool
-
-config PREEMPT_VOLUNTARY
-	bool
-
-config PREEMPT
-	bool
-	select PREEMPTION
-	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
-
 config PREEMPT_COUNT
 	bool
 
@@ -95,8 +95,8 @@ config PREEMPTION
 
 config PREEMPT_DYNAMIC
 	bool "Preemption behaviour defined on boot"
-	depends on HAVE_PREEMPT_DYNAMIC
-	select PREEMPT
+	depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
+	select PREEMPT_BUILD
 	default y
 	help
 	  This option allows to define the preemption model on the kernel
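Taken together, the Kconfig, might_resched, vermagic and compile.h hunks restore the historical user-visible choice symbols (PREEMPT_NONE, PREEMPT_VOLUNTARY, PREEMPT), so the ~100 defconfigs mentioned in the merge log keep working, and add hidden *_BUILD symbols that record what actually gets compiled; PREEMPT_DYNAMIC selects PREEMPT_BUILD, which is why code that cares about the generated kernel now tests the *_BUILD symbol instead of the choice symbol. A simplified standalone analogue of the MODULE_VERMAGIC_PREEMPT change (plain C; CONFIG_PREEMPT_BUILD is supplied as a -D flag here purely for illustration):

/* Build with:  cc -DCONFIG_PREEMPT_BUILD demo.c   (preemptible build)
 *         or:  cc demo.c                          (non-preemptible build) */
#include <stdio.h>

/* Key off the hidden "what was built" symbol, not the user-visible
 * choice, because a dynamic-preemption kernel builds the preemptible
 * code paths even when the chosen default behaviour is "none". */
#ifdef CONFIG_PREEMPT_BUILD
#define VERMAGIC_PREEMPT "preempt "
#else
#define VERMAGIC_PREEMPT ""
#endif

int main(void)
{
	printf("vermagic fragment: \"%s\"\n", VERMAGIC_PREEMPT);
	return 0;
}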
@@ -31,7 +31,7 @@ static inline void autogroup_destroy(struct kref *kref)
 	ag->tg->rt_se = NULL;
 	ag->tg->rt_rq = NULL;
 #endif
-	sched_offline_group(ag->tg);
+	sched_release_group(ag->tg);
 	sched_destroy_group(ag->tg);
 }
 
@@ -3726,6 +3726,9 @@ out:
 
 bool cpus_share_cache(int this_cpu, int that_cpu)
 {
+	if (this_cpu == that_cpu)
+		return true;
+
 	return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
 }
 
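The cpus_share_cache() hunk adds a short-circuit for the same-CPU case. Per the commit subject it mitigates a race with update_top_cache_domain(): while the cache domains are being rebuilt, the per-CPU sd_llc_id values can be transiently inconsistent, so even asking whether a CPU shares a cache with itself could briefly return false; checking this_cpu == that_cpu first gives an answer that is correct regardless of any concurrent update. A standalone sketch of the pattern (plain C, illustrative names):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

/* Stand-in for the per-CPU sd_llc_id values that a concurrent domain
 * rebuild may be rewriting while we read them. */
static int llc_id[NR_CPUS] = { 0, 0, 2, 2 };

/* Answer the trivial case without touching shared state: a CPU always
 * shares a cache with itself, even if llc_id[] is mid-update. */
static bool cpus_share_cache_sketch(int this_cpu, int that_cpu)
{
	if (this_cpu == that_cpu)
		return true;

	return llc_id[this_cpu] == llc_id[that_cpu];
}

int main(void)
{
	printf("0,0 -> %d\n", cpus_share_cache_sketch(0, 0)); /* always 1 */
	printf("0,2 -> %d\n", cpus_share_cache_sketch(0, 2)); /* depends on llc_id[] */
	return 0;
}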
@@ -6625,13 +6628,13 @@ __setup("preempt=", setup_preempt_mode);
 static void __init preempt_dynamic_init(void)
 {
 	if (preempt_dynamic_mode == preempt_dynamic_undefined) {
-		if (IS_ENABLED(CONFIG_PREEMPT_NONE_BEHAVIOUR)) {
+		if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
 			sched_dynamic_update(preempt_dynamic_none);
-		} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY_BEHAVIOUR)) {
+		} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
 			sched_dynamic_update(preempt_dynamic_voluntary);
 		} else {
 			/* Default static call setting, nothing to do */
-			WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_BEHAVIOUR));
+			WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
 			preempt_dynamic_mode = preempt_dynamic_full;
 			pr_info("Dynamic Preempt: full\n");
 		}
@@ -9716,6 +9719,22 @@ static void sched_free_group(struct task_group *tg)
 	kmem_cache_free(task_group_cache, tg);
 }
 
+static void sched_free_group_rcu(struct rcu_head *rcu)
+{
+	sched_free_group(container_of(rcu, struct task_group, rcu));
+}
+
+static void sched_unregister_group(struct task_group *tg)
+{
+	unregister_fair_sched_group(tg);
+	unregister_rt_sched_group(tg);
+	/*
+	 * We have to wait for yet another RCU grace period to expire, as
+	 * print_cfs_stats() might run concurrently.
+	 */
+	call_rcu(&tg->rcu, sched_free_group_rcu);
+}
+
 /* allocate runqueue etc for a new task group */
 struct task_group *sched_create_group(struct task_group *parent)
 {
@@ -9759,25 +9778,35 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
 }
 
 /* rcu callback to free various structures associated with a task group */
-static void sched_free_group_rcu(struct rcu_head *rhp)
+static void sched_unregister_group_rcu(struct rcu_head *rhp)
 {
 	/* Now it should be safe to free those cfs_rqs: */
-	sched_free_group(container_of(rhp, struct task_group, rcu));
+	sched_unregister_group(container_of(rhp, struct task_group, rcu));
 }
 
 void sched_destroy_group(struct task_group *tg)
 {
 	/* Wait for possible concurrent references to cfs_rqs complete: */
-	call_rcu(&tg->rcu, sched_free_group_rcu);
+	call_rcu(&tg->rcu, sched_unregister_group_rcu);
 }
 
-void sched_offline_group(struct task_group *tg)
+void sched_release_group(struct task_group *tg)
 {
 	unsigned long flags;
 
-	/* End participation in shares distribution: */
-	unregister_fair_sched_group(tg);
-
+	/*
+	 * Unlink first, to avoid walk_tg_tree_from() from finding us (via
+	 * sched_cfs_period_timer()).
+	 *
+	 * For this to be effective, we have to wait for all pending users of
+	 * this task group to leave their RCU critical section to ensure no new
+	 * user will see our dying task group any more. Specifically ensure
+	 * that tg_unthrottle_up() won't add decayed cfs_rq's to it.
+	 *
+	 * We therefore defer calling unregister_fair_sched_group() to
+	 * sched_unregister_group() which is guarantied to get called only after the
+	 * current RCU grace period has expired.
+	 */
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
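The core.c hunks above stage task-group teardown across RCU grace periods: sched_release_group() now only unlinks the group from the global lists, sched_unregister_group() runs the fair/rt unregistration one grace period later (so, per the added comment, tg_unthrottle_up() and other RCU-protected walkers can no longer add decayed cfs_rq's to a dying group), and the final kfree waits for yet another grace period because print_cfs_stats() might still be looking at the group. The fair.c hunk further below moves destroy_cfs_bandwidth() into unregister_fair_sched_group() so the bandwidth machinery is shut down in that middle stage rather than at free time. A standalone sketch of the three-stage idea (plain C; the grace-period waits are stubbed out and all names are illustrative, not the kernel API):

#include <stdio.h>
#include <stdlib.h>

struct group {
	int id;
	int on_list;       /* visible to lock-free walkers while set */
	int registered;    /* per-CPU state still reachable while set */
};

/* Stand-in for waiting out an RCU grace period: once it returns, no
 * walker that started before the preceding unlink still holds a
 * reference obtained from the list. */
static void wait_for_readers_sketch(void) { }

static void release_group_sketch(struct group *g)
{
	/* Stage 1: unlink only.  Walkers that already found the group may
	 * still add state to it, so nothing is torn down yet. */
	g->on_list = 0;
}

static void destroy_group_sketch(struct group *g)
{
	wait_for_readers_sketch();
	/* Stage 2: no new walker can find the group, so its per-CPU state
	 * can be unregistered without being repopulated underneath us. */
	g->registered = 0;

	wait_for_readers_sketch();
	/* Stage 3: after another grace period, freeing is safe even if a
	 * late reader (e.g. a stats printer) was still inspecting it. */
	free(g);
}

int main(void)
{
	struct group *g = calloc(1, sizeof(*g));
	g->id = 1; g->on_list = 1; g->registered = 1;

	release_group_sketch(g);
	destroy_group_sketch(g);
	printf("group torn down in stages\n");
	return 0;
}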
@@ -9896,7 +9925,7 @@ static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
 
-	sched_offline_group(tg);
+	sched_release_group(tg);
 }
 
 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
@@ -9906,7 +9935,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 	/*
 	 * Relies on the RCU grace period between css_released() and this.
 	 */
-	sched_free_group(tg);
+	sched_unregister_group(tg);
 }
 
 /*
@@ -11456,8 +11456,6 @@ void free_fair_sched_group(struct task_group *tg)
 {
 	int i;
 
-	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
-
 	for_each_possible_cpu(i) {
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
@@ -11534,6 +11532,8 @@ void unregister_fair_sched_group(struct task_group *tg)
 	struct rq *rq;
 	int cpu;
 
+	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
 	for_each_possible_cpu(cpu) {
 		if (tg->se[cpu])
 			remove_entity_load_avg(tg->se[cpu]);
@@ -137,13 +137,17 @@ static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
 	return rt_rq->rq;
 }
 
+void unregister_rt_sched_group(struct task_group *tg)
+{
+	if (tg->rt_se)
+		destroy_rt_bandwidth(&tg->rt_bandwidth);
+
+}
+
 void free_rt_sched_group(struct task_group *tg)
 {
 	int i;
 
-	if (tg->rt_se)
-		destroy_rt_bandwidth(&tg->rt_bandwidth);
-
 	for_each_possible_cpu(i) {
 		if (tg->rt_rq)
 			kfree(tg->rt_rq[i]);
@@ -250,6 +254,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return &rq->rt;
 }
 
+void unregister_rt_sched_group(struct task_group *tg) { }
+
 void free_rt_sched_group(struct task_group *tg) { }
 
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
@@ -488,6 +488,7 @@ extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
 extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
 extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
 
+extern void unregister_rt_sched_group(struct task_group *tg);
 extern void free_rt_sched_group(struct task_group *tg);
 extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
 extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
@@ -503,7 +504,7 @@ extern struct task_group *sched_create_group(struct task_group *parent);
 extern void sched_online_group(struct task_group *tg,
 			       struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
-extern void sched_offline_group(struct task_group *tg);
+extern void sched_release_group(struct task_group *tg);
 
 extern void sched_move_task(struct task_struct *tsk);
 