From 24fc7edb92eea05946119cc0258c891c26b3b469 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 May 2016 10:37:59 +0200 Subject: [PATCH] sched/core: Introduce 'struct sched_domain_shared' Since struct sched_domain is strictly per cpu; introduce a structure that is shared between all 'identical' sched_domains. Limit to SD_SHARE_PKG_RESOURCES domains for now, as we'll only use it for shared cache state; if another use comes up later we can easily relax this. While the sched_group's are normally shared between CPUs, these are not natural to use when we need some shared state on a domain level -- since that would require the domain to have a parent, which is not a given. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ kernel/sched/core.c | 44 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b99fcd1b341e..8a878b9649a1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1067,6 +1067,10 @@ extern int sched_domain_level_max; struct sched_group; +struct sched_domain_shared { + atomic_t ref; +}; + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ @@ -1135,6 +1139,7 @@ struct sched_domain { void *private; /* used during construction */ struct rcu_head rcu; /* used during destruction */ }; + struct sched_domain_shared *shared; unsigned int span_weight; /* @@ -1168,6 +1173,7 @@ typedef int (*sched_domain_flags_f)(void); struct sd_data { struct sched_domain **__percpu sd; + struct sched_domain_shared **__percpu sds; struct sched_group **__percpu sg; struct sched_group_capacity **__percpu sgc; }; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 662d08d7b1df..97d3c18d00bf 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5947,6 +5947,8 @@ static void destroy_sched_domain(struct sched_domain *sd) kfree(sd->groups->sgc); kfree(sd->groups); } + if (sd->shared && atomic_dec_and_test(&sd->shared->ref)) + kfree(sd->shared); kfree(sd); } @@ -6385,6 +6387,9 @@ static void claim_allocations(int cpu, struct sched_domain *sd) WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); *per_cpu_ptr(sdd->sd, cpu) = NULL; + if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref)) + *per_cpu_ptr(sdd->sds, cpu) = NULL; + if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) *per_cpu_ptr(sdd->sg, cpu) = NULL; @@ -6429,10 +6434,12 @@ static int sched_domains_curr_level; static struct sched_domain * sd_init(struct sched_domain_topology_level *tl, + const struct cpumask *cpu_map, struct sched_domain *child, int cpu) { - struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); - int sd_weight, sd_flags = 0; + struct sd_data *sdd = &tl->data; + struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); + int sd_id, sd_weight, sd_flags = 0; #ifdef CONFIG_NUMA /* @@ -6487,6 +6494,9 @@ sd_init(struct sched_domain_topology_level *tl, #endif }; + cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); + sd_id = cpumask_first(sched_domain_span(sd)); + /* * Convert topological properties into behaviour. */ @@ -6529,7 +6539,16 @@ sd_init(struct sched_domain_topology_level *tl, sd->idle_idx = 1; } - sd->private = &tl->data; + /* + * For all levels sharing cache; connect a sched_domain_shared + * instance. + */ + if (sd->flags & SD_SHARE_PKG_RESOURCES) { + sd->shared = *per_cpu_ptr(sdd->sds, sd_id); + atomic_inc(&sd->shared->ref); + } + + sd->private = sdd; return sd; } @@ -6839,6 +6858,10 @@ static int __sdt_alloc(const struct cpumask *cpu_map) if (!sdd->sd) return -ENOMEM; + sdd->sds = alloc_percpu(struct sched_domain_shared *); + if (!sdd->sds) + return -ENOMEM; + sdd->sg = alloc_percpu(struct sched_group *); if (!sdd->sg) return -ENOMEM; @@ -6849,6 +6872,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map) for_each_cpu(j, cpu_map) { struct sched_domain *sd; + struct sched_domain_shared *sds; struct sched_group *sg; struct sched_group_capacity *sgc; @@ -6859,6 +6883,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map) *per_cpu_ptr(sdd->sd, j) = sd; + sds = kzalloc_node(sizeof(struct sched_domain_shared), + GFP_KERNEL, cpu_to_node(j)); + if (!sds) + return -ENOMEM; + + *per_cpu_ptr(sdd->sds, j) = sds; + sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); if (!sg) @@ -6898,6 +6929,8 @@ static void __sdt_free(const struct cpumask *cpu_map) kfree(*per_cpu_ptr(sdd->sd, j)); } + if (sdd->sds) + kfree(*per_cpu_ptr(sdd->sds, j)); if (sdd->sg) kfree(*per_cpu_ptr(sdd->sg, j)); if (sdd->sgc) @@ -6905,6 +6938,8 @@ static void __sdt_free(const struct cpumask *cpu_map) } free_percpu(sdd->sd); sdd->sd = NULL; + free_percpu(sdd->sds); + sdd->sds = NULL; free_percpu(sdd->sg); sdd->sg = NULL; free_percpu(sdd->sgc); @@ -6916,9 +6951,8 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, const struct cpumask *cpu_map, struct sched_domain_attr *attr, struct sched_domain *child, int cpu) { - struct sched_domain *sd = sd_init(tl, child, cpu); + struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu); - cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); if (child) { sd->level = child->level + 1; sched_domain_level_max = max(sched_domain_level_max, sd->level);