cacheinfo and arch_topology updates for v5.20
These are updates to fix some discrepancies we have in the CPU topology
parsing from the device tree /cpu-map node and the divergence from the
behaviour on an ACPI enabled platform. The expectation is that both DT
and ACPI enabled systems must present a consistent view of the CPU
topology.

The current assignment of the generated cluster count as the physical
package identifier for each CPU is wrong. The device tree bindings for
CPU topology support sockets to infer the socket or physical package
identifier for a given CPU, and that is now used to address the issue.
These updates also assign the cluster identifier as parsed from the
device tree cluster nodes within /cpu-map, without support for nesting
of clusters as there are no such reported/known platforms. In order to
be on par with ACPI PPTT physical package/socket support, these updates
also include support for socket nodes in /cpu-map. The only exception
is that the last level cache id information can be inferred from the
same ACPI PPTT, while we need to parse the CPU cache nodes in the
device tree.

The cacheinfo changes here are to enable re-use of the cacheinfo to
detect the cache attributes for all the CPUs quite early, even before
the secondaries are booted, so that the information can be used to
build the scheduler domains, especially the last level cache (LLC).

-----BEGIN PGP SIGNATURE-----

iQIzBAABCAAdFiEEunHlEgbzHrJD3ZPhAEG6vDF+4pgFAmLFak4ACgkQAEG6vDF+
4pjHNxAAyzXpazkWuTjTot9UcX2TE8kIlEB4wXoGr1WJfi2uefVuvo3owvSKLdmr
pBNUf0fSLKShljueXOYcmwxVvSoN3zWSrOh4huUBWv0VPBg5yyNplZChwbhxjmiL
N5FGtSDHoTgPjjMUXnlsa/Y7/RDUhxR+s4ZdGS/vnMHbGr8Gsm5bjc6BNCq9E6cz
xnGDzUOS3+Sin+751De09HIuH5FOoCEpWOj6FGVK3MtEsizHU4ANEKTgFdsE26mG
nmVY1CU3GJmHluvG1JgL4+HmZsW02h2yU0tRSLqcseJCUou8gJ5yr0wYF6wmsHGk
nzGDfV7GzLdQg5rVnWcgzrzibqbBKJvh795e2cW3tV60VjMxlh57L7OWnHAEzQh8
QCF8HeE2CGg6VlYC+oB5JZ4pLdPE69e0+fHzhFy7hqK/B9yOr0olIKXxcm4tR/TV
5Ri7D8bX4Oviq1pQcT+GE/8Of5vX5R9LTiH1V0ld38npVvA55KDnO6WKvcadEucO
tKnHZx2dZYR/sRJ8ABz4hb7+UlLiLpCPFudx+BcXLHn+nWSqXYlu+F/2D2nsGRP+
HpmJHISJ40gD669KvupLg0/TtPdQ1oRmuf9CXUiMkzQZcySIW4wmFvCvbUfMcApl
7OzQ+FtAPb7n821mSV7KJBYJ5xOxNVR7DfUovmXCa/91xfqmE1M=
=UCxa
-----END PGP SIGNATURE-----

Merge tag 'arch-cache-topo-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux into driver-core-next

Sudeep writes:

cacheinfo and arch_topology updates for v5.20

* tag 'arch-cache-topo-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux: (21 commits)
  ACPI: Remove the unused find_acpi_cpu_cache_topology()
  arch_topology: Warn that topology for nested clusters is not supported
  arch_topology: Add support for parsing sockets in /cpu-map
  arch_topology: Set cluster identifier in each core/thread from /cpu-map
  arch_topology: Limit span of cpu_clustergroup_mask()
  arch_topology: Don't set cluster identifier as physical package identifier
  arch_topology: Avoid parsing through all the CPUs once a outlier CPU is found
  arch_topology: Check for non-negative value rather than -1 for IDs validity
  arch_topology: Set thread sibling cpumask only within the cluster
  arch_topology: Drop LLC identifier stash from the CPU topology
  arm64: topology: Remove redundant setting of llc_id in CPU topology
  arch_topology: Use the last level cache information from the cacheinfo
  arch_topology: Add support to parse and detect cache attributes
  cacheinfo: Align checks in cache_shared_cpu_map_{setup,remove} for readability
  cacheinfo: Use cache identifiers to check if the caches are shared if available
  cacheinfo: Allow early detection and population of cache attributes
  cacheinfo: Add support to check if last level cache(LLC) is valid or shared
  cacheinfo: Move cache_leaves_are_shared out of CONFIG_OF
  cacheinfo: Add helper to access any cache index for a given CPU
  cacheinfo: Use of_cpu_device_node_get instead cpu_dev->of_node
  ...
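For reference, the /cpu-map layout that the socket-aware parsing described
above walks looks roughly like the sketch below. This is an illustrative
device tree fragment only, assuming the socketN/clusterN/coreN node naming
from the DT CPU topology binding; the cpu node labels, compatible strings
and reg values are made up for the example.

	cpus {
		#address-cells = <1>;
		#size-cells = <0>;

		cpu-map {
			socket0 {
				cluster0 {
					core0 { cpu = <&cpu0>; };
					core1 { cpu = <&cpu1>; };
				};
			};

			socket1 {
				cluster0 {
					core0 { cpu = <&cpu2>; };
					core1 { cpu = <&cpu3>; };
				};
			};
		};

		/* Illustrative CPU nodes; real DTs also need enable-method etc. */
		cpu0: cpu@0 {
			device_type = "cpu";
			compatible = "arm,cortex-a55";
			reg = <0x0>;
		};

		cpu1: cpu@100 {
			device_type = "cpu";
			compatible = "arm,cortex-a55";
			reg = <0x100>;
		};

		cpu2: cpu@10000 {
			device_type = "cpu";
			compatible = "arm,cortex-a55";
			reg = <0x10000>;
		};

		cpu3: cpu@10100 {
			device_type = "cpu";
			compatible = "arm,cortex-a55";
			reg = <0x10100>;
		};
	};

With a layout like this, cpu0/cpu1 would be assigned package identifier 0
and cpu2/cpu3 package identifier 1, each with cluster identifier 0 and core
identifier 0 or 1 within its cluster, matching what ACPI PPTT would report
for an equivalent two-socket system.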
commit 2c8f7ef4b3
@@ -89,8 +89,6 @@ int __init parse_acpi_topology(void)
 		return 0;
 
 	for_each_possible_cpu(cpu) {
-		int i, cache_id;
-
 		topology_id = find_acpi_cpu_topology(cpu, 0);
 		if (topology_id < 0)
 			return topology_id;
@@ -107,18 +105,6 @@ int __init parse_acpi_topology(void)
 		cpu_topology[cpu].cluster_id = topology_id;
 		topology_id = find_acpi_cpu_topology_package(cpu);
 		cpu_topology[cpu].package_id = topology_id;
-
-		i = acpi_find_last_cache_level(cpu);
-
-		if (i > 0) {
-			/*
-			 * this is the only part of cpu_topology that has
-			 * a direct relationship with the cache topology
-			 */
-			cache_id = find_acpi_cpu_cache_topology(cpu, i);
-			if (cache_id > 0)
-				cpu_topology[cpu].llc_id = cache_id;
-		}
 	}
 
 	return 0;

@@ -437,7 +437,8 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table,
 		pr_debug("found = %p %p\n", found_cache, cpu_node);
 		if (found_cache)
 			update_cache_properties(this_leaf, found_cache,
-						cpu_node, table->revision);
+						ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)),
+						table->revision);
 
 		index++;
 	}
@@ -690,43 +691,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level)
 	return find_acpi_cpu_topology_tag(cpu, level, 0);
 }
 
-/**
- * find_acpi_cpu_cache_topology() - Determine a unique cache topology value
- * @cpu: Kernel logical CPU number
- * @level: The cache level for which we would like a unique ID
- *
- * Determine a unique ID for each unified cache in the system
- *
- * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found.
- * Otherwise returns a value which represents a unique topological feature.
- */
-int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
-{
-	struct acpi_table_header *table;
-	struct acpi_pptt_cache *found_cache;
-	acpi_status status;
-	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
-	struct acpi_pptt_processor *cpu_node = NULL;
-	int ret = -1;
-
-	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
-	if (ACPI_FAILURE(status)) {
-		acpi_pptt_warn_missing();
-		return -ENOENT;
-	}
-
-	found_cache = acpi_find_cache_node(table, acpi_cpu_id,
-					   CACHE_TYPE_UNIFIED,
-					   level,
-					   &cpu_node);
-	if (found_cache)
-		ret = ACPI_PTR_DIFF(cpu_node, table);
-
-	acpi_put_table(table);
-
-	return ret;
-}
-
 /**
  * find_acpi_cpu_topology_package() - Determine a unique CPU package value
  * @cpu: Kernel logical CPU number

@@ -7,6 +7,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/cacheinfo.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/device.h>
@@ -496,7 +497,7 @@ static int __init get_cpu_for_node(struct device_node *node)
 }
 
 static int __init parse_core(struct device_node *core, int package_id,
-			     int core_id)
+			     int cluster_id, int core_id)
 {
 	char name[20];
 	bool leaf = true;
@@ -512,6 +513,7 @@ static int __init parse_core(struct device_node *core, int package_id,
 			cpu = get_cpu_for_node(t);
 			if (cpu >= 0) {
 				cpu_topology[cpu].package_id = package_id;
+				cpu_topology[cpu].cluster_id = cluster_id;
 				cpu_topology[cpu].core_id = core_id;
 				cpu_topology[cpu].thread_id = i;
 			} else if (cpu != -ENODEV) {
@@ -533,6 +535,7 @@ static int __init parse_core(struct device_node *core, int package_id,
 		}
 
 		cpu_topology[cpu].package_id = package_id;
+		cpu_topology[cpu].cluster_id = cluster_id;
 		cpu_topology[cpu].core_id = core_id;
 	} else if (leaf && cpu != -ENODEV) {
 		pr_err("%pOF: Can't get CPU for leaf core\n", core);
@@ -542,13 +545,13 @@ static int __init parse_core(struct device_node *core, int package_id,
 	return 0;
 }
 
-static int __init parse_cluster(struct device_node *cluster, int depth)
+static int __init parse_cluster(struct device_node *cluster, int package_id,
+				int cluster_id, int depth)
 {
 	char name[20];
 	bool leaf = true;
 	bool has_cores = false;
 	struct device_node *c;
-	static int package_id __initdata;
 	int core_id = 0;
 	int i, ret;
 
@@ -563,7 +566,9 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 		c = of_get_child_by_name(cluster, name);
 		if (c) {
 			leaf = false;
-			ret = parse_cluster(c, depth + 1);
+			ret = parse_cluster(c, package_id, i, depth + 1);
+			if (depth > 0)
+				pr_warn("Topology for clusters of clusters not yet supported\n");
 			of_node_put(c);
 			if (ret != 0)
 				return ret;
@@ -587,7 +592,8 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 			}
 
 			if (leaf) {
-				ret = parse_core(c, package_id, core_id++);
+				ret = parse_core(c, package_id, cluster_id,
+						 core_id++);
 			} else {
 				pr_err("%pOF: Non-leaf cluster with core %s\n",
 				       cluster, name);
@@ -604,12 +610,35 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 	if (leaf && !has_cores)
 		pr_warn("%pOF: empty cluster\n", cluster);
 
-	if (leaf)
-		package_id++;
-
 	return 0;
 }
 
+static int __init parse_socket(struct device_node *socket)
+{
+	char name[20];
+	struct device_node *c;
+	bool has_socket = false;
+	int package_id = 0, ret;
+
+	do {
+		snprintf(name, sizeof(name), "socket%d", package_id);
+		c = of_get_child_by_name(socket, name);
+		if (c) {
+			has_socket = true;
+			ret = parse_cluster(c, package_id, -1, 0);
+			of_node_put(c);
+			if (ret != 0)
+				return ret;
+		}
+		package_id++;
+	} while (c);
+
+	if (!has_socket)
+		ret = parse_cluster(socket, 0, -1, 0);
+
+	return ret;
+}
+
 static int __init parse_dt_topology(void)
 {
 	struct device_node *cn, *map;
@@ -630,7 +659,7 @@ static int __init parse_dt_topology(void)
 	if (!map)
 		goto out;
 
-	ret = parse_cluster(map, 0);
+	ret = parse_socket(map);
 	if (ret != 0)
 		goto out_map;
 
@@ -641,8 +670,10 @@ static int __init parse_dt_topology(void)
 	 * only mark cores described in the DT as possible.
 	 */
 	for_each_possible_cpu(cpu)
-		if (cpu_topology[cpu].package_id == -1)
+		if (cpu_topology[cpu].package_id < 0) {
 			ret = -EINVAL;
+			break;
+		}
 
 out_map:
 	of_node_put(map);
@@ -667,7 +698,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 			/* not numa in package, lets use the package siblings */
 			core_mask = &cpu_topology[cpu].core_sibling;
 	}
-	if (cpu_topology[cpu].llc_id != -1) {
+
+	if (last_level_cache_is_valid(cpu)) {
 		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
 			core_mask = &cpu_topology[cpu].llc_sibling;
 	}
@@ -686,6 +718,14 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 
 const struct cpumask *cpu_clustergroup_mask(int cpu)
 {
+	/*
+	 * Forbid cpu_clustergroup_mask() to span more or the same CPUs as
+	 * cpu_coregroup_mask().
+	 */
+	if (cpumask_subset(cpu_coregroup_mask(cpu),
+			   &cpu_topology[cpu].cluster_sibling))
+		return get_cpu_mask(cpu);
+
 	return &cpu_topology[cpu].cluster_sibling;
 }
 
@@ -698,7 +738,7 @@ void update_siblings_masks(unsigned int cpuid)
 	for_each_online_cpu(cpu) {
 		cpu_topo = &cpu_topology[cpu];
 
-		if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
+		if (last_level_cache_is_shared(cpu, cpuid)) {
 			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
 			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
 		}
@@ -706,15 +746,17 @@ void update_siblings_masks(unsigned int cpuid)
 		if (cpuid_topo->package_id != cpu_topo->package_id)
 			continue;
 
-		if (cpuid_topo->cluster_id == cpu_topo->cluster_id &&
-		    cpuid_topo->cluster_id != -1) {
+		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+			continue;
+
+		if (cpuid_topo->cluster_id >= 0) {
 			cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling);
 			cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling);
 		}
 
-		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
-
 		if (cpuid_topo->core_id != cpu_topo->core_id)
 			continue;
 
@@ -750,7 +792,6 @@ void __init reset_cpu_topology(void)
 		cpu_topo->core_id = -1;
 		cpu_topo->cluster_id = -1;
 		cpu_topo->package_id = -1;
-		cpu_topo->llc_id = -1;
 
 		clear_cpu_topology(cpu);
 	}
@@ -780,15 +821,28 @@ __weak int __init parse_acpi_topology(void)
 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 void __init init_cpu_topology(void)
 {
-	reset_cpu_topology();
+	int ret, cpu;
 
-	/*
-	 * Discard anything that was parsed if we hit an error so we
-	 * don't use partial information.
-	 */
-	if (parse_acpi_topology())
-		reset_cpu_topology();
-	else if (of_have_populated_dt() && parse_dt_topology())
+	reset_cpu_topology();
+	ret = parse_acpi_topology();
+	if (!ret)
+		ret = of_have_populated_dt() && parse_dt_topology();
+
+	if (ret) {
+		/*
+		 * Discard anything that was parsed if we hit an error so we
+		 * don't use partial information.
+		 */
 		reset_cpu_topology();
+		return;
+	}
+
+	for_each_possible_cpu(cpu) {
+		ret = detect_cache_attributes(cpu);
+		if (ret) {
+			pr_info("Early cacheinfo failed, ret = %d\n", ret);
+			break;
+		}
+	}
 }
 #endif

@@ -14,7 +14,7 @@
 #include <linux/cpu.h>
 #include <linux/device.h>
 #include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -25,19 +25,60 @@ static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
 #define ci_cacheinfo(cpu)	(&per_cpu(ci_cpu_cacheinfo, cpu))
 #define cache_leaves(cpu)	(ci_cacheinfo(cpu)->num_leaves)
 #define per_cpu_cacheinfo(cpu)	(ci_cacheinfo(cpu)->info_list)
+#define per_cpu_cacheinfo_idx(cpu, idx)		\
+				(per_cpu_cacheinfo(cpu) + (idx))
 
 struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
 {
 	return ci_cacheinfo(cpu);
 }
 
-#ifdef CONFIG_OF
 static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
 					   struct cacheinfo *sib_leaf)
 {
+	/*
+	 * For non DT/ACPI systems, assume unique level 1 caches,
+	 * system-wide shared caches for all other levels. This will be used
+	 * only if arch specific code has not populated shared_cpu_map
+	 */
+	if (!(IS_ENABLED(CONFIG_OF) || IS_ENABLED(CONFIG_ACPI)))
+		return !(this_leaf->level == 1);
+
+	if ((sib_leaf->attributes & CACHE_ID) &&
+	    (this_leaf->attributes & CACHE_ID))
+		return sib_leaf->id == this_leaf->id;
+
 	return sib_leaf->fw_token == this_leaf->fw_token;
 }
 
+bool last_level_cache_is_valid(unsigned int cpu)
+{
+	struct cacheinfo *llc;
+
+	if (!cache_leaves(cpu))
+		return false;
+
+	llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+
+	return (llc->attributes & CACHE_ID) || !!llc->fw_token;
+
+}
+
+bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y)
+{
+	struct cacheinfo *llc_x, *llc_y;
+
+	if (!last_level_cache_is_valid(cpu_x) ||
+	    !last_level_cache_is_valid(cpu_y))
+		return false;
+
+	llc_x = per_cpu_cacheinfo_idx(cpu_x, cache_leaves(cpu_x) - 1);
+	llc_y = per_cpu_cacheinfo_idx(cpu_y, cache_leaves(cpu_y) - 1);
+
+	return cache_leaves_are_shared(llc_x, llc_y);
+}
+
+#ifdef CONFIG_OF
 /* OF properties to query for a given cache type */
 struct cache_type_info {
 	const char *size_prop;
@@ -157,27 +198,16 @@ static int cache_setup_of_node(unsigned int cpu)
 {
 	struct device_node *np;
 	struct cacheinfo *this_leaf;
-	struct device *cpu_dev = get_cpu_device(cpu);
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	unsigned int index = 0;
 
-	/* skip if fw_token is already populated */
-	if (this_cpu_ci->info_list->fw_token) {
-		return 0;
-	}
-
-	if (!cpu_dev) {
-		pr_err("No cpu device for CPU %d\n", cpu);
-		return -ENODEV;
-	}
-	np = cpu_dev->of_node;
+	np = of_cpu_device_node_get(cpu);
 	if (!np) {
 		pr_err("Failed to find cpu%d device node\n", cpu);
 		return -ENOENT;
 	}
 
 	while (index < cache_leaves(cpu)) {
-		this_leaf = this_cpu_ci->info_list + index;
+		this_leaf = per_cpu_cacheinfo_idx(cpu, index);
 		if (this_leaf->level != 1)
 			np = of_find_next_cache_node(np);
 		else
@@ -196,16 +226,6 @@ static int cache_setup_of_node(unsigned int cpu)
 }
 #else
 static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
-static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
-					   struct cacheinfo *sib_leaf)
-{
-	/*
-	 * For non-DT/ACPI systems, assume unique level 1 caches, system-wide
-	 * shared caches for all other levels. This will be used only if
-	 * arch specific code has not populated shared_cpu_map
-	 */
-	return !(this_leaf->level == 1);
-}
 #endif
 
 int __weak cache_setup_acpi(unsigned int cpu)
@@ -215,6 +235,18 @@ int __weak cache_setup_acpi(unsigned int cpu)
 
 unsigned int coherency_max_size;
 
+static int cache_setup_properties(unsigned int cpu)
+{
+	int ret = 0;
+
+	if (of_have_populated_dt())
+		ret = cache_setup_of_node(cpu);
+	else if (!acpi_disabled)
+		ret = cache_setup_acpi(cpu);
+
+	return ret;
+}
+
 static int cache_shared_cpu_map_setup(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -225,21 +257,21 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
 	if (this_cpu_ci->cpu_map_populated)
 		return 0;
 
-	if (of_have_populated_dt())
-		ret = cache_setup_of_node(cpu);
-	else if (!acpi_disabled)
-		ret = cache_setup_acpi(cpu);
-
-	if (ret)
-		return ret;
+	/*
+	 * skip setting up cache properties if LLC is valid, just need
+	 * to update the shared cpu_map if the cache attributes were
+	 * populated early before all the cpus are brought online
+	 */
+	if (!last_level_cache_is_valid(cpu)) {
+		ret = cache_setup_properties(cpu);
+		if (ret)
+			return ret;
+	}
 
 	for (index = 0; index < cache_leaves(cpu); index++) {
 		unsigned int i;
 
-		this_leaf = this_cpu_ci->info_list + index;
-		/* skip if shared_cpu_map is already populated */
-		if (!cpumask_empty(&this_leaf->shared_cpu_map))
-			continue;
+		this_leaf = per_cpu_cacheinfo_idx(cpu, index);
 
 		cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 		for_each_online_cpu(i) {
@@ -247,7 +279,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
 
 			if (i == cpu || !sib_cpu_ci->info_list)
 				continue;/* skip if itself or no cacheinfo */
-			sib_leaf = sib_cpu_ci->info_list + index;
+
+			sib_leaf = per_cpu_cacheinfo_idx(i, index);
 			if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
 				cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
 				cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
@@ -263,23 +296,19 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
 
 static void cache_shared_cpu_map_remove(unsigned int cpu)
 {
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	struct cacheinfo *this_leaf, *sib_leaf;
 	unsigned int sibling, index;
 
 	for (index = 0; index < cache_leaves(cpu); index++) {
-		this_leaf = this_cpu_ci->info_list + index;
+		this_leaf = per_cpu_cacheinfo_idx(cpu, index);
 		for_each_cpu(sibling, &this_leaf->shared_cpu_map) {
-			struct cpu_cacheinfo *sib_cpu_ci;
+			struct cpu_cacheinfo *sib_cpu_ci =
+						get_cpu_cacheinfo(sibling);
 
-			if (sibling == cpu) /* skip itself */
-				continue;
+			if (sibling == cpu || !sib_cpu_ci->info_list)
+				continue;/* skip if itself or no cacheinfo */
 
-			sib_cpu_ci = get_cpu_cacheinfo(sibling);
-			if (!sib_cpu_ci->info_list)
-				continue;
-
-			sib_leaf = sib_cpu_ci->info_list + index;
+			sib_leaf = per_cpu_cacheinfo_idx(sibling, index);
 			cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
 			cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
 		}
@@ -310,17 +339,28 @@ int __weak populate_cache_leaves(unsigned int cpu)
 	return -ENOENT;
 }
 
-static int detect_cache_attributes(unsigned int cpu)
+int detect_cache_attributes(unsigned int cpu)
 {
 	int ret;
 
+	/* Since early detection of the cacheinfo is allowed via this
+	 * function and this also gets called as CPU hotplug callbacks via
+	 * cacheinfo_cpu_online, the initialisation can be skipped and only
+	 * CPU maps can be updated as the CPU online status would be update
	 * if called via cacheinfo_cpu_online path.
+	 */
+	if (per_cpu_cacheinfo(cpu))
+		goto update_cpu_map;
+
 	if (init_cache_level(cpu) || !cache_leaves(cpu))
 		return -ENOENT;
 
 	per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
 					 sizeof(struct cacheinfo), GFP_KERNEL);
-	if (per_cpu_cacheinfo(cpu) == NULL)
+	if (per_cpu_cacheinfo(cpu) == NULL) {
+		cache_leaves(cpu) = 0;
 		return -ENOMEM;
+	}
 
 	/*
 	 * populate_cache_leaves() may completely setup the cache leaves and
@@ -329,6 +369,8 @@ static int detect_cache_attributes(unsigned int cpu)
 	ret = populate_cache_leaves(cpu);
 	if (ret)
 		goto free_ci;
+
+update_cpu_map:
 	/*
 	 * For systems using DT for cache hierarchy, fw_token
 	 * and shared_cpu_map will be set up here only if they are
@@ -614,7 +656,6 @@ static int cache_add_dev(unsigned int cpu)
 	int rc;
 	struct device *ci_dev, *parent;
 	struct cacheinfo *this_leaf;
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	const struct attribute_group **cache_groups;
 
 	rc = cpu_cache_sysfs_init(cpu);
@@ -623,7 +664,7 @@ static int cache_add_dev(unsigned int cpu)
 
 	parent = per_cpu_cache_dev(cpu);
 	for (i = 0; i < cache_leaves(cpu); i++) {
-		this_leaf = this_cpu_ci->info_list + i;
+		this_leaf = per_cpu_cacheinfo_idx(cpu, i);
 		if (this_leaf->disable_sysfs)
 			continue;
 		if (this_leaf->type == CACHE_TYPE_NOCACHE)

@@ -1429,7 +1429,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level);
 int find_acpi_cpu_topology_cluster(unsigned int cpu);
 int find_acpi_cpu_topology_package(unsigned int cpu);
 int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
-int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
 #else
 static inline int acpi_pptt_cpu_is_thread(unsigned int cpu)
 {
@@ -1451,10 +1450,6 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
 {
 	return -EINVAL;
 }
-static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
-{
-	return -EINVAL;
-}
 #endif
 
 #ifdef CONFIG_ACPI_PCC

@@ -68,7 +68,6 @@ struct cpu_topology {
 	int core_id;
 	int cluster_id;
 	int package_id;
-	int llc_id;
 	cpumask_t thread_sibling;
 	cpumask_t core_sibling;
 	cpumask_t cluster_sibling;

@@ -82,6 +82,9 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
 int init_cache_level(unsigned int cpu);
 int populate_cache_leaves(unsigned int cpu);
 int cache_setup_acpi(unsigned int cpu);
+bool last_level_cache_is_valid(unsigned int cpu);
+bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y);
+int detect_cache_attributes(unsigned int cpu);
 #ifndef CONFIG_ACPI_PPTT
 /*
  * acpi_find_last_cache_level is only called on ACPI enabled