From c82b57756ff26c6d614a27fb903f09b15c7f76fb Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 14 Jun 2024 06:46:24 -0700 Subject: [PATCH 01/39] perf/x86/uncore: Save the unit control address of all units Upstream commit: 0007f39325921351b7860a976a730acbb198b9ca Conflict: none The unit control address of some CXL units may be wrongly calculated under some configuration on a EMR machine. The current implementation only saves the unit control address of the units from the first die, and the first unit of the rest of dies. Perf assumed that the units from the other dies have the same offset as the first die. So the unit control address of the rest of the units can be calculated. However, the assumption is wrong, especially for the CXL units. Introduce an RB tree for each uncore type to save the unit control address and three kinds of ID information (unit ID, PMU ID, and die ID) for all units. The unit ID is a physical ID of a unit. The PMU ID is a logical ID assigned to a unit. The logical IDs start from 0 and must be contiguous. The physical ID and the logical ID are 1:1 mapping. The units with the same physical ID in different dies share the same PMU. The die ID indicates which die a unit belongs to. The RB tree can be searched by two different keys (unit ID or PMU ID + die ID). During the RB tree setup, the unit ID is used as a key to look up the RB tree. The perf can create/assign a proper PMU ID to the unit. Later, after the RB tree is setup, PMU ID + die ID is used as a key to look up the RB tree to fill the cpumask of a PMU. It's used more frequently, so PMU ID + die ID is compared in the unit_less(). The uncore_find_unit() has to be O(N). But the RB tree setup only occurs once during the driver load time. It should be acceptable. Compared with the current implementation, more space is required to save the information of all units. The extra size should be acceptable. For example, on EMR, there are 221 units at most. For a 2-socket machine, the extra space is ~6KB at most. 
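For illustration only, below is a standalone userspace model of the unit-ID/PMU-ID mapping this patch describes. It uses a plain array instead of the kernel rbtree, the helper names only mirror uncore_find_add_unit()/uncore_find_unit() from the diff, and the unit IDs, dies and control addresses are made up; it is a sketch of the idea, not the driver code.

#include <stdio.h>

struct unit {
	unsigned int id;	/* physical unit ID from the discovery table */
	unsigned int die;	/* die the unit belongs to */
	unsigned int pmu_idx;	/* logical PMU ID, assigned at parse time */
	unsigned long long addr;/* unit control address */
};

static struct unit units[16];
static unsigned int nr_units, nr_pmus;

/* Mirrors the idea of uncore_find_add_unit(): reuse pmu_idx for a known unit ID. */
static void add_unit(unsigned int id, unsigned int die, unsigned long long addr)
{
	unsigned int i, pmu_idx = nr_pmus;

	for (i = 0; i < nr_units; i++) {
		if (units[i].id == id) {	/* same physical ID -> same PMU */
			pmu_idx = units[i].pmu_idx;
			break;
		}
	}
	if (pmu_idx == nr_pmus)
		nr_pmus++;
	units[nr_units++] = (struct unit){ id, die, pmu_idx, addr };
}

/* Lookup by the second key pair (pmu_idx, die), as done after setup. */
static struct unit *find_unit(unsigned int pmu_idx, unsigned int die)
{
	unsigned int i;

	for (i = 0; i < nr_units; i++) {
		if (units[i].pmu_idx == pmu_idx && units[i].die == die)
			return &units[i];
	}
	return NULL;
}

int main(void)
{
	struct unit *u;

	/* Hypothetical CXL-like layout: unit 3 exists only on die 1. */
	add_unit(0, 0, 0xfbe00000ULL);
	add_unit(0, 1, 0xfde00000ULL);
	add_unit(3, 1, 0xfdf00000ULL);

	u = find_unit(1, 1);	/* pmu_idx 1 (physical unit 3), die 1 */
	if (u)
		printf("pmu %u die %u -> unit %u ctl %#llx\n",
		       u->pmu_idx, u->die, u->id, u->addr);
	if (!find_unit(1, 0))
		printf("pmu 1 has no unit on die 0\n");
	return 0;
}

Running the sketch prints the control address for (pmu 1, die 1) and reports that the same PMU has no unit on die 0, which is exactly the asymmetric case the old per-die offset assumption could not express.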
Intel-SIG: commit 0007f3932592 ("perf/x86/uncore: Save the unit control address of all units") Backport SPR/EMR HBM and CXL PMON support to kernel v6.6 Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240614134631.1092359-2-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/uncore_discovery.c | 79 +++++++++++++++++++++++- arch/x86/events/intel/uncore_discovery.h | 10 +++ 2 files changed, 87 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 9a698a92962a..ce520e69a3c1 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -93,6 +93,8 @@ add_uncore_discovery_type(struct uncore_unit_discovery *unit) if (!type->box_ctrl_die) goto free_type; + type->units = RB_ROOT; + type->access_type = unit->access_type; num_discovered_types[type->access_type]++; type->type = unit->box_type; @@ -120,10 +122,59 @@ get_uncore_discovery_type(struct uncore_unit_discovery *unit) return add_uncore_discovery_type(unit); } +static inline bool unit_less(struct rb_node *a, const struct rb_node *b) +{ + struct intel_uncore_discovery_unit *a_node, *b_node; + + a_node = rb_entry(a, struct intel_uncore_discovery_unit, node); + b_node = rb_entry(b, struct intel_uncore_discovery_unit, node); + + if (a_node->pmu_idx < b_node->pmu_idx) + return true; + if (a_node->pmu_idx > b_node->pmu_idx) + return false; + + if (a_node->die < b_node->die) + return true; + if (a_node->die > b_node->die) + return false; + + return 0; +} + +static inline struct intel_uncore_discovery_unit * +uncore_find_unit(struct rb_root *root, unsigned int id) +{ + struct intel_uncore_discovery_unit *unit; + struct rb_node *node; + + for (node = rb_first(root); node; node = rb_next(node)) { + unit = rb_entry(node, struct intel_uncore_discovery_unit, node); + if (unit->id == id) + return unit; + } + + return NULL; +} + +static void uncore_find_add_unit(struct intel_uncore_discovery_unit *node, + struct rb_root *root, u16 *num_units) +{ + struct intel_uncore_discovery_unit *unit = uncore_find_unit(root, node->id); + + if (unit) + node->pmu_idx = unit->pmu_idx; + else if (num_units) + node->pmu_idx = (*num_units)++; + + rb_add(&node->node, root, unit_less); +} + static void uncore_insert_box_info(struct uncore_unit_discovery *unit, int die, bool parsed) { + struct intel_uncore_discovery_unit *node; struct intel_uncore_discovery_type *type; unsigned int *ids; u64 *box_offset; @@ -136,14 +187,26 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit, return; } + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return; + + node->die = die; + node->id = unit->box_id; + node->addr = unit->ctl; + if (parsed) { type = search_uncore_discovery_type(unit->box_type); if (!type) { pr_info("A spurious uncore type %d is detected, " "Disable the uncore type.\n", unit->box_type); + kfree(node); return; } + + uncore_find_add_unit(node, &type->units, &type->num_units); + /* Store the first box of each die */ if (!type->box_ctrl_die[die]) type->box_ctrl_die[die] = unit->ctl; @@ -152,16 +215,18 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit, type = get_uncore_discovery_type(unit); if (!type) - return; + goto free_node; box_offset = kcalloc(type->num_boxes + 1, sizeof(u64), GFP_KERNEL); if (!box_offset) - return; + goto free_node; ids = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); if (!ids) goto 
free_box_offset; + uncore_find_add_unit(node, &type->units, &type->num_units); + /* Store generic information for the first box */ if (!type->num_boxes) { type->box_ctrl = unit->ctl; @@ -201,6 +266,8 @@ free_ids: free_box_offset: kfree(box_offset); +free_node: + kfree(node); } static bool @@ -339,8 +406,16 @@ err: void intel_uncore_clear_discovery_tables(void) { struct intel_uncore_discovery_type *type, *next; + struct intel_uncore_discovery_unit *pos; + struct rb_node *node; rbtree_postorder_for_each_entry_safe(type, next, &discovery_tables, node) { + while (!RB_EMPTY_ROOT(&type->units)) { + node = rb_first(&type->units); + pos = rb_entry(node, struct intel_uncore_discovery_unit, node); + rb_erase(node, &type->units); + kfree(pos); + } kfree(type->box_ctrl_die); kfree(type); } diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 22e769a81103..5190017aba51 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -113,17 +113,27 @@ struct uncore_unit_discovery { }; }; +struct intel_uncore_discovery_unit { + struct rb_node node; + unsigned int pmu_idx; /* The idx of the corresponding PMU */ + unsigned int id; /* Unit ID */ + unsigned int die; /* Die ID */ + u64 addr; /* Unit Control Address */ +}; + struct intel_uncore_discovery_type { struct rb_node node; enum uncore_access_type access_type; u64 box_ctrl; /* Unit ctrl addr of the first box */ u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */ + struct rb_root units; /* Unit ctrl addr for all units */ u16 type; /* Type ID of the uncore block */ u8 num_counters; u8 counter_width; u8 ctl_offset; /* Counter Control 0 offset */ u8 ctr_offset; /* Counter 0 offset */ u16 num_boxes; /* number of boxes for the uncore block */ + u16 num_units; /* number of units */ unsigned int *ids; /* Box IDs */ u64 *box_offset; /* Box offset */ }; From 80f67e43fa17a5d1de2986a22d319c2e28c8df20 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 14 Jun 2024 06:46:25 -0700 Subject: [PATCH 02/39] perf/x86/uncore: Support per PMU cpumask Upstream commit: c74443d92f68f07c03ae242ced554b749e6c6736 Conflict: none The cpumask of some uncore units, e.g., CXL uncore units, may be wrong under some configurations. Perf may access an uncore counter of a non-existent uncore unit. The uncore driver assumes that all uncore units are symmetric among dies. A global cpumask is shared among all uncore PMUs. However, some CXL uncore units may only be available on some dies. A per PMU cpumask is introduced to track the CPU mask of this PMU. The driver searches the unit control RB tree to check whether the PMU is available on a given die, and updates the per PMU cpumask accordingly. 
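As a rough illustration of the per-PMU cpumask idea, the small userspace sketch below builds one mask per PMU instead of a single global mask. The pmu_present[] table, the per-die representative CPU numbers and die_has_unit() are all invented for this example; die_has_unit() merely stands in for the intel_uncore_find_discovery_unit_id() check added by the patch.

#include <stdio.h>

#define NR_DIES	2
#define NR_PMUS	2

/* pmu_present[pmu_idx][die]: does this PMU have a unit on that die? */
static const int pmu_present[NR_PMUS][NR_DIES] = {
	{ 1, 1 },	/* pmu 0: symmetric unit, present on both dies */
	{ 0, 1 },	/* pmu 1: CXL-like unit, only on die 1 */
};

/* Hypothetical representative CPU of each die. */
static const int die_cpu[NR_DIES] = { 0, 32 };

/* Stand-in for the RB tree lookup that decides availability per die. */
static int die_has_unit(int pmu_idx, int die)
{
	return pmu_present[pmu_idx][die];
}

int main(void)
{
	unsigned long long cpu_mask;
	int pmu, die;

	for (pmu = 0; pmu < NR_PMUS; pmu++) {
		cpu_mask = 0;
		for (die = 0; die < NR_DIES; die++) {
			if (die_has_unit(pmu, die))
				cpu_mask |= 1ULL << die_cpu[die];
		}
		printf("pmu %d cpumask 0x%llx\n", pmu, cpu_mask);
	}
	return 0;
}

With a single shared cpumask, pmu 1 would advertise CPU 0 even though it has no unit on die 0; with the per-PMU mask it only advertises CPU 32.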
Intel-SIG: commit c74443d92f68 ("perf/x86/uncore: Support per PMU cpumask") Backport SPR/EMR HBM and CXL PMON support to kernel v6.6 Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yunying Sun Link: https://lore.kernel.org/r/20240614134631.1092359-3-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/uncore.c | 31 +++++++++++-- arch/x86/events/intel/uncore.h | 2 + arch/x86/events/intel/uncore_discovery.c | 58 ++++++++++++++++++++++++ arch/x86/events/intel/uncore_discovery.h | 3 ++ 4 files changed, 89 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 4e26a28536de..ec937ce83c55 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -843,7 +843,9 @@ static void uncore_pmu_disable(struct pmu *pmu) static ssize_t uncore_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { - return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask); + struct intel_uncore_pmu *pmu = container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu); + + return cpumap_print_to_pagebuf(true, buf, &pmu->cpu_mask); } static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); @@ -1453,6 +1455,18 @@ static void uncore_pci_exit(void) } } +static bool uncore_die_has_box(struct intel_uncore_type *type, + int die, unsigned int pmu_idx) +{ + if (!type->boxes) + return true; + + if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0) + return false; + + return true; +} + static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, int new_cpu) { @@ -1468,18 +1482,25 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, if (old_cpu < 0) { WARN_ON_ONCE(box->cpu != -1); - box->cpu = new_cpu; + if (uncore_die_has_box(type, die, pmu->pmu_idx)) { + box->cpu = new_cpu; + cpumask_set_cpu(new_cpu, &pmu->cpu_mask); + } continue; } - WARN_ON_ONCE(box->cpu != old_cpu); + WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu); box->cpu = -1; + cpumask_clear_cpu(old_cpu, &pmu->cpu_mask); if (new_cpu < 0) continue; + if (!uncore_die_has_box(type, die, pmu->pmu_idx)) + continue; uncore_pmu_cancel_hrtimer(box); perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); box->cpu = new_cpu; + cpumask_set_cpu(new_cpu, &pmu->cpu_mask); } } @@ -1502,7 +1523,7 @@ static void uncore_box_unref(struct intel_uncore_type **types, int id) pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { box = pmu->boxes[id]; - if (box && atomic_dec_return(&box->refcnt) == 0) + if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0) uncore_box_exit(box); } } @@ -1592,7 +1613,7 @@ static int uncore_box_ref(struct intel_uncore_type **types, pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { box = pmu->boxes[id]; - if (box && atomic_inc_return(&box->refcnt) == 1) + if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 4838502d89ae..0a49e304fe40 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -86,6 +86,7 @@ struct intel_uncore_type { const struct attribute_group *attr_groups[4]; const struct attribute_group **attr_update; struct pmu *pmu; /* for custom pmu ops */ + struct rb_root *boxes; /* * Uncore PMU would store relevant platform topology configuration here * to identify which platform component each PMON 
block of that type is @@ -125,6 +126,7 @@ struct intel_uncore_pmu { int func_id; bool registered; atomic_t activeboxes; + cpumask_t cpu_mask; struct intel_uncore_type *type; struct intel_uncore_box **boxes; }; diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index ce520e69a3c1..e61e460520a8 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -122,6 +122,64 @@ get_uncore_discovery_type(struct uncore_unit_discovery *unit) return add_uncore_discovery_type(unit); } +static inline int pmu_idx_cmp(const void *key, const struct rb_node *b) +{ + struct intel_uncore_discovery_unit *unit; + const unsigned int *id = key; + + unit = rb_entry(b, struct intel_uncore_discovery_unit, node); + + if (unit->pmu_idx > *id) + return -1; + else if (unit->pmu_idx < *id) + return 1; + + return 0; +} + +static struct intel_uncore_discovery_unit * +intel_uncore_find_discovery_unit(struct rb_root *units, int die, + unsigned int pmu_idx) +{ + struct intel_uncore_discovery_unit *unit; + struct rb_node *pos; + + if (!units) + return NULL; + + pos = rb_find_first(&pmu_idx, units, pmu_idx_cmp); + if (!pos) + return NULL; + unit = rb_entry(pos, struct intel_uncore_discovery_unit, node); + + if (die < 0) + return unit; + + for (; pos; pos = rb_next(pos)) { + unit = rb_entry(pos, struct intel_uncore_discovery_unit, node); + + if (unit->pmu_idx != pmu_idx) + break; + + if (unit->die == die) + return unit; + } + + return NULL; +} + +int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die, + unsigned int pmu_idx) +{ + struct intel_uncore_discovery_unit *unit; + + unit = intel_uncore_find_discovery_unit(units, die, pmu_idx); + if (unit) + return unit->id; + + return -1; +} + static inline bool unit_less(struct rb_node *a, const struct rb_node *b) { struct intel_uncore_discovery_unit *a_node, *b_node; diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 5190017aba51..96265cf1fc86 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -166,3 +166,6 @@ u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, struct intel_uncore_type ** intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra); + +int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die, + unsigned int pmu_idx); From a04d8c5821c77888cb33f9c7f1ee6f49976e418e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 14 Jun 2024 06:46:26 -0700 Subject: [PATCH 03/39] perf/x86/uncore: Retrieve the unit ID from the unit control RB tree Upstream commit: 585463fee64270d4b4d80b1e433d2105ef555bec Conflict: none The box_ids only save the unit ID for the first die. If a unit, e.g., a CXL unit, doesn't exist in the first die. The unit ID cannot be retrieved. The unit control RB tree also stores the unit ID information. 
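A minimal sketch of the "die == -1" lookup semantics this patch relies on: when no die is given, any die's entry for the pmu_idx is sufficient, because the physical unit ID is the same on every die. The table and find_unit_id() below are hypothetical stand-ins for the unit RB tree and intel_uncore_find_discovery_unit_id(), not the driver code.

#include <stdio.h>

struct unit { int pmu_idx, die, id; };

static const struct unit units[] = {
	{ 0, 0, 0 }, { 0, 1, 0 },	/* pmu 0 -> physical unit 0 */
	{ 1, 1, 3 },			/* pmu 1 -> physical unit 3, die 1 only */
};

static int find_unit_id(int pmu_idx, int die)
{
	unsigned int i;

	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		if (units[i].pmu_idx != pmu_idx)
			continue;
		if (die < 0 || units[i].die == die)
			return units[i].id;
	}
	return -1;
}

int main(void)
{
	/* Equivalent of uncore_get_box_id(): the die does not matter for the ID. */
	printf("pmu 1 -> unit id %d\n", find_unit_id(1, -1));
	/* A box_ids[] array built from die 0 alone could not answer this. */
	printf("pmu 1 on die 0 -> %d\n", find_unit_id(1, 0));
	return 0;
}

The first lookup returns unit ID 3 even though the unit is absent from die 0, which is the case the old box_ids array got wrong.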
Retrieve the unit ID from the unit control RB tree Intel-SIG: commit 585463fee642 ("perf/x86/uncore: Retrieve the unit ID from the unit control RB tree") Backport SPR/EMR HBM and CXL PMON support to kernel v6.6 Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yunying Sun Link: https://lore.kernel.org/r/20240614134631.1092359-4-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/uncore.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index ec937ce83c55..dcf0721adefe 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -862,6 +862,9 @@ static const struct attribute_group uncore_pmu_attr_group = { static inline int uncore_get_box_id(struct intel_uncore_type *type, struct intel_uncore_pmu *pmu) { + if (type->boxes) + return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx); + return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx; } From 8675b53d5a439e4abd3cc594b257f01ecff36534 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 14 Jun 2024 06:46:27 -0700 Subject: [PATCH 04/39] perf/x86/uncore: Apply the unit control RB tree to MMIO uncore units Upstream commit: 80580dae65b941eb681bd79f31f64f91b58232b4 Conflict: none The unit control RB tree has the unit control and unit ID information for all the units. Use it to replace the box_ctls/mmio_offsets to get an accurate unit control address for MMIO uncore units. Intel-SIG: commit 80580dae65b9 ("perf/x86/uncore: Apply the unit control RB tree to MMIO uncore units") Backport SPR/EMR HBM and CXL PMON support to kernel v6.6 Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yunying Sun Link: https://lore.kernel.org/r/20240614134631.1092359-5-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/uncore_discovery.c | 32 +++++++++++------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index e61e460520a8..ece761c9f17a 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -606,34 +606,30 @@ static struct intel_uncore_ops generic_uncore_pci_ops = { #define UNCORE_GENERIC_MMIO_SIZE 0x4000 -static u64 generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) -{ - struct intel_uncore_type *type = box->pmu->type; - - if (!type->box_ctls || !type->box_ctls[box->dieid] || !type->mmio_offsets) - return 0; - - return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx]; -} - void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) { - u64 box_ctl = generic_uncore_mmio_box_ctl(box); + static struct intel_uncore_discovery_unit *unit; struct intel_uncore_type *type = box->pmu->type; resource_size_t addr; - if (!box_ctl) { - pr_warn("Uncore type %d box %d: Invalid box control address.\n", - type->type_id, type->box_ids[box->pmu->pmu_idx]); + unit = intel_uncore_find_discovery_unit(type->boxes, box->dieid, box->pmu->pmu_idx); + if (!unit) { + pr_warn("Uncore type %d id %d: Cannot find box control address.\n", + type->type_id, box->pmu->pmu_idx); return; } - addr = box_ctl; + if (!unit->addr) { + pr_warn("Uncore type %d box %d: Invalid box control address.\n", + type->type_id, unit->id); + return; + } + + addr = unit->addr; box->io_addr = ioremap(addr, UNCORE_GENERIC_MMIO_SIZE); if (!box->io_addr) 
{ pr_warn("Uncore type %d box %d: ioremap error for 0x%llx.\n", - type->type_id, type->box_ids[box->pmu->pmu_idx], - (unsigned long long)addr); + type->type_id, unit->id, (unsigned long long)addr); return; } @@ -722,6 +718,8 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id, uncore->box_ctls = type->box_ctrl_die; uncore->mmio_offsets = type->box_offset; uncore->mmio_map_size = UNCORE_GENERIC_MMIO_SIZE; + uncore->boxes = &type->units; + uncore->num_boxes = type->num_units; break; default: return false; From dbfc137d110fdfc90ae2e571ae6b0bc19dc7e3a1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 14 Jun 2024 06:46:28 -0700 Subject: [PATCH 05/39] perf/x86/uncore: Apply the unit control RB tree to MSR uncore units Upstream commit: b1d9ea2e1ca44987c8409cc628dfb0c84e93dce9 Conflict: none The unit control RB tree has the unit control and unit ID information for all the MSR units. Use them to replace the box_ctl and uncore_msr_box_ctl() to get an accurate unit control address for MSR uncore units. Add intel_generic_uncore_assign_hw_event(), which utilizes the accurate unit control address from the unit control RB tree to calculate the config_base and event_base. The unit id related information should be retrieved from the unit control RB tree as well. Intel-SIG: commit b1d9ea2e1ca4 ("perf/x86/uncore: Apply the unit control RB tree to MSR uncore units") Backport SPR/EMR HBM and CXL PMON support to kernel v6.6 Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yunying Sun Link: https://lore.kernel.org/r/20240614134631.1092359-6-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/uncore.c | 3 ++ arch/x86/events/intel/uncore_discovery.c | 49 +++++++++++++++++++++--- arch/x86/events/intel/uncore_discovery.h | 2 + arch/x86/events/intel/uncore_snbep.c | 16 +++++--- 4 files changed, 59 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index dcf0721adefe..6e9469325ad0 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -263,6 +263,9 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, return; } + if (intel_generic_uncore_assign_hw_event(event, box)) + return; + hwc->config_base = uncore_event_ctl(box, hwc->idx); hwc->event_base = uncore_perf_ctr(box, hwc->idx); } diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index ece761c9f17a..076ec1efe9cc 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -499,19 +499,31 @@ static const struct attribute_group generic_uncore_format_group = { .attrs = generic_uncore_formats_attr, }; +static u64 intel_generic_uncore_box_ctl(struct intel_uncore_box *box) +{ + struct intel_uncore_discovery_unit *unit; + + unit = intel_uncore_find_discovery_unit(box->pmu->type->boxes, + -1, box->pmu->pmu_idx); + if (WARN_ON_ONCE(!unit)) + return 0; + + return unit->addr; +} + void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box) { - wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); + wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); } void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box) { - wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); + wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); } void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box) { - 
wrmsrl(uncore_msr_box_ctl(box), 0); + wrmsrl(intel_generic_uncore_box_ctl(box), 0); } static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box, @@ -539,6 +551,31 @@ static struct intel_uncore_ops generic_uncore_msr_ops = { .read_counter = uncore_msr_read_counter, }; +bool intel_generic_uncore_assign_hw_event(struct perf_event *event, + struct intel_uncore_box *box) +{ + struct hw_perf_event *hwc = &event->hw; + u64 box_ctl; + + if (!box->pmu->type->boxes) + return false; + + if (box->pci_dev || box->io_addr) { + hwc->config_base = uncore_pci_event_ctl(box, hwc->idx); + hwc->event_base = uncore_pci_perf_ctr(box, hwc->idx); + return true; + } + + box_ctl = intel_generic_uncore_box_ctl(box); + if (!box_ctl) + return false; + + hwc->config_base = box_ctl + box->pmu->type->event_ctl + hwc->idx; + hwc->event_base = box_ctl + box->pmu->type->perf_ctr + hwc->idx; + + return true; +} + void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; @@ -697,10 +734,12 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id, switch (type_id) { case UNCORE_ACCESS_MSR: uncore->ops = &generic_uncore_msr_ops; - uncore->perf_ctr = (unsigned int)type->box_ctrl + type->ctr_offset; - uncore->event_ctl = (unsigned int)type->box_ctrl + type->ctl_offset; + uncore->perf_ctr = (unsigned int)type->ctr_offset; + uncore->event_ctl = (unsigned int)type->ctl_offset; uncore->box_ctl = (unsigned int)type->box_ctrl; uncore->msr_offsets = type->box_offset; + uncore->boxes = &type->units; + uncore->num_boxes = type->num_units; break; case UNCORE_ACCESS_PCI: uncore->ops = &generic_uncore_pci_ops; diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 96265cf1fc86..4a7a7c819d6f 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -169,3 +169,5 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die, unsigned int pmu_idx); +bool intel_generic_uncore_assign_hw_event(struct perf_event *event, + struct intel_uncore_box *box); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 1efbacfff47d..7bd34548b8c1 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -5929,10 +5929,11 @@ static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN); struct intel_uncore_type *type = box->pmu->type; + int id = intel_uncore_find_discovery_unit_id(type->boxes, -1, box->pmu->pmu_idx); if (tie_en) { reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 + - HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx]; + HSWEP_CBO_MSR_OFFSET * id; reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID; reg1->idx = 0; } @@ -6456,18 +6457,21 @@ uncore_find_type_by_id(struct intel_uncore_type **types, int type_id) static int uncore_type_max_boxes(struct intel_uncore_type **types, int type_id) { + struct intel_uncore_discovery_unit *unit; struct intel_uncore_type *type; - int i, max = 0; + struct rb_node *node; + int max = 0; type = uncore_find_type_by_id(types, type_id); if (!type) return 0; - for (i = 0; i < type->num_boxes; i++) { - if (type->box_ids[i] > max) - max = type->box_ids[i]; - } + for (node = rb_first(type->boxes); node; node = rb_next(node)) { + unit = 
rb_entry(node, struct intel_uncore_discovery_unit, node); + if (unit->id > max) + max = unit->id; + } return max + 1; } From 2a781ae3d6e42c71d712a94c4e87ae4e42514aca Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 14 Jun 2024 06:46:29 -0700 Subject: [PATCH 06/39] perf/x86/uncore: Apply the unit control RB tree to PCI uncore units Upstream commit: f76a8420444beb1c3968504c8176a67d2d5fe18f Conflict: none The unit control RB tree has the unit control and unit ID information for all the PCI units. Use them to replace the box_ctls/pci_offsets to get an accurate unit control address for PCI uncore units. The UPI/M3UPI units in the discovery table are ignored. Please see the commit 65248a9a9ee1 ("perf/x86/uncore: Add a quirk for UPI on SPR"). Manually allocate a unit control RB tree for UPI/M3UPI. Add cleanup_extra_boxes to release such manual allocation. Intel-SIG: commit f76a8420444b ("perf/x86/uncore: Apply the unit control RB tree to PCI uncore units") Backport SPR/EMR HBM and CXL PMON support to kernel v6.6 Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yunying Sun Link: https://lore.kernel.org/r/20240614134631.1092359-7-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/uncore.c | 51 ++++++++++----------- arch/x86/events/intel/uncore.h | 4 ++ arch/x86/events/intel/uncore_discovery.c | 26 ++++++++--- arch/x86/events/intel/uncore_discovery.h | 2 + arch/x86/events/intel/uncore_snbep.c | 57 ++++++++++++++++++------ 5 files changed, 93 insertions(+), 47 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 6e9469325ad0..4935c761956a 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -969,6 +969,9 @@ static void uncore_type_exit(struct intel_uncore_type *type) if (type->cleanup_mapping) type->cleanup_mapping(type); + if (type->cleanup_extra_boxes) + type->cleanup_extra_boxes(type); + if (pmu) { for (i = 0; i < type->num_boxes; i++, pmu++) { uncore_pmu_unregister(pmu); @@ -1084,22 +1087,19 @@ static struct intel_uncore_pmu * uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) { struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_discovery_unit *unit; struct intel_uncore_type *type; - u64 box_ctl; - int i, die; + struct rb_node *node; for (; *types; types++) { type = *types; - for (die = 0; die < __uncore_max_dies; die++) { - for (i = 0; i < type->num_boxes; i++) { - if (!type->box_ctls[die]) - continue; - box_ctl = type->box_ctls[die] + type->pci_offsets[i]; - if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) && - pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) && - pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl)) - return &type->pmus[i]; - } + + for (node = rb_first(type->boxes); node; node = rb_next(node)) { + unit = rb_entry(node, struct intel_uncore_discovery_unit, node); + if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(unit->addr) && + pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(unit->addr) && + pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr)) + return &type->pmus[unit->pmu_idx]; } } @@ -1375,28 +1375,25 @@ static struct notifier_block uncore_pci_notifier = { static void uncore_pci_pmus_register(void) { struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_discovery_unit *unit; struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; + struct rb_node *node; struct pci_dev *pdev; - u64 box_ctl; - int i, die; for (; *types; 
types++) { type = *types; - for (die = 0; die < __uncore_max_dies; die++) { - for (i = 0; i < type->num_boxes; i++) { - if (!type->box_ctls[die]) - continue; - box_ctl = type->box_ctls[die] + type->pci_offsets[i]; - pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl), - UNCORE_DISCOVERY_PCI_BUS(box_ctl), - UNCORE_DISCOVERY_PCI_DEVFN(box_ctl)); - if (!pdev) - continue; - pmu = &type->pmus[i]; - uncore_pci_pmu_register(pdev, type, pmu, die); - } + for (node = rb_first(type->boxes); node; node = rb_next(node)) { + unit = rb_entry(node, struct intel_uncore_discovery_unit, node); + pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr), + UNCORE_DISCOVERY_PCI_BUS(unit->addr), + UNCORE_DISCOVERY_PCI_DEVFN(unit->addr)); + + if (!pdev) + continue; + pmu = &type->pmus[unit->pmu_idx]; + uncore_pci_pmu_register(pdev, type, pmu, unit->die); } } diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 0a49e304fe40..05c429c8cb93 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -99,6 +99,10 @@ struct intel_uncore_type { int (*get_topology)(struct intel_uncore_type *type); void (*set_mapping)(struct intel_uncore_type *type); void (*cleanup_mapping)(struct intel_uncore_type *type); + /* + * Optional callbacks for extra uncore units cleanup + */ + void (*cleanup_extra_boxes)(struct intel_uncore_type *type); }; #define pmu_group attr_groups[0] diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 076ec1efe9cc..866493fda47c 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -215,8 +215,8 @@ uncore_find_unit(struct rb_root *root, unsigned int id) return NULL; } -static void uncore_find_add_unit(struct intel_uncore_discovery_unit *node, - struct rb_root *root, u16 *num_units) +void uncore_find_add_unit(struct intel_uncore_discovery_unit *node, + struct rb_root *root, u16 *num_units) { struct intel_uncore_discovery_unit *unit = uncore_find_unit(root, node->id); @@ -560,7 +560,7 @@ bool intel_generic_uncore_assign_hw_event(struct perf_event *event, if (!box->pmu->type->boxes) return false; - if (box->pci_dev || box->io_addr) { + if (box->io_addr) { hwc->config_base = uncore_pci_event_ctl(box, hwc->idx); hwc->event_base = uncore_pci_perf_ctr(box, hwc->idx); return true; @@ -570,16 +570,28 @@ bool intel_generic_uncore_assign_hw_event(struct perf_event *event, if (!box_ctl) return false; + if (box->pci_dev) { + box_ctl = UNCORE_DISCOVERY_PCI_BOX_CTRL(box_ctl); + hwc->config_base = box_ctl + uncore_pci_event_ctl(box, hwc->idx); + hwc->event_base = box_ctl + uncore_pci_perf_ctr(box, hwc->idx); + return true; + } + hwc->config_base = box_ctl + box->pmu->type->event_ctl + hwc->idx; hwc->event_base = box_ctl + box->pmu->type->perf_ctr + hwc->idx; return true; } +static inline int intel_pci_uncore_box_ctl(struct intel_uncore_box *box) +{ + return UNCORE_DISCOVERY_PCI_BOX_CTRL(intel_generic_uncore_box_ctl(box)); +} + void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; - int box_ctl = uncore_pci_box_ctl(box); + int box_ctl = intel_pci_uncore_box_ctl(box); __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT); @@ -588,7 +600,7 @@ void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; - 
int box_ctl = uncore_pci_box_ctl(box); + int box_ctl = intel_pci_uncore_box_ctl(box); pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ); } @@ -596,7 +608,7 @@ void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; - int box_ctl = uncore_pci_box_ctl(box); + int box_ctl = intel_pci_uncore_box_ctl(box); pci_write_config_dword(pdev, box_ctl, 0); } @@ -748,6 +760,8 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id, uncore->box_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl); uncore->box_ctls = type->box_ctrl_die; uncore->pci_offsets = type->box_offset; + uncore->boxes = &type->units; + uncore->num_boxes = type->num_units; break; case UNCORE_ACCESS_MMIO: uncore->ops = &generic_uncore_mmio_ops; diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 4a7a7c819d6f..0acf9b681f3b 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -171,3 +171,5 @@ int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die, unsigned int pmu_idx); bool intel_generic_uncore_assign_hw_event(struct perf_event *event, struct intel_uncore_box *box); +void uncore_find_add_unit(struct intel_uncore_discovery_unit *node, + struct rb_root *root, u16 *num_units); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 7bd34548b8c1..880a77649d17 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6195,6 +6195,24 @@ static u64 spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = { 0, 0x8000, 0x10000, 0x18000 }; +static void spr_extra_boxes_cleanup(struct intel_uncore_type *type) +{ + struct intel_uncore_discovery_unit *pos; + struct rb_node *node; + + if (!type->boxes) + return; + + while (!RB_EMPTY_ROOT(type->boxes)) { + node = rb_first(type->boxes); + pos = rb_entry(node, struct intel_uncore_discovery_unit, node); + rb_erase(node, type->boxes); + kfree(pos); + } + kfree(type->boxes); + type->boxes = NULL; +} + static struct intel_uncore_type spr_uncore_upi = { .event_mask = SNBEP_PMON_RAW_EVENT_MASK, .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, @@ -6209,10 +6227,11 @@ static struct intel_uncore_type spr_uncore_upi = { .num_counters = 4, .num_boxes = SPR_UNCORE_UPI_NUM_BOXES, .perf_ctr_bits = 48, - .perf_ctr = ICX_UPI_PCI_PMON_CTR0, - .event_ctl = ICX_UPI_PCI_PMON_CTL0, + .perf_ctr = ICX_UPI_PCI_PMON_CTR0 - ICX_UPI_PCI_PMON_BOX_CTL, + .event_ctl = ICX_UPI_PCI_PMON_CTL0 - ICX_UPI_PCI_PMON_BOX_CTL, .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL, .pci_offsets = spr_upi_pci_offsets, + .cleanup_extra_boxes = spr_extra_boxes_cleanup, }; static struct intel_uncore_type spr_uncore_m3upi = { @@ -6222,11 +6241,12 @@ static struct intel_uncore_type spr_uncore_m3upi = { .num_counters = 4, .num_boxes = SPR_UNCORE_UPI_NUM_BOXES, .perf_ctr_bits = 48, - .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0, - .event_ctl = ICX_M3UPI_PCI_PMON_CTL0, + .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0 - ICX_M3UPI_PCI_PMON_BOX_CTL, + .event_ctl = ICX_M3UPI_PCI_PMON_CTL0 - ICX_M3UPI_PCI_PMON_BOX_CTL, .box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL, .pci_offsets = spr_upi_pci_offsets, .constraints = icx_uncore_m3upi_constraints, + .cleanup_extra_boxes = spr_extra_boxes_cleanup, }; enum perf_uncore_spr_iio_freerunning_type_id { @@ -6513,10 +6533,11 @@ void spr_uncore_cpu_init(void) static void spr_update_device_location(int type_id) { + struct 
intel_uncore_discovery_unit *unit; struct intel_uncore_type *type; struct pci_dev *dev = NULL; + struct rb_root *root; u32 device, devfn; - u64 *ctls; int die; if (type_id == UNCORE_SPR_UPI) { @@ -6530,27 +6551,35 @@ static void spr_update_device_location(int type_id) } else return; - ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); - if (!ctls) { + root = kzalloc(sizeof(struct rb_root), GFP_KERNEL); + if (!root) { type->num_boxes = 0; return; } + *root = RB_ROOT; while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) { - if (devfn != dev->devfn) - continue; die = uncore_device_to_die(dev); if (die < 0) continue; - ctls[die] = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET | - dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET | - devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET | - type->box_ctl; + unit = kzalloc(sizeof(*unit), GFP_KERNEL); + if (!unit) + continue; + unit->die = die; + unit->id = PCI_SLOT(dev->devfn) - PCI_SLOT(devfn); + unit->addr = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET | + dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET | + devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET | + type->box_ctl; + + unit->pmu_idx = unit->id; + + uncore_find_add_unit(unit, root, NULL); } - type->box_ctls = ctls; + type->boxes = root; } int spr_uncore_pci_init(void) From e5608b2914b40c94b71788a8fc8155b8b2623108 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 14 Jun 2024 06:46:30 -0700 Subject: [PATCH 07/39] perf/x86/uncore: Cleanup unused unit structure Upstream commit: 15a4bd51853b9c67f49bb03c20b6b6cb60fd204f Conflict: none The unit control and ID information are retrieved from the unit control RB tree. No one uses the old structure anymore. Remove them. Intel-SIG: commit 15a4bd51853b ("perf/x86/uncore: Cleanup unused unit structure") Backport SPR/EMR HBM and CXL PMON support to kernel v6.6 Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yunying Sun Link: https://lore.kernel.org/r/20240614134631.1092359-8-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/uncore.c | 7 +- arch/x86/events/intel/uncore.h | 2 - arch/x86/events/intel/uncore_discovery.c | 110 +++-------------------- arch/x86/events/intel/uncore_discovery.h | 5 -- 4 files changed, 12 insertions(+), 112 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 4935c761956a..4d856b51307f 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -868,7 +868,7 @@ static inline int uncore_get_box_id(struct intel_uncore_type *type, if (type->boxes) return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx); - return type->box_ids ? 
type->box_ids[pmu->pmu_idx] : pmu->pmu_idx; + return pmu->pmu_idx; } void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) @@ -980,10 +980,7 @@ static void uncore_type_exit(struct intel_uncore_type *type) kfree(type->pmus); type->pmus = NULL; } - if (type->box_ids) { - kfree(type->box_ids); - type->box_ids = NULL; - } + kfree(type->events_group); type->events_group = NULL; } diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 05c429c8cb93..027ef292c602 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -62,7 +62,6 @@ struct intel_uncore_type { unsigned fixed_ctr; unsigned fixed_ctl; unsigned box_ctl; - u64 *box_ctls; /* Unit ctrl addr of the first box of each die */ union { unsigned msr_offset; unsigned mmio_offset; @@ -76,7 +75,6 @@ struct intel_uncore_type { u64 *pci_offsets; u64 *mmio_offsets; }; - unsigned *box_ids; struct event_constraint unconstrainted; struct event_constraint *constraints; struct intel_uncore_pmu *pmus; diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 866493fda47c..571e44b49691 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -89,10 +89,6 @@ add_uncore_discovery_type(struct uncore_unit_discovery *unit) if (!type) return NULL; - type->box_ctrl_die = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); - if (!type->box_ctrl_die) - goto free_type; - type->units = RB_ROOT; type->access_type = unit->access_type; @@ -102,12 +98,6 @@ add_uncore_discovery_type(struct uncore_unit_discovery *unit) rb_add(&type->node, &discovery_tables, __type_less); return type; - -free_type: - kfree(type); - - return NULL; - } static struct intel_uncore_discovery_type * @@ -230,13 +220,10 @@ void uncore_find_add_unit(struct intel_uncore_discovery_unit *node, static void uncore_insert_box_info(struct uncore_unit_discovery *unit, - int die, bool parsed) + int die) { struct intel_uncore_discovery_unit *node; struct intel_uncore_discovery_type *type; - unsigned int *ids; - u64 *box_offset; - int i; if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) { pr_info("Invalid address is detected for uncore type %d box %d, " @@ -253,79 +240,21 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit, node->id = unit->box_id; node->addr = unit->ctl; - if (parsed) { - type = search_uncore_discovery_type(unit->box_type); - if (!type) { - pr_info("A spurious uncore type %d is detected, " - "Disable the uncore type.\n", - unit->box_type); - kfree(node); - return; - } - - uncore_find_add_unit(node, &type->units, &type->num_units); - - /* Store the first box of each die */ - if (!type->box_ctrl_die[die]) - type->box_ctrl_die[die] = unit->ctl; + type = get_uncore_discovery_type(unit); + if (!type) { + kfree(node); return; } - type = get_uncore_discovery_type(unit); - if (!type) - goto free_node; - - box_offset = kcalloc(type->num_boxes + 1, sizeof(u64), GFP_KERNEL); - if (!box_offset) - goto free_node; - - ids = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); - if (!ids) - goto free_box_offset; - uncore_find_add_unit(node, &type->units, &type->num_units); /* Store generic information for the first box */ - if (!type->num_boxes) { - type->box_ctrl = unit->ctl; - type->box_ctrl_die[die] = unit->ctl; + if (type->num_units == 1) { type->num_counters = unit->num_regs; type->counter_width = unit->bit_width; type->ctl_offset = unit->ctl_offset; type->ctr_offset = unit->ctr_offset; - *ids = unit->box_id; - 
goto end; } - - for (i = 0; i < type->num_boxes; i++) { - ids[i] = type->ids[i]; - box_offset[i] = type->box_offset[i]; - - if (unit->box_id == ids[i]) { - pr_info("Duplicate uncore type %d box ID %d is detected, " - "Drop the duplicate uncore unit.\n", - unit->box_type, unit->box_id); - goto free_ids; - } - } - ids[i] = unit->box_id; - box_offset[i] = unit->ctl - type->box_ctrl; - kfree(type->ids); - kfree(type->box_offset); -end: - type->ids = ids; - type->box_offset = box_offset; - type->num_boxes++; - return; - -free_ids: - kfree(ids); - -free_box_offset: - kfree(box_offset); - -free_node: - kfree(node); } static bool @@ -404,7 +333,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die, if (uncore_ignore_unit(&unit, ignore)) continue; - uncore_insert_box_info(&unit, die, *parsed); + uncore_insert_box_info(&unit, die); } *parsed = true; @@ -474,7 +403,6 @@ void intel_uncore_clear_discovery_tables(void) rb_erase(node, &type->units); kfree(pos); } - kfree(type->box_ctrl_die); kfree(type); } } @@ -738,41 +666,23 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id, struct intel_uncore_discovery_type *type) { uncore->type_id = type->type; - uncore->num_boxes = type->num_boxes; uncore->num_counters = type->num_counters; uncore->perf_ctr_bits = type->counter_width; - uncore->box_ids = type->ids; + uncore->perf_ctr = (unsigned int)type->ctr_offset; + uncore->event_ctl = (unsigned int)type->ctl_offset; + uncore->boxes = &type->units; + uncore->num_boxes = type->num_units; switch (type_id) { case UNCORE_ACCESS_MSR: uncore->ops = &generic_uncore_msr_ops; - uncore->perf_ctr = (unsigned int)type->ctr_offset; - uncore->event_ctl = (unsigned int)type->ctl_offset; - uncore->box_ctl = (unsigned int)type->box_ctrl; - uncore->msr_offsets = type->box_offset; - uncore->boxes = &type->units; - uncore->num_boxes = type->num_units; break; case UNCORE_ACCESS_PCI: uncore->ops = &generic_uncore_pci_ops; - uncore->perf_ctr = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctr_offset; - uncore->event_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctl_offset; - uncore->box_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl); - uncore->box_ctls = type->box_ctrl_die; - uncore->pci_offsets = type->box_offset; - uncore->boxes = &type->units; - uncore->num_boxes = type->num_units; break; case UNCORE_ACCESS_MMIO: uncore->ops = &generic_uncore_mmio_ops; - uncore->perf_ctr = (unsigned int)type->ctr_offset; - uncore->event_ctl = (unsigned int)type->ctl_offset; - uncore->box_ctl = (unsigned int)type->box_ctrl; - uncore->box_ctls = type->box_ctrl_die; - uncore->mmio_offsets = type->box_offset; uncore->mmio_map_size = UNCORE_GENERIC_MMIO_SIZE; - uncore->boxes = &type->units; - uncore->num_boxes = type->num_units; break; default: return false; diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 0acf9b681f3b..0e94aa7db8e7 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -124,18 +124,13 @@ struct intel_uncore_discovery_unit { struct intel_uncore_discovery_type { struct rb_node node; enum uncore_access_type access_type; - u64 box_ctrl; /* Unit ctrl addr of the first box */ - u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */ struct rb_root units; /* Unit ctrl addr for all units */ u16 type; /* Type ID of the uncore block */ u8 num_counters; u8 counter_width; u8 ctl_offset; /* Counter Control 0 offset */ u8 
ctr_offset; /* Counter 0 offset */ - u16 num_boxes; /* number of boxes for the uncore block */ u16 num_units; /* number of units */ - unsigned int *ids; /* Box IDs */ - u64 *box_offset; /* Box offset */ }; bool intel_uncore_has_discovery_tables(int *ignore); From aa1f407f53b6da22526c2e96f98650cf007dd1c4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 14 Jun 2024 06:46:31 -0700 Subject: [PATCH 08/39] perf/x86/intel/uncore: Support HBM and CXL PMON counters Upstream commit: f8a86a9bb5f7e65d8c4405052de062639a8783bb Conflict: none Unknown uncore PMON types can be found in both SPR and EMR with HBM or CXL. $ls /sys/devices/ | grep type uncore_type_12_16 uncore_type_12_18 uncore_type_12_2 uncore_type_12_4 uncore_type_12_6 uncore_type_12_8 uncore_type_13_17 uncore_type_13_19 uncore_type_13_3 uncore_type_13_5 uncore_type_13_7 uncore_type_13_9 The unknown PMON types are HBM and CXL PMON. Except for the name, the other information regarding the HBM and CXL PMON counters can be retrieved via the discovery table. Add them into the uncores tables for SPR and EMR. The event config registers for all CXL related units are 8-byte apart. Add SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT to specially handle it. Intel-SIG: commit f8a86a9bb5f7 ("perf/x86/intel/uncore: Support HBM and CXL PMON counters") Backport SPR/EMR HBM and CXL PMON support to kernel v6.6 Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yunying Sun Link: https://lore.kernel.org/r/20240614134631.1092359-9-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/uncore_snbep.c | 55 +++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 880a77649d17..8cf383fd4cfb 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6159,7 +6159,55 @@ static struct intel_uncore_type spr_uncore_mdf = { .name = "mdf", }; -#define UNCORE_SPR_NUM_UNCORE_TYPES 12 +static void spr_uncore_mmio_offs8_init_box(struct intel_uncore_box *box) +{ + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + intel_generic_uncore_mmio_init_box(box); +} + +static struct intel_uncore_ops spr_uncore_mmio_offs8_ops = { + .init_box = spr_uncore_mmio_offs8_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = intel_generic_uncore_mmio_disable_box, + .enable_box = intel_generic_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = spr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +#define SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT() \ + SPR_UNCORE_COMMON_FORMAT(), \ + .ops = &spr_uncore_mmio_offs8_ops + +static struct event_constraint spr_uncore_cxlcm_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x02, 0x0f), + UNCORE_EVENT_CONSTRAINT(0x05, 0x0f), + UNCORE_EVENT_CONSTRAINT(0x40, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x41, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x42, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x43, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x4b, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x52, 0xf0), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type spr_uncore_cxlcm = { + SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(), + .name = "cxlcm", + .constraints = spr_uncore_cxlcm_constraints, +}; + +static struct intel_uncore_type spr_uncore_cxldp = { + SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(), + .name = "cxldp", +}; + +static struct intel_uncore_type spr_uncore_hbm = { + SPR_UNCORE_COMMON_FORMAT(), + .name = 
"hbm", +}; + +#define UNCORE_SPR_NUM_UNCORE_TYPES 15 #define UNCORE_SPR_CHA 0 #define UNCORE_SPR_IIO 1 #define UNCORE_SPR_IMC 6 @@ -6183,6 +6231,9 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { NULL, NULL, &spr_uncore_mdf, + &spr_uncore_cxlcm, + &spr_uncore_cxldp, + &spr_uncore_hbm, }; /* @@ -6652,7 +6703,7 @@ static struct intel_uncore_type gnr_uncore_b2cmi = { }; static struct intel_uncore_type gnr_uncore_b2cxl = { - SPR_UNCORE_MMIO_COMMON_FORMAT(), + SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(), .name = "b2cxl", }; From 4ee8e719d5d496f5c01d992024c854f608ceeef0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:01 -0700 Subject: [PATCH 09/39] perf/x86/intel: Use the common uarch name for the shared functions Upstream commit: d4b5694c75d4eba8238d541a55da0c67e876213e Conflict: none From PMU's perspective, the SPR/GNR server has a similar uarch to the ADL/MTL client p-core. Many functions are shared. However, the shared function name uses the abbreviation of the server product code name, rather than the common uarch code name. Rename these internal shared functions by the common uarch name. Intel-SIG: commit d4b5694c75d4 ("perf/x86/intel: Use the common uarch name for the shared functions") Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-2-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 64 ++++++++++++++++++------------------ arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8814a757e612..0c0dcf9254ce 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -299,7 +299,7 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct extra_reg intel_spr_extra_regs[] __read_mostly = { +static struct extra_reg intel_glc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -309,7 +309,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct event_constraint intel_spr_event_constraints[] = { +static struct event_constraint intel_glc_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -349,7 +349,7 @@ static struct event_constraint intel_spr_event_constraints[] = { EVENT_CONSTRAINT_END }; -static struct extra_reg intel_gnr_extra_regs[] __read_mostly = { +static struct extra_reg intel_rwc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -473,7 +473,7 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } -static __initconst const u64 spr_hw_cache_event_ids +static __initconst const u64 glc_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -552,7 +552,7 @@ static __initconst const u64 
spr_hw_cache_event_ids }, }; -static __initconst const u64 spr_hw_cache_extra_regs +static __initconst const u64 glc_hw_cache_extra_regs [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -4348,7 +4348,7 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, } static struct event_constraint * -spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx, +glc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { struct event_constraint *c; @@ -4437,7 +4437,7 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); if (pmu->cpu_type == hybrid_big) - return spr_get_event_constraints(cpuc, idx, event); + return glc_get_event_constraints(cpuc, idx, event); else if (pmu->cpu_type == hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); @@ -4489,7 +4489,7 @@ rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct event_constraint *c; - c = spr_get_event_constraints(cpuc, idx, event); + c = glc_get_event_constraints(cpuc, idx, event); /* The Retire Latency is not supported by the fixed counter 0. */ if (event->attr.precise_ip && @@ -4570,7 +4570,7 @@ static void nhm_limit_period(struct perf_event *event, s64 *left) *left = max(*left, 32LL); } -static void spr_limit_period(struct perf_event *event, s64 *left) +static void glc_limit_period(struct perf_event *event, s64 *left) { if (event->attr.precise_ip == 3) *left = max(*left, 128LL); @@ -5417,14 +5417,14 @@ static struct attribute *icl_tsx_events_attrs[] = { EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2"); EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82"); -static struct attribute *spr_events_attrs[] = { +static struct attribute *glc_events_attrs[] = { EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_spr), EVENT_PTR(mem_ld_aux), NULL, }; -static struct attribute *spr_td_events_attrs[] = { +static struct attribute *glc_td_events_attrs[] = { EVENT_PTR(slots), EVENT_PTR(td_retiring), EVENT_PTR(td_bad_spec), @@ -5437,7 +5437,7 @@ static struct attribute *spr_td_events_attrs[] = { NULL, }; -static struct attribute *spr_tsx_events_attrs[] = { +static struct attribute *glc_tsx_events_attrs[] = { EVENT_PTR(tx_start), EVENT_PTR(tx_abort), EVENT_PTR(tx_commit), @@ -6319,7 +6319,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6350,7 +6350,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; @@ -6667,20 +6667,20 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - x86_pmu.extra_regs = intel_spr_extra_regs; + x86_pmu.extra_regs = intel_glc_extra_regs; fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: pmem = true; x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, 
spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - x86_pmu.event_constraints = intel_spr_event_constraints; - x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; + x86_pmu.event_constraints = intel_glc_event_constraints; + x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; if (!x86_pmu.extra_regs) - x86_pmu.extra_regs = intel_gnr_extra_regs; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.extra_regs = intel_rwc_extra_regs; + x86_pmu.limit_period = glc_limit_period; x86_pmu.pebs_ept = 1; x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; @@ -6690,13 +6690,13 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.hw_config = hsw_hw_config; - x86_pmu.get_event_constraints = spr_get_event_constraints; + x86_pmu.get_event_constraints = glc_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_skl_attr = skl_format_attr; - mem_attr = spr_events_attrs; - td_attr = spr_td_events_attrs; - tsx_attr = spr_tsx_events_attrs; + mem_attr = glc_events_attrs; + td_attr = glc_td_events_attrs; + tsx_attr = glc_tsx_events_attrs; x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); @@ -6746,7 +6746,7 @@ __init int intel_pmu_init(void) x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; /* * The rtm_abort_event is used to check whether to enable GPRs @@ -6795,11 +6795,11 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_spr_event_constraints; - pmu->pebs_constraints = intel_spr_pebs_event_constraints; - pmu->extra_regs = intel_spr_extra_regs; + memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); + memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); + pmu->event_constraints = intel_glc_event_constraints; + pmu->pebs_constraints = intel_glc_pebs_event_constraints; + pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; @@ -6823,7 +6823,7 @@ __init int intel_pmu_init(void) pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { - x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs; + x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 58a4da322267..a568dd132ff2 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1058,7 +1058,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_spr_pebs_event_constraints[] = { +struct event_constraint intel_glc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index b8a2d3ba4ccd..38342d1614f5 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1529,7 +1529,7 @@ extern struct event_constraint intel_skl_pebs_event_constraints[]; extern struct event_constraint intel_icl_pebs_event_constraints[]; -extern struct event_constraint intel_spr_pebs_event_constraints[]; +extern struct event_constraint intel_glc_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); From 562370702cd385f80955f56a5bf9b0057db87313 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:02 -0700 Subject: [PATCH 10/39] perf/x86/intel: Factor out the initialization code for SPR Upstream commit: 0ba0c03528e918a8f6b5aa63d502fdc6a9d80fc7 Conflict: none The SPR and ADL p-core have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_glc() for the common initialization code. The common part of the ADL p-core will be replaced by the later patch. Intel-SIG: commit 0ba0c03528e9 ("perf/x86/intel: Factor out the initialization code for SPR") Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-3-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 49 +++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0c0dcf9254ce..303b9bf9547b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6020,6 +6020,30 @@ static __always_inline bool is_mtl(u8 x86_model) (x86_model == INTEL_FAM6_METEORLAKE_L); } +static __always_inline void intel_pmu_init_glc(struct pmu *pmu) +{ + x86_pmu.late_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + x86_pmu.lbr_pt_coexist = true; + x86_pmu.num_topdown_events = 8; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid(pmu, event_constraints) = intel_glc_event_constraints; + hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6671,24 +6695,10 @@ __init int 
intel_pmu_init(void) fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: - pmem = true; - x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - - x86_pmu.event_constraints = intel_glc_event_constraints; - x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; + intel_pmu_init_glc(NULL); if (!x86_pmu.extra_regs) x86_pmu.extra_regs = intel_rwc_extra_regs; - x86_pmu.limit_period = glc_limit_period; x86_pmu.pebs_ept = 1; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = glc_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? @@ -6697,14 +6707,7 @@ __init int intel_pmu_init(void) mem_attr = glc_events_attrs; td_attr = glc_td_events_attrs; tsx_attr = glc_tsx_events_attrs; - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); - x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_skl(pmem); - x86_pmu.num_topdown_events = 8; - static_call_update(intel_pmu_update_topdown_event, - &icl_update_topdown_event); - static_call_update(intel_pmu_set_topdown_event_period, - &icl_set_topdown_event_period); + intel_pmu_pebs_data_source_skl(true); pr_cont("Sapphire Rapids events, "); name = "sapphire_rapids"; break; From dc76fea243e1aee2cf68889f93c351df890d277b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:03 -0700 Subject: [PATCH 11/39] perf/x86/intel: Factor out the initialization code for ADL e-core Upstream commit: d87d221f854b62f5e8026505497d33404ef6050c Conflict: none From PMU's perspective, the ADL e-core and newer SRF/GRR have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_grt() for the common initialization code. The common part of the ADL e-core will be replaced by the later patch. 
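As a minimal sketch (illustration only, not part of the diff): the helper takes a struct pmu pointer so that the hybrid_var()/hybrid() accessors can either fall back to the global x86_pmu state (pmu == NULL) or fill in the private copy of a hybrid PMU:

        /* Illustration only: one helper, two callers */
        intel_pmu_init_grt(NULL);       /* non-hybrid Gracemont/Crestmont parts */
        intel_pmu_init_grt(&pmu->pmu);  /* e-core PMU of a hybrid part (later patch) */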
Intel-SIG: commit d87d221f854b ("perf/x86/intel: Factor out the initialization code for ADL e-core") Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-4-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 58 +++++++++++++----------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 303b9bf9547b..98b6fd726e67 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6044,6 +6044,25 @@ static __always_inline void intel_pmu_init_glc(struct pmu *pmu) hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; } +static __always_inline void intel_pmu_init_grt(struct pmu *pmu) +{ + x86_pmu.mid_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), tnt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + hybrid(pmu, event_constraints) = intel_slm_event_constraints; + hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints; + hybrid(pmu, extra_regs) = intel_grt_extra_regs; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6322,28 +6341,10 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GRACEMONT: - x86_pmu.mid_ack = true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; - x86_pmu.extra_regs = intel_grt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - + intel_pmu_init_grt(NULL); intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6353,28 +6354,11 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_CRESTMONT: case INTEL_FAM6_ATOM_CRESTMONT_X: - x86_pmu.mid_ack = true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + intel_pmu_init_grt(NULL); x86_pmu.extra_regs = intel_cmt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - 
intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; From 44aa5bd82d8eb1b02ea37b3dbc16dcb79fb337d8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:04 -0700 Subject: [PATCH 12/39] perf/x86/intel: Apply the common initialization code for ADL Upstream commit: 299a5fc8e783eed705015e83e381912dbbf3eabc Conflict: none Use the intel_pmu_init_glc() and intel_pmu_init_grt() to replace the duplicate code for ADL. The current code already checks the PERF_X86_EVENT_TOPDOWN flag before invoking the Topdown metrics functions. (The PERF_X86_EVENT_TOPDOWN flag is to indicate the Topdown metric feature, which is only available for the p-core.) Drop the unnecessary adl_set_topdown_event_period() and adl_update_topdown_event(). Intel-SIG: commit 299a5fc8e783 ("perf/x86/intel: Apply the common initialization code for ADL") Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-5-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 53 ++---------------------------------- 1 file changed, 2 insertions(+), 51 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 98b6fd726e67..754aec2513ff 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2561,16 +2561,6 @@ static int icl_set_topdown_event_period(struct perf_event *event) return 0; } -static int adl_set_topdown_event_period(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_set_topdown_event_period(event); -} - DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period); static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) @@ -2713,16 +2703,6 @@ static u64 icl_update_topdown_event(struct perf_event *event) x86_pmu.num_topdown_events - 1); } -static u64 adl_update_topdown_event(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_update_topdown_event(event); -} - DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); static void intel_pmu_read_topdown_event(struct perf_event *event) @@ -6716,32 +6696,11 @@ __init int intel_pmu_init(void) static_branch_enable(&perf_is_hybrid); x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.lbr_pt_coexist = true; x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.num_topdown_events = 8; - static_call_update(intel_pmu_update_topdown_event, - &adl_update_topdown_event); - static_call_update(intel_pmu_set_topdown_event_period, - &adl_set_topdown_event_period); - x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; - /* - * The rtm_abort_event is used to check whether to enable GPRs - * for the RTM 
abort event. Atom doesn't have the RTM abort - * event. There is no harmful to set it in the common - * x86_pmu.rtm_abort_event. - */ - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); td_attr = adl_hybrid_events_attrs; mem_attr = adl_hybrid_mem_attrs; @@ -6753,6 +6712,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; + intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; @@ -6782,16 +6742,13 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_glc_event_constraints; - pmu->pebs_constraints = intel_glc_pebs_event_constraints; pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; pmu->cpu_type = hybrid_small; + intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; @@ -6803,12 +6760,6 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 0; pmu->intel_cap.pebs_output_pt_available = 1; - memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - pmu->event_constraints = intel_slm_event_constraints; - pmu->pebs_constraints = intel_grt_pebs_event_constraints; - pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; From 94b736d9943c62e9dc7c7e152c0586cfa2ce234c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:05 -0700 Subject: [PATCH 13/39] perf/x86/intel: Clean up the hybrid CPU type handling code Upstream commit: b0560bfd4b70277a4936c82e50e940aa253c95bf Conflict: none There is a fairly long list of grievances about the current code. The main beefs: 1. hybrid_big_small assumes that the *HARDWARE* (CPUID) provided core types are a bitmap. They are not. If Intel happened to make a core type of 0xff, hilarity would ensue. 2. adl_get_hybrid_cpu_type() utterly inscrutable. There are precisely zero comments and zero changelog about what it is attempting to do. According to Kan, the adl_get_hybrid_cpu_type() is there because some Alder Lake (ADL) CPUs can do some silly things. Some ADL models are *supposed* to be hybrid CPUs with big and little cores, but there are some SKUs that only have big cores. CPUID(0x1a) on those CPUs does not say that the CPUs are big cores. It apparently just returns 0x0. It confuses perf because it expects to see either 0x40 (Core) or 0x20 (Atom). The perf workaround for this is to watch for a CPU core saying it is type 0x0. If that happens on an Alder Lake, it calls x86_pmu.get_hybrid_cpu_type() and just assumes that the core is a Core (0x40) CPU. To fix up the mess, separate out the CPU types and the 'pmu' types. 
This allows 'hybrid_pmu_type' bitmaps without worrying that some future CPU type will set multiple bits. Since the types are now separate, add a function to glue them back together again. Actual comment on the situation in the glue function (find_hybrid_pmu_for_cpu()). Also, give ->get_hybrid_cpu_type() a real return type and make it clear that it is overriding the *CPU* type, not the PMU type. Rename cpu_type to pmu_type in the struct x86_hybrid_pmu to reflect the change. Intel-SIG: commit b0560bfd4b70 ("perf/x86/intel: Clean up the hybrid CPU type handling code") Backport as a dependency needed by the GNR distinct pmu name fix Originally-by: Dave Hansen Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-6-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/core.c | 6 +-- arch/x86/events/intel/core.c | 75 ++++++++++++++++++++++++------------ arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 35 ++++++++++------- 4 files changed, 75 insertions(+), 43 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index ecdf6d759b82..5b0dd07b1ef1 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1888,9 +1888,9 @@ ssize_t events_hybrid_sysfs_show(struct device *dev, str = pmu_attr->event_str; for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (!(x86_pmu.hybrid_pmu[i].cpu_type & pmu_attr->pmu_type)) + if (!(x86_pmu.hybrid_pmu[i].pmu_type & pmu_attr->pmu_type)) continue; - if (x86_pmu.hybrid_pmu[i].cpu_type & pmu->cpu_type) { + if (x86_pmu.hybrid_pmu[i].pmu_type & pmu->pmu_type) { next_str = strchr(str, ';'); if (next_str) return snprintf(page, next_str - str + 1, "%s", str); @@ -2170,7 +2170,7 @@ static int __init init_hw_perf_events(void) hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE; err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name, - (hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1); + (hybrid_pmu->pmu_type == hybrid_big) ? 
PERF_TYPE_RAW : -1); if (err) break; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 754aec2513ff..4bc83a594915 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3864,7 +3864,7 @@ static inline bool require_mem_loads_aux_event(struct perf_event *event) return false; if (is_hybrid()) - return hybrid_pmu(event->pmu)->cpu_type == hybrid_big; + return hybrid_pmu(event->pmu)->pmu_type == hybrid_big; return true; } @@ -4416,9 +4416,9 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return glc_get_event_constraints(cpuc, idx, event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4493,9 +4493,9 @@ mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return rwc_get_event_constraints(cpuc, idx, event); - if (pmu->cpu_type == hybrid_small) + if (pmu->pmu_type == hybrid_small) return cmt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4506,18 +4506,18 @@ static int adl_hw_config(struct perf_event *event) { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return hsw_hw_config(event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return intel_pmu_hw_config(event); WARN_ON(1); return -EOPNOTSUPP; } -static u8 adl_get_hybrid_cpu_type(void) +static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) { - return hybrid_big; + return HYBRID_INTEL_CORE; } /* @@ -4693,22 +4693,47 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) } } +static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) +{ + u8 cpu_type = get_this_hybrid_cpu_type(); + int i; + + /* + * This is running on a CPU model that is known to have hybrid + * configurations. But the CPU told us it is not hybrid, shame + * on it. There should be a fixup function provided for these + * troublesome CPUs (->get_hybrid_cpu_type). 
+ */ + if (cpu_type == HYBRID_INTEL_NONE) { + if (x86_pmu.get_hybrid_cpu_type) + cpu_type = x86_pmu.get_hybrid_cpu_type(); + else + return NULL; + } + + /* + * This essentially just maps between the 'hybrid_cpu_type' + * and 'hybrid_pmu_type' enums: + */ + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; + + if (cpu_type == HYBRID_INTEL_CORE && + pmu_type == hybrid_big) + return &x86_pmu.hybrid_pmu[i]; + if (cpu_type == HYBRID_INTEL_ATOM && + pmu_type == hybrid_small) + return &x86_pmu.hybrid_pmu[i]; + } + + return NULL; +} + static bool init_hybrid_pmu(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - u8 cpu_type = get_this_hybrid_cpu_type(); - struct x86_hybrid_pmu *pmu = NULL; - int i; + struct x86_hybrid_pmu *pmu = find_hybrid_pmu_for_cpu(); - if (!cpu_type && x86_pmu.get_hybrid_cpu_type) - cpu_type = x86_pmu.get_hybrid_cpu_type(); - - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (x86_pmu.hybrid_pmu[i].cpu_type == cpu_type) { - pmu = &x86_pmu.hybrid_pmu[i]; - break; - } - } if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) { cpuc->pmu = NULL; return false; @@ -5783,7 +5808,7 @@ static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) struct perf_pmu_events_hybrid_attr *pmu_attr = container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr); - return pmu->cpu_type & pmu_attr->pmu_type; + return pmu->pmu_type & pmu_attr->pmu_type; } static umode_t hybrid_events_is_visible(struct kobject *kobj, @@ -5820,7 +5845,7 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj, container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr); int cpu = hybrid_find_supported_cpu(pmu); - return (cpu >= 0) && (pmu->cpu_type & pmu_attr->pmu_type) ? attr->mode : 0; + return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? 
attr->mode : 0; } static struct attribute_group hybrid_group_events_td = { @@ -6711,7 +6736,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; - pmu->cpu_type = hybrid_big; + pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { @@ -6747,7 +6772,7 @@ __init int intel_pmu_init(void) /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; - pmu->cpu_type = hybrid_small; + pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index a568dd132ff2..807de7b596ab 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -261,7 +261,7 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status, { u64 val; - WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); + WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); dse &= PERF_PEBS_DATA_SOURCE_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 38342d1614f5..fb56518356ec 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -658,10 +658,29 @@ enum { #define PERF_PEBS_DATA_SOURCE_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) +enum hybrid_cpu_type { + HYBRID_INTEL_NONE, + HYBRID_INTEL_ATOM = 0x20, + HYBRID_INTEL_CORE = 0x40, +}; + +enum hybrid_pmu_type { + not_hybrid, + hybrid_small = BIT(0), + hybrid_big = BIT(1), + + hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */ +}; + +#define X86_HYBRID_PMU_ATOM_IDX 0 +#define X86_HYBRID_PMU_CORE_IDX 1 + +#define X86_HYBRID_NUM_PMUS 2 + struct x86_hybrid_pmu { struct pmu pmu; const char *name; - u8 cpu_type; + enum hybrid_pmu_type pmu_type; cpumask_t supported_cpus; union perf_capabilities intel_cap; u64 intel_ctrl; @@ -727,18 +746,6 @@ extern struct static_key_false perf_is_hybrid; __Fp; \ }) -enum hybrid_pmu_type { - hybrid_big = 0x40, - hybrid_small = 0x20, - - hybrid_big_small = hybrid_big | hybrid_small, -}; - -#define X86_HYBRID_PMU_ATOM_IDX 0 -#define X86_HYBRID_PMU_CORE_IDX 1 - -#define X86_HYBRID_NUM_PMUS 2 - /* * struct x86_pmu - generic x86 pmu */ @@ -947,7 +954,7 @@ struct x86_pmu { */ int num_hybrid_pmus; struct x86_hybrid_pmu *hybrid_pmu; - u8 (*get_hybrid_cpu_type) (void); + enum hybrid_cpu_type (*get_hybrid_cpu_type) (void); }; struct x86_perf_task_context_opt { From 86a90df768991df4a394f7de98fe42fcbeb33b09 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:06 -0700 Subject: [PATCH 14/39] perf/x86/intel: Add common intel_pmu_init_hybrid() Upstream commit: 97588df87b56e27fd2b5d928d61c7a53e38afbb0 Conflict: none The current hybrid initialization codes aren't well organized and are hard to read. Factor out intel_pmu_init_hybrid() to do a common setup for each hybrid PMU. The PMU-specific capability will be updated later via either hard code (ADL) or CPUID hybrid enumeration (MTL). Splitting the ADL and MTL initialization codes, since they have different uarches. The hard code PMU capabilities are not required for MTL either. They can be enumerated by the new leaf 0x23 and IA32_PERF_CAPABILITIES MSR. 
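The counter enumeration itself is a pair of CPUID reads; a simplified sketch of what update_pmu_cap() below does (illustration only; sub-leaf 1 of leaf 0x23 is assumed to report the counter bitmaps):

        unsigned int eax, ebx, ecx, edx;

        /* Illustration only: CPUID.0x23, sub-leaf 1 */
        cpuid_count(ARCH_PERFMON_EXT_LEAF, 1, &eax, &ebx, &ecx, &edx);
        pmu->num_counters       = fls(eax);   /* general-purpose counters */
        pmu->num_counters_fixed = fls(ebx);   /* fixed counters */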
The hybrid enumeration of the IA32_PERF_CAPABILITIES MSR is broken on MTL. Using the default value. Intel-SIG: commit 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-7-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 160 ++++++++++++++++++++++++----------- 1 file changed, 110 insertions(+), 50 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 4bc83a594915..dd56bbff47fb 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4678,6 +4678,16 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static inline bool intel_pmu_broken_perf_cap(void) +{ + /* The Perf Metric (Bit 15) is always cleared */ + if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) || + (boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L)) + return true; + + return false; +} + static void update_pmu_cap(struct x86_hybrid_pmu *pmu) { unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF); @@ -4690,7 +4700,27 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->num_counters_fixed = fls(ebx); intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, &pmu->intel_ctrl, ebx); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); } + + + if (!intel_pmu_broken_perf_cap()) { + /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ + rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); + } + + if (pmu->intel_cap.perf_metrics) + pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + else + pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); + + if (pmu->intel_cap.pebs_output_pt_available) + pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + else + pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -6019,10 +6049,52 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) } } -static __always_inline bool is_mtl(u8 x86_model) +static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { + { hybrid_small, "cpu_atom" }, + { hybrid_big, "cpu_core" }, +}; + +static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) { - return (x86_model == INTEL_FAM6_METEORLAKE) || - (x86_model == INTEL_FAM6_METEORLAKE_L); + unsigned long pmus_mask = pmus; + struct x86_hybrid_pmu *pmu; + int idx = 0, bit; + + x86_pmu.num_hybrid_pmus = hweight_long(pmus_mask); + x86_pmu.hybrid_pmu = kcalloc(x86_pmu.num_hybrid_pmus, + sizeof(struct x86_hybrid_pmu), + GFP_KERNEL); + if (!x86_pmu.hybrid_pmu) + return -ENOMEM; + + static_branch_enable(&perf_is_hybrid); + x86_pmu.filter = intel_pmu_filter; + + for_each_set_bit(bit, &pmus_mask, ARRAY_SIZE(intel_hybrid_pmu_type_map)) { + pmu = &x86_pmu.hybrid_pmu[idx++]; + pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id; + pmu->name = intel_hybrid_pmu_type_map[bit].name; + + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = 
(struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + + pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; + if (pmu->pmu_type & hybrid_small) { + pmu->intel_cap.perf_metrics = 0; + pmu->intel_cap.pebs_output_pt_available = 1; + pmu->mid_ack = true; + } else if (pmu->pmu_type & hybrid_big) { + pmu->intel_cap.perf_metrics = 1; + pmu->intel_cap.pebs_output_pt_available = 0; + pmu->late_ack = true; + } + } + + return 0; } static __always_inline void intel_pmu_init_glc(struct pmu *pmu) @@ -6706,23 +6778,14 @@ __init int intel_pmu_init(void) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: /* * Alder Lake has 2 types of CPU, core and atom. * * Initialize the common PerfMon capabilities here. */ - x86_pmu.hybrid_pmu = kcalloc(X86_HYBRID_NUM_PMUS, - sizeof(struct x86_hybrid_pmu), - GFP_KERNEL); - if (!x86_pmu.hybrid_pmu) - return -ENOMEM; - static_branch_enable(&perf_is_hybrid); - x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; + intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; @@ -6735,10 +6798,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; - pmu->name = "cpu_core"; - pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); - pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; @@ -6763,45 +6823,45 @@ __init int intel_pmu_init(void) pmu->unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 1; - pmu->intel_cap.pebs_output_pt_available = 0; - pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; - pmu->name = "cpu_atom"; - pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); - pmu->mid_ack = true; - pmu->num_counters = x86_pmu.num_counters; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed; - pmu->max_pebs_events = x86_pmu.max_pebs_events; - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 0; - pmu->intel_cap.pebs_output_pt_available = 1; - if (is_mtl(boot_cpu_data.x86_model)) { - x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; - x86_pmu.pebs_latency_data = mtl_latency_data_small; - extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
- mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; - mem_attr = mtl_hybrid_mem_attrs; - intel_pmu_pebs_data_source_mtl(); - x86_pmu.get_event_constraints = mtl_get_event_constraints; - pmu->extra_regs = intel_cmt_extra_regs; - pr_cont("Meteorlake Hybrid events, "); - name = "meteorlake_hybrid"; - } else { - x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - intel_pmu_pebs_data_source_adl(); - pr_cont("Alderlake Hybrid events, "); - name = "alderlake_hybrid"; - } + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + intel_pmu_pebs_data_source_adl(); + pr_cont("Alderlake Hybrid events, "); + name = "alderlake_hybrid"; + break; + + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: + intel_pmu_init_hybrid(hybrid_big_small); + + x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.get_event_constraints = mtl_get_event_constraints; + x86_pmu.hw_config = adl_hw_config; + + td_attr = adl_hybrid_events_attrs; + mem_attr = mtl_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_glc(&pmu->pmu); + pmu->extra_regs = intel_rwc_extra_regs; + + /* Initialize Atom core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + intel_pmu_init_grt(&pmu->pmu); + pmu->extra_regs = intel_cmt_extra_regs; + + intel_pmu_pebs_data_source_mtl(); + pr_cont("Meteorlake Hybrid events, "); + name = "meteorlake_hybrid"; break; default: @@ -6913,7 +6973,7 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid()) + if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) intel_pmu_check_hybrid_pmus((u64)fixed_mask); if (x86_pmu.intel_cap.pebs_timing_info) From 682db94239f49da66e5d931c17518c1f292480b1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 8 Jul 2024 12:33:34 -0700 Subject: [PATCH 15/39] perf/x86/intel: Hide Topdown metrics events if the feature is not enumerated Upstream commit: 556a7c039a52c21da33eaae9269984a1ef59189b Conflict: none The below error is observed on Ice Lake VM. $ perf stat Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (slots). /bin/dmesg | grep -i perf may provide additional information. In a virtualization env, the Topdown metrics and the slots event haven't been supported yet. The guest CPUID doesn't enumerate them. However, the current kernel unconditionally exposes the slots event and the Topdown metrics events to sysfs, which misleads the perf tool and triggers the error. Hide the perf-metrics topdown events and the slots event if the perf-metrics feature is not enumerated. The big core of a hybrid platform can also supports the perf-metrics feature. Fix the hybrid platform as well. 
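A quick way to see the effect (illustration only; the non-hybrid sysfs path is assumed): once the perf-metrics feature is not enumerated, the slots/topdown events should simply disappear from sysfs and from the event listing, so perf stat no longer tries to open them:

        ls /sys/bus/event_source/devices/cpu/events/ | grep -E 'slots|topdown'
        perf list | grep -i topdown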
Intel-SIG: commit 556a7c039a52 ("perf/x86/intel: Hide Topdown metrics events if the feature is not enumerated") Backport 3 core pmu bugfixes to kernel v6.6 Closes: https://lore.kernel.org/lkml/CAM9d7cj8z+ryyzUHR+P1Dcpot2jjW+Qcc4CPQpfafTXN=LEU0Q@mail.gmail.com/ Reported-by: Dongli Zhang Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Dongli Zhang Link: https://lkml.kernel.org/r/20240708193336.1192217-2-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index dd56bbff47fb..6aac4b80be1f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5675,8 +5675,22 @@ exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) return x86_pmu.version >= 2 ? attr->mode : 0; } +static umode_t +td_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + /* + * Hide the perf metrics topdown events + * if the feature is not enumerated. + */ + if (x86_pmu.num_topdown_events) + return x86_pmu.intel_cap.perf_metrics ? attr->mode : 0; + + return attr->mode; +} + static struct attribute_group group_events_td = { .name = "events", + .is_visible = td_is_visible, }; static struct attribute_group group_events_mem = { @@ -5878,9 +5892,27 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj, return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0; } +static umode_t hybrid_td_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + + if (!is_attr_for_this_pmu(kobj, attr)) + return 0; + + + /* Only the big core supports perf metrics */ + if (pmu->pmu_type == hybrid_big) + return pmu->intel_cap.perf_metrics ? attr->mode : 0; + + return attr->mode; +} + static struct attribute_group hybrid_group_events_td = { .name = "events", - .is_visible = hybrid_events_is_visible, + .is_visible = hybrid_td_is_visible, }; static struct attribute_group hybrid_group_events_mem = { From 329015159479744ef2f03455fcab9b000e57cf8c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 8 Jul 2024 12:33:36 -0700 Subject: [PATCH 16/39] perf/x86/intel/ds: Fix non 0 retire latency on Raptorlake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream commit: e5f32ad56b22ebe384a6e7ddad6e9520c5495563 Conflict: none A non-0 retire latency can be observed on a Raptorlake which doesn't support the retire latency feature. By design, the retire latency shares the PERF_SAMPLE_WEIGHT_STRUCT sample type with other types of latency. That could avoid adding too many different sample types to support all kinds of latency. For the machine which doesn't support some kind of latency, 0 should be returned. Perf doesn’t clear/init all the fields of a sample data for the sake of performance. It expects the later perf_{prepare,output}_sample() to update the uninitialized field. However, the current implementation doesn't touch the field of the retire latency if the feature is not supported. The memory garbage is dumped into the perf data. Clear the retire latency if the feature is not supported. 
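For reference, a PERF_SAMPLE_WEIGHT_STRUCT consumer sees the fix as follows (illustration only; little-endian layout of the uapi union is assumed):

        union perf_sample_weight w;

        w.full = sample_weight;   /* 64-bit weight taken from the sample */
        /* var3_w (retire latency) is now 0 when the CPU does not
         * advertise the feature, instead of leftover garbage. */
        printf("retire latency: %u\n", w.var3_w);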
Intel-SIG: commit e5f32ad56b22 ("perf/x86/intel/ds: Fix non 0 retire latency on Raptorlake") Backport 3 core pmu bugfixes to kernel v6.6 Fixes: c87a31093c70 ("perf/x86: Support Retire Latency") Reported-by: "Bayduraev, Alexey V" Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: "Bayduraev, Alexey V" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20240708193336.1192217-4-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/ds.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 807de7b596ab..14b11d607f47 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1830,8 +1830,12 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, set_linear_ip(regs, basic->ip); regs->flags = PERF_EFLAGS_EXACT; - if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)) - data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK; + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { + if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY) + data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK; + else + data->weight.var3_w = 0; + } /* * The record for MEMINFO is in front of GP From dec082bc57c39b02511fa7762ddab8ff5d46b9c3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 8 Jul 2024 12:33:35 -0700 Subject: [PATCH 17/39] perf/x86/intel: Add a distinct name for Granite Rapids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream commit: fa0c1c9d283b37fdb7fc1dcccbb88fc8f48a4aa4 Conflict: none Currently, Sapphire Rapids and Granite Rapids share the same PMU name, sapphire_rapids, because from the kernel’s perspective GNR is similar to SPR. The only key difference is that they support different extra MSRs. The code path and the PMU name are shared. However, from end users' perspective, they are quite different. Besides the extra MSRs, GNR has a newer PEBS format, supports Retire Latency, supports the new CPUID enumeration architecture, doesn't require the load-latency AUX event, has additional TMA Level 1 Architectural Events, etc. The differences can be enumerated by CPUID or the PERF_CAPABILITIES MSR. They weren't reflected in the model-specific kernel setup. But it is worth having a distinct PMU name for GNR.
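After this change, the distinct name shows up in the usual places (illustration only):

        $ cat /sys/bus/event_source/devices/cpu/caps/pmu_name
        granite_rapids

and the boot log prints "Granite Rapids events" instead of "Sapphire Rapids events".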
Intel-SIG: commit fa0c1c9d283b ("perf/x86/intel: Add a distinct name for Granite Rapids") Backport 3 core pmu bugfixes to kernel v6.6 Fixes: a6742cb90b56 ("perf/x86/intel: Fix the FRONTEND encoding on GNR and MTL") Suggested-by: Ahmad Yasin Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20240708193336.1192217-3-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 6aac4b80be1f..230b45c77afe 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6785,12 +6785,18 @@ __init int intel_pmu_init(void) case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.extra_regs = intel_glc_extra_regs; - fallthrough; + pr_cont("Sapphire Rapids events, "); + name = "sapphire_rapids"; + goto glc_common; + case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: + x86_pmu.extra_regs = intel_rwc_extra_regs; + pr_cont("Granite Rapids events, "); + name = "granite_rapids"; + + glc_common: intel_pmu_init_glc(NULL); - if (!x86_pmu.extra_regs) - x86_pmu.extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_ept = 1; x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = glc_get_event_constraints; @@ -6801,8 +6807,6 @@ __init int intel_pmu_init(void) td_attr = glc_td_events_attrs; tsx_attr = glc_tsx_events_attrs; intel_pmu_pebs_data_source_skl(true); - pr_cont("Sapphire Rapids events, "); - name = "sapphire_rapids"; break; case INTEL_FAM6_ALDERLAKE: From 37f622d20ea0d8d6609e989ec89d69aaf61d2f31 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 11 Sep 2023 06:51:28 -0700 Subject: [PATCH 18/39] perf/x86/intel: Fix broken fixed event constraints extension Upstream commit: 950ecdc672aec9cd29036b2e2535b07c103af494 Conflict: none Unnecessary multiplexing is triggered when running an "instructions" event on an MTL. perf stat -e cpu_core/instructions/,cpu_core/instructions/ -a sleep 1 Performance counter stats for 'system wide': 115,489,000 cpu_core/instructions/ (50.02%) 127,433,777 cpu_core/instructions/ (49.98%) 1.002294504 seconds time elapsed Linux architectural perf events, e.g., cycles and instructions, usually have dedicated fixed counters. These events also have equivalent events which can be used in the general-purpose counters. The counters are precious. In intel_pmu_check_event_constraints(), perf checks/extends the event constraints of these events, so these events can utilize both fixed counters and general-purpose counters. The following cleanup commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") forgot to add the intel_pmu_check_event_constraints() call to update_pmu_cap(), so the architectural perf events cannot utilize the general-purpose counters. The code to check and update the counters, event constraints and extra_regs is the same among hybrid systems. Move intel_pmu_check_hybrid_pmus() to init_hybrid_pmu(), and remove the duplicate check in update_pmu_cap().
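With the constraint extension restored, the same test should run both copies of the event without multiplexing (illustration only; the counts will differ, but the "(50.xx%)" annotations should be gone):

        perf stat -e cpu_core/instructions/,cpu_core/instructions/ -a sleep 1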
Intel-SIG: commit 950ecdc672ae ("perf/x86/intel: Fix broken fixed event constraints extension") Backport following hybrid pmu bugfixes for commit 97588df87b56 Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230911135128.2322833-1-kan.liang@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 65 +++++++++++++++--------------------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 230b45c77afe..1e2fde76780c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4678,6 +4678,13 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, + int num_counters, + int num_counters_fixed, + u64 intel_ctrl); + +static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); + static inline bool intel_pmu_broken_perf_cap(void) { /* The Perf Metric (Bit 15) is always cleared */ @@ -4698,12 +4705,6 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) &eax, &ebx, &ecx, &edx); pmu->num_counters = fls(eax); pmu->num_counters_fixed = fls(ebx); - intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, - &pmu->intel_ctrl, ebx); - pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); } @@ -4711,6 +4712,16 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); } +} + +static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) +{ + intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, + &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); if (pmu->intel_cap.perf_metrics) pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; @@ -4721,6 +4732,13 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + + intel_pmu_check_event_constraints(pmu->event_constraints, + pmu->num_counters, + pmu->num_counters_fixed, + pmu->intel_ctrl); + + intel_pmu_check_extra_regs(pmu->extra_regs); } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -4776,6 +4794,8 @@ static bool init_hybrid_pmu(int cpu) if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) update_pmu_cap(pmu); + intel_pmu_check_hybrid_pmus(pmu); + if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) return false; @@ -6051,36 +6071,6 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs) } } -static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) -{ - struct x86_hybrid_pmu *pmu; - int i; - - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - pmu = &x86_pmu.hybrid_pmu[i]; - - intel_pmu_check_num_counters(&pmu->num_counters, - &pmu->num_counters_fixed, - &pmu->intel_ctrl, - fixed_mask); - - if (pmu->intel_cap.perf_metrics) 
{ - pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS; - } - - if (pmu->intel_cap.pebs_output_pt_available) - pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; - - intel_pmu_check_event_constraints(pmu->event_constraints, - pmu->num_counters, - pmu->num_counters_fixed, - pmu->intel_ctrl); - - intel_pmu_check_extra_regs(pmu->extra_regs); - } -} - static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { { hybrid_small, "cpu_atom" }, { hybrid_big, "cpu_core" }, @@ -7009,9 +6999,6 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) - intel_pmu_check_hybrid_pmus((u64)fixed_mask); - if (x86_pmu.intel_cap.pebs_timing_info) x86_pmu.flags |= PMU_FL_RETIRE_LATENCY; From a40502e4552eb6895509d8d898073b9e62f8c8c5 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 21 Nov 2023 09:46:28 +0800 Subject: [PATCH 19/39] perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities Upstream commit: e8df9d9f4209c04161321d8c12640ae560f65939 Conflict: none When running the perf-stat command on an Intel hybrid platform, perf-stat reports the following errors: sudo taskset -c 7 ./perf stat -vvvv -e cpu_atom/instructions/ sleep 1 Opening: cpu/cycles/:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xa00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 sys_perf_event_open failed, error -16 Performance counter stats for 'sleep 1': cpu_atom/instructions/ It looks like the cpu_atom/instructions/ event can't be enabled on the atom PMU even when the process is pinned on an atom core. Investigation shows that the exclusive_event_init() helper always returns an -EBUSY error in the perf event creation. That's strange since the atom PMU should not be an exclusive PMU. Further investigation shows the issue was introduced by commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") The commit originally intends to clear the PERF_PMU_CAP_AUX_OUTPUT bit from the PMU capabilities if intel_cap.pebs_output_pt_available is not set, but it incorrectly uses an 'or' operation, which leads to all PMU capability bits being set to 1 except PERF_PMU_CAP_AUX_OUTPUT. Testing this fix on Intel hybrid platforms shows that the observed issues disappear.
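The difference in one line (illustration only):

        caps |= ~PERF_PMU_CAP_AUX_OUTPUT;   /* bug: sets every bit except AUX_OUTPUT */
        caps &= ~PERF_PMU_CAP_AUX_OUTPUT;   /* fix: clears only the AUX_OUTPUT bit   */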
Intel-SIG: commit e8df9d9f4209 ("perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities") Backport following hybrid pmu bugfixes for commit 97588df87b56 Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Dapeng Mi Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231121014628.729989-1-dapeng1.mi@linux.intel.com [ Yunying Sun: amend commit log ] Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 1e2fde76780c..8b8e9189fd41 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4731,7 +4731,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) if (pmu->intel_cap.pebs_output_pt_available) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else - pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; intel_pmu_check_event_constraints(pmu->event_constraints, pmu->num_counters, From 4f9b9837d687b631fd28d7d73cbf8cbf84b034dc Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 8 Apr 2024 14:23:31 +0800 Subject: [PATCH 20/39] Add early quirk to identify kh-40000 Identify kh-40000 platforms by specific PCI device's version number. Signed-off-by: leoliu-oc --- arch/x86/kernel/early-quirks.c | 19 +++++++++++++++++++ include/linux/dma-map-ops.h | 10 ++++++++++ 2 files changed, 29 insertions(+) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a6c1867fc7aa..ddb857d94ed9 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -685,6 +687,19 @@ static void __init apple_airport_reset(int bus, int slot, int func) early_iounmap(mmio, BCM4331_MMIO_SIZE); } +bool is_zhaoxin_kh40000; + +static void quirk_zhaoxin_dma_patch(int num, int slot, int func) +{ + u8 revision; + + revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); + if (revision == 0x10) { + is_zhaoxin_kh40000 = true; + pr_info("zhaoxin direct dma patch enabled\n"); + } +} + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -728,6 +743,10 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, + { PCI_VENDOR_ID_ZHAOXIN, 0x1001, PCI_CLASS_BRIDGE_HOST, + PCI_BASE_CLASS_BRIDGE, QFLAG_APPLY_ONCE, quirk_zhaoxin_dma_patch }, + { PCI_VENDOR_ID_ZHAOXIN, 0x345B, PCI_CLASS_BRIDGE_HOST, + PCI_BASE_CLASS_BRIDGE, QFLAG_APPLY_ONCE, quirk_zhaoxin_dma_patch }, {} }; diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index ef0296925336..fdd20fda2b9f 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -516,4 +516,14 @@ pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, } #endif /* CONFIG_PCI_P2PDMA */ +#if defined CONFIG_PCI && defined CONFIG_X86 + +extern bool is_zhaoxin_kh40000; + +#else + +bool __weak is_zhaoxin_kh40000; + +#endif + #endif /* _LINUX_DMA_MAP_OPS_H */ From ab7f82d72290046790f10ac6f2bd8b6579899498 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 8 Apr 2024 20:35:27 
+0800 Subject: [PATCH 21/39] Add kh40000_direct_dma_ops for KH-40000 platform Add 'kh40000_direct_dma_ops' to replace 'direct_dma_ops' for KH-40000 platform. For coherent DMA access, memory can be allocated only from the memory node of the node where the device resides. For streaming DMA access, add a PCI read operation at the end of DMA access. Signed-off-by: leoliu-oc --- .../admin-guide/kernel-parameters.txt | 5 + arch/x86/kernel/Makefile | 1 + arch/x86/kernel/early-quirks.c | 1 + arch/x86/kernel/zhaoxin_kh40000.c | 176 ++++++++++++++++++ include/linux/dma-map-ops.h | 1 + kernel/dma/contiguous.c | 3 + 6 files changed, 187 insertions(+) create mode 100644 arch/x86/kernel/zhaoxin_kh40000.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 50fa1b60f517..18f325ab879b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2325,6 +2325,11 @@ isapnp= [ISAPNP] Format: ,,, + zhaoxin_patch_bitmask= + [X86] Bitmask for Zhaoxin Platform's patch. + bit 0: enable KH-40000 dma patch's node check function + + isolcpus= [KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance. [Deprecated - use cpusets instead] Format: [flag-list,] diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c25d40cbbdbe..2b433325ca8f 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -159,6 +159,7 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o + obj-$(CONFIG_PCI) += zhaoxin_kh40000.o endif obj-$(CONFIG_HYGON_CSV) += csv.o diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index ddb857d94ed9..b5f5e0916894 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -696,6 +696,7 @@ static void quirk_zhaoxin_dma_patch(int num, int slot, int func) revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); if (revision == 0x10) { is_zhaoxin_kh40000 = true; + dma_ops = &kh40000_dma_direct_ops; pr_info("zhaoxin direct dma patch enabled\n"); } } diff --git a/arch/x86/kernel/zhaoxin_kh40000.c b/arch/x86/kernel/zhaoxin_kh40000.c new file mode 100644 index 000000000000..c477b18892fa --- /dev/null +++ b/arch/x86/kernel/zhaoxin_kh40000.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../../../kernel/dma/direct.h" + +/*** + * usage: + * set "zhaoxin_patch_bitmask=" in cmdline + * value description: + * bit 0: enable(1) node check or not(0). 
default 1 + */ +enum { + ZHAOXIN_P2CW_NODE_CHECK = BIT(0), + ZHAOXIN_PATCH_CODE_MAX = ZHAOXIN_P2CW_NODE_CHECK, +}; + +#define ZHAOXIN_PATCH_CODE_DEFAULT ZHAOXIN_P2CW_NODE_CHECK + +unsigned long zhaoxin_patch_code = ZHAOXIN_PATCH_CODE_DEFAULT; + +static int __init zhaoxin_patch_code_setup(char *str) +{ + int err = kstrtoul(str, 0, &zhaoxin_patch_code); + + if (err || (zhaoxin_patch_code > ZHAOXIN_PATCH_CODE_MAX)) { + pr_err("cmdline 'zhaoxin_patch_bitmask=%s' inappropriate\n", + str); + return err; + } + + if (ZHAOXIN_P2CW_NODE_CHECK | zhaoxin_patch_code) + pr_info("zhaoxin dma patch node check is enabled\n"); + + return 0; +} +__setup("zhaoxin_patch_bitmask=", zhaoxin_patch_code_setup); + +static struct pci_dev *kh40000_get_pci_dev(struct device *dev) +{ + if (dev_is_pci(dev)) + return to_pci_dev(dev); + + if (dev->parent) + return kh40000_get_pci_dev(dev->parent); + + return NULL; +} + +static void kh40000_sync_single_dma_for_cpu(struct device *dev, dma_addr_t paddr, + enum dma_data_direction dir, bool is_iommu) +{ + u8 vid; + struct pci_dev *pci; + u64 dma_mask = *dev->dma_mask; + + /* check direction */ + if ((dir != DMA_FROM_DEVICE) && (dir != DMA_BIDIRECTIONAL)) + return; + + /* check dma capability */ + if (dma_mask <= DMA_BIT_MASK(32)) + return; + + /* check device type */ + pci = kh40000_get_pci_dev(dev); + if (pci == NULL) + return; + + /* get real physical address */ + if (is_iommu) { + struct iommu_domain *domain = iommu_get_dma_domain(dev); + + paddr = iommu_iova_to_phys(domain, paddr); + if (!paddr) + return; + } + + /* check node or not */ + if ((zhaoxin_patch_code & ZHAOXIN_P2CW_NODE_CHECK) + && pfn_to_nid(PFN_DOWN(paddr)) == dev_to_node(dev)) + return; + + /* flush data by one pci read cycle */ + pci_read_config_byte(pci, PCI_VENDOR_ID, &vid); +} + +/* zhaoxin kh-40000 direct dma ops */ +static void *kh40000_dma_direct_alloc(struct device *dev, size_t size, + dma_addr_t *addr, gfp_t gfp, unsigned long attrs) +{ + if (dev->coherent_dma_mask > DMA_BIT_MASK(32)) + gfp |= __GFP_THISNODE; + + return dma_direct_alloc(dev, size, addr, gfp, attrs); +} + +static void kh40000_dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 0); + dma_direct_unmap_page(dev, addr, size, dir, attrs); +} + +static void kh40000_dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 0); + + dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir); +} + +static void kh40000_dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 0); + dma_direct_sync_single_for_cpu(dev, addr, size, dir); +} + +static void kh40000_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 0); + + dma_direct_unmap_sg(dev, sgl, nents, dir, attrs); +} + +static void kh40000_dma_direct_unmap_resource(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 0); +} + +const struct dma_map_ops kh40000_dma_direct_ops 
= { + .flags = DMA_F_PCI_P2PDMA_SUPPORTED, + .alloc = kh40000_dma_direct_alloc, + .sync_sg_for_cpu = kh40000_dma_direct_sync_sg_for_cpu, + .unmap_page = kh40000_dma_direct_unmap_page, + .sync_single_for_cpu = kh40000_dma_direct_sync_single_for_cpu, + .unmap_sg = kh40000_dma_direct_unmap_sg, + .unmap_resource = kh40000_dma_direct_unmap_resource, + .dma_supported = dma_direct_supported, + .free = dma_direct_free, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, + .sync_single_for_device = dma_direct_sync_single_for_device, + .sync_sg_for_device = dma_direct_sync_sg_for_device, + .get_required_mask = dma_direct_get_required_mask, + .max_mapping_size = dma_direct_max_mapping_size, + .mmap = dma_direct_mmap, + .get_sgtable = dma_direct_get_sgtable, + .map_page = dma_direct_map_page, + .map_sg = dma_direct_map_sg, + .map_resource = dma_direct_map_resource, +}; diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index fdd20fda2b9f..46f7635153b1 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -519,6 +519,7 @@ pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, #if defined CONFIG_PCI && defined CONFIG_X86 extern bool is_zhaoxin_kh40000; +extern const struct dma_map_ops kh40000_dma_direct_ops; #else diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index f005c66f378c..8b860c7ecabc 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -224,6 +224,9 @@ void __init dma_contiguous_reserve(phys_addr_t limit) dma_numa_cma_reserve(); + if (is_zhaoxin_kh40000) + return; + pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); if (size_cmdline != -1) { From 4660502422ff4f7e39f046cf452584165531c5ae Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 8 Apr 2024 14:25:45 +0800 Subject: [PATCH 22/39] Add kh40000_iommu_dma_ops for KH-40000 platform Add 'kh40000_iommu_dma_ops' to replace 'intel_dma_ops' for KH-40000 platform. For coherent DMA access, memory can be allocated only from the memory node of the node where the device resides. For streaming DMA access, add a PCI read operation at the end of DMA access. 
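Both ops tables rely on the same flush-by-read trick: after a device-to-memory transfer, one dummy PCI configuration read from the device forces any posted DMA writes to reach memory before the CPU touches the buffer. A condensed sketch of that idea, simplified from kh40000_sync_single_dma_for_cpu() above (the DMA-mask and device-type checks and the IOMMU IOVA-to-physical translation are omitted, and the helper name is illustrative):

static void kh40000_flush_dma_by_pci_read(struct pci_dev *pdev, phys_addr_t paddr,
					  enum dma_data_direction dir)
{
	u8 vid;

	/* Only device-to-memory transfers need flushing. */
	if (dir != DMA_FROM_DEVICE && dir != DMA_BIDIRECTIONAL)
		return;

	/* Buffers on the device's own node do not need the extra read cycle. */
	if (pfn_to_nid(PFN_DOWN(paddr)) == dev_to_node(&pdev->dev))
		return;

	/* One config-space read drains the posted writes ahead of it. */
	pci_read_config_byte(pdev, PCI_VENDOR_ID, &vid);
}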
Signed-off-by: leoliu-oc --- arch/x86/kernel/zhaoxin_kh40000.c | 175 ++++++++++++++++++++++++++++++ drivers/iommu/intel/iommu.c | 3 + include/linux/dma-map-ops.h | 6 + 3 files changed, 184 insertions(+) diff --git a/arch/x86/kernel/zhaoxin_kh40000.c b/arch/x86/kernel/zhaoxin_kh40000.c index c477b18892fa..e8dd3bd43e72 100644 --- a/arch/x86/kernel/zhaoxin_kh40000.c +++ b/arch/x86/kernel/zhaoxin_kh40000.c @@ -174,3 +174,178 @@ const struct dma_map_ops kh40000_dma_direct_ops = { .map_sg = dma_direct_map_sg, .map_resource = dma_direct_map_resource, }; + +/* zhaoxin kh-40000 iommu dma ops */ +static const struct dma_map_ops *iommu_dma_ops; + +static void *kh40000_iommu_dma_alloc(struct device *dev, size_t size, + dma_addr_t *addr, gfp_t gfp, unsigned long attrs) +{ + gfp |= __GFP_THISNODE; + + return iommu_dma_ops->alloc(dev, size, addr, gfp, attrs); +} + +static void kh40000_iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs) +{ + iommu_dma_ops->free(dev, size, cpu_addr, handle, attrs); +} + +static struct page *kh40000_dma_common_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + return iommu_dma_ops->alloc_pages(dev, size, dma_handle, dir, gfp); +} + +static void kh40000_dma_common_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + iommu_dma_ops->free_pages(dev, size, page, dma_handle, dir); +} + +static struct sg_table *kh40000_iommu_dma_alloc_noncontiguous(struct device *dev, + size_t size, enum dma_data_direction dir, gfp_t gfp, + unsigned long attrs) +{ + return iommu_dma_ops->alloc_noncontiguous(dev, size, dir, gfp, attrs); +} + +static void kh40000_iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ + return iommu_dma_ops->free_noncontiguous(dev, size, sgt, dir); +} + +static int kh40000_iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return iommu_dma_ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} + +static void kh40000_iommu_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1); + iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs); +} + +static int kh40000_iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return iommu_dma_ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); +} + +static dma_addr_t kh40000_iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return iommu_dma_ops->map_page(dev, page, offset, size, dir, attrs); +} + +static int kh40000_iommu_dma_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs); +} + +static void kh40000_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 1); + iommu_dma_ops->unmap_sg(dev, sgl, nelems, dir, attrs); +} + +static void 
kh40000_iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1); + iommu_dma_ops->sync_single_for_cpu(dev, addr, size, dir); +} + +static void kh40000_iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + iommu_dma_ops->sync_single_for_device(dev, addr, size, dir); +} + +static void kh40000_iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 1); + iommu_dma_ops->sync_sg_for_cpu(dev, sgl, nelems, dir); +} + +static void kh40000_iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + iommu_dma_ops->sync_sg_for_device(dev, sgl, nelems, dir); +} + +static dma_addr_t kh40000_iommu_dma_map_resource(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return iommu_dma_ops->map_resource(dev, phys, size, dir, attrs); +} + +static void kh40000_iommu_dma_unmap_resource(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1); + iommu_dma_ops->unmap_resource(dev, addr, size, dir, attrs); +} + +static unsigned long kh40000_iommu_dma_get_merge_boundary(struct device *dev) +{ + return iommu_dma_ops->get_merge_boundary(dev); +} + +static size_t kh40000_iommu_dma_opt_mapping_size(void) +{ + return iommu_dma_ops->opt_mapping_size(); +} + +const struct dma_map_ops kh40000_dma_iommu_ops = { + .flags = DMA_F_PCI_P2PDMA_SUPPORTED, + .alloc = kh40000_iommu_dma_alloc, + .free = kh40000_iommu_dma_free, + .unmap_page = kh40000_iommu_dma_unmap_page, + .alloc_pages = kh40000_dma_common_alloc_pages, + .free_pages = kh40000_dma_common_free_pages, + .alloc_noncontiguous = kh40000_iommu_dma_alloc_noncontiguous, + .free_noncontiguous = kh40000_iommu_dma_free_noncontiguous, + .mmap = kh40000_iommu_dma_mmap, + .get_sgtable = kh40000_iommu_dma_get_sgtable, + .map_page = kh40000_iommu_dma_map_page, + .map_sg = kh40000_iommu_dma_map_sg, + .unmap_sg = kh40000_iommu_dma_unmap_sg, + .sync_single_for_cpu = kh40000_iommu_dma_sync_single_for_cpu, + .sync_single_for_device = kh40000_iommu_dma_sync_single_for_device, + .sync_sg_for_cpu = kh40000_iommu_dma_sync_sg_for_cpu, + .sync_sg_for_device = kh40000_iommu_dma_sync_sg_for_device, + .map_resource = kh40000_iommu_dma_map_resource, + .unmap_resource = kh40000_iommu_dma_unmap_resource, + .get_merge_boundary = kh40000_iommu_dma_get_merge_boundary, + .opt_mapping_size = kh40000_iommu_dma_opt_mapping_size, +}; + +void kh40000_set_iommu_dma_ops(struct device *dev) +{ + if (dev->dma_ops) { + iommu_dma_ops = dev->dma_ops; + set_dma_ops(dev, &kh40000_dma_iommu_ops); + pr_info_once("zhaoxin iommu dma patch enabled\n"); + } +} diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4f296b8a022a..3d1d6ba9676f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4479,6 +4479,9 @@ static void intel_iommu_probe_finalize(struct device *dev) { set_dma_ops(dev, NULL); iommu_setup_dma_ops(dev, 0, U64_MAX); + + if (is_zhaoxin_kh40000) + kh40000_set_iommu_dma_ops(dev); } static void intel_iommu_get_resv_regions(struct device *device, diff --git 
a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 46f7635153b1..f6d31b80d382 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -520,10 +520,16 @@ pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, extern bool is_zhaoxin_kh40000; extern const struct dma_map_ops kh40000_dma_direct_ops; +void kh40000_set_iommu_dma_ops(struct device *dev); #else bool __weak is_zhaoxin_kh40000; +static inline void kh40000_set_iommu_dma_ops(struct device *dev) +{ + +} + #endif From e1577467ecbc0e59577e1c31d15492c0401f8302 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Fri, 28 Jun 2024 10:36:46 +0800 Subject: [PATCH 23/39] iommu/vt-d: Add support for detecting ACPI namespace device in RMRR As below, ZX-200 xHCI mcu is a RMRR ANDD device in some case. [060h 0096 2] Subtable Type : 0001 [Reserved Memory Region [062h 0098 2] Length : 0020 [064h 0100 2] Reserved : 0000 [066h 0102 2] PCI Segment Number : 0000 [068h 0104 8] Base Address : 00000000B5DA5000 [070h 0112 8] End Address (limit) : 00000000B5DDDFFF [078h 0120 1] Device Scope Type : 05 [Namespace Device] [079h 0121 1] Entry Length : 08 [07Ah 0122 2] Reserved : 0000 [07Ch 0124 1] Enumeration ID : 02 [07Dh 0125 1] PCI Bus Number : 09 [07Eh 0126 2] PCI Path : 12,00 iommu driver cannot find this device and build identity map for the RMRR region, DMAR faults would occur for xHCI controller. Add func dmar_acpi_bus_add_dev to find the RMRR ANDD device. Add func acpi_rmrr_andd_probe to build identity map for the RMRR region into the domain of the correspanding xHCI controller. Add func iova_reserve_domain_addr to keep away from RMRR region when using dma iova. Signed-off-by: leoliu-oc --- drivers/iommu/dma-iommu.c | 19 ++++++++++++ drivers/iommu/intel/dmar.c | 59 ++++++++++++++++++++++++++++++++++++- drivers/iommu/intel/iommu.c | 59 +++++++++++++++++++++++++++++++++++++ drivers/iommu/iommu.c | 13 +++++++- include/linux/dmar.h | 9 ++++++ include/linux/iommu.h | 15 ++++++++++ 6 files changed, 172 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1afb510ca4b2..4a91275bc221 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -562,6 +562,25 @@ resv_iova: return 0; } +int iova_reserve_domain_addr(struct iommu_domain *domain, dma_addr_t start, dma_addr_t end) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + + unsigned long lo, hi; + + lo = iova_pfn(iovad, start); + hi = iova_pfn(iovad, end); + + if (!cookie) + return -EINVAL; + + reserve_iova(iovad, lo, hi); + + return 0; +} +EXPORT_SYMBOL_GPL(iova_reserve_domain_addr); + static int iova_reserve_iommu_regions(struct device *dev, struct iommu_domain *domain) { diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 23cb80d62a9a..c8f7a98da804 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -767,6 +767,59 @@ static void __init dmar_acpi_insert_dev_scope(u8 device_number, device_number, dev_name(&adev->dev)); } +/* Return: > 0 if match found, 0 if no match found */ +bool dmar_rmrr_acpi_insert_dev_scope(u8 device_number, + struct acpi_device *adev, + void *start, void *end, + struct dmar_dev_scope *devices, + int devices_cnt) +{ + struct acpi_dmar_device_scope *scope; + struct device *tmp; + int i; + struct acpi_dmar_pci_path *path; + + for (; start < end; start += scope->length) { + scope = start; + if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE) + continue; + if 
(scope->enumeration_id != device_number) + continue; + path = (void *)(scope + 1); + pr_info("ACPI device \"%s\" under DMAR as %02x:%02x.%d\n", dev_name(&adev->dev), + scope->bus, path->device, path->function); + for_each_dev_scope(devices, devices_cnt, i, tmp) + if (tmp == NULL) { + devices[i].bus = scope->bus; + devices[i].devfn = PCI_DEVFN(path->device, path->function); + rcu_assign_pointer(devices[i].dev, get_device(&adev->dev)); + return true; + } + WARN_ON(i >= devices_cnt); + } + return false; +} + +static int dmar_acpi_bus_add_dev(u8 device_number, struct acpi_device *adev) +{ + struct dmar_drhd_unit *dmaru; + struct acpi_dmar_hardware_unit *drhd; + int ret; + + for_each_drhd_unit(dmaru) { + drhd = container_of(dmaru->hdr, struct acpi_dmar_hardware_unit, header); + ret = dmar_rmrr_acpi_insert_dev_scope(device_number, adev, (void *)(drhd+1), + ((void *)drhd)+drhd->header.length, + dmaru->devices, dmaru->devices_cnt); + if (ret) + break; + } + if (ret > 0) + ret = dmar_rmrr_add_acpi_dev(device_number, adev); + + return ret; +} + static int __init dmar_acpi_dev_scope_init(void) { struct acpi_dmar_andd *andd; @@ -794,7 +847,11 @@ static int __init dmar_acpi_dev_scope_init(void) andd->device_name); continue; } - dmar_acpi_insert_dev_scope(andd->device_number, adev); + + if (apply_zhaoxin_dmar_acpi_a_behavior()) + dmar_acpi_bus_add_dev(andd->device_number, adev); + else + dmar_acpi_insert_dev_scope(andd->device_number, adev); } } return 0; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4f296b8a022a..4b24283a858a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3468,6 +3468,24 @@ out: return ret; } +int dmar_rmrr_add_acpi_dev(u8 device_number, struct acpi_device *adev) +{ + int ret; + struct dmar_rmrr_unit *rmrru; + struct acpi_dmar_reserved_memory *rmrr; + + list_for_each_entry(rmrru, &dmar_rmrr_units, list) { + rmrr = container_of(rmrru->hdr, struct acpi_dmar_reserved_memory, header); + ret = dmar_rmrr_acpi_insert_dev_scope(device_number, adev, (void *)(rmrr + 1), + ((void *)rmrr) + rmrr->header.length, + rmrru->devices, rmrru->devices_cnt); + if (ret) + break; + } + + return 0; +} + int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { int ret; @@ -3726,6 +3744,43 @@ static int __init platform_optin_force_iommu(void) return 1; } +static inline int acpi_rmrr_device_create_direct_mappings(struct iommu_domain *domain, + struct device *dev) +{ + int ret; + + pr_info("rmrr andd dev:%s enter to %s\n", dev_name(dev), __func__); + ret = __acpi_rmrr_device_create_direct_mappings(domain, dev); + + return ret; +} + +static inline int acpi_rmrr_andd_probe(struct device *dev) +{ + struct intel_iommu *iommu = NULL; + struct pci_dev *pci_device = NULL; + u8 bus, devfn; + int ret = 0; + + ret = iommu_probe_device(dev); + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) { + pr_info("dpoint-- cannot get acpi device corresponding iommu\n"); + return -EINVAL; + } + + pci_device = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn); + if (!pci_device) { + pr_info("dpoint-- cannot get acpi devie corresponding pci_device\n"); + return -EINVAL; + } + ret = acpi_rmrr_device_create_direct_mappings(iommu_get_domain_for_dev(&pci_device->dev), + dev); + + return ret; +} + static int __init probe_acpi_namespace_devices(void) { struct dmar_drhd_unit *drhd; @@ -3748,6 +3803,10 @@ static int __init probe_acpi_namespace_devices(void) list_for_each_entry(pn, &adev->physical_node_list, node) { ret = iommu_probe_device(pn->dev); + + 
if (apply_zhaoxin_dmar_acpi_a_behavior()) + ret = acpi_rmrr_andd_probe(dev); + if (ret) break; } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3f1029c0825e..1bb7a4a39d28 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1103,7 +1103,8 @@ map_end: map_size = 0; } } - + if (apply_zhaoxin_dmar_acpi_a_behavior()) + iova_reserve_domain_addr(domain, start, end); } if (!list_empty(&mappings) && iommu_is_dma_domain(domain)) @@ -1171,6 +1172,16 @@ err_free_device: return ERR_PTR(ret); } +int __acpi_rmrr_device_create_direct_mappings(struct iommu_domain *domain, struct device *dev) +{ + int ret; + + ret = iommu_create_device_direct_mappings(domain, dev); + + return ret; +} +EXPORT_SYMBOL_GPL(__acpi_rmrr_device_create_direct_mappings); + /** * iommu_group_add_device - add a device to an iommu group * @group: the group into which to add the device (reference should be held) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e34b601b71fd..543c53e84a70 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -112,6 +112,9 @@ extern int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, void *start, void*end, u16 segment, struct dmar_dev_scope *devices, int devices_cnt); +extern bool dmar_rmrr_acpi_insert_dev_scope(u8 device_number, + struct acpi_device *adev, void *start, void *end, + struct dmar_dev_scope *devices, int devices_cnt); extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment, struct dmar_dev_scope *devices, int count); @@ -144,6 +147,7 @@ extern int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg); extern int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg); extern int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg); extern int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert); +extern int dmar_rmrr_add_acpi_dev(u8 device_number, struct acpi_device *adev); extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); #else /* !CONFIG_INTEL_IOMMU: */ static inline int intel_iommu_init(void) { return -ENODEV; } @@ -155,6 +159,11 @@ static inline void intel_iommu_shutdown(void) { } #define dmar_release_one_atsr dmar_res_noop #define dmar_parse_one_satc dmar_res_noop +static inline int dmar_rmrr_add_acpi_dev(u8 device_number, struct acpi_device *adev) +{ + return 0; +} + static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { return 0; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c31186a76f29..70170a9a0a92 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -586,6 +586,21 @@ void iommu_set_dma_strict(void); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); +static inline bool apply_zhaoxin_dmar_acpi_a_behavior(void) +{ +#if defined(CONFIG_CPU_SUP_ZHAOXIN) || defined(CONFIG_CPU_SUP_CENTAUR) + if (((boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) || + (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN)) && + ((boot_cpu_data.x86 == 7) && (boot_cpu_data.x86_model == 0x3b))) + return true; +#endif + return false; +} + +extern int iova_reserve_domain_addr(struct iommu_domain *domain, dma_addr_t start, dma_addr_t end); + +int __acpi_rmrr_device_create_direct_mappings(struct iommu_domain *domain, struct device *dev); + static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) { if (domain->ops->flush_iotlb_all) From 53812a7c8af83a6c1a6382cff456da433cc80b36 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Tue, 2 Jan 2024 19:24:03 
+0800 Subject: [PATCH 24/39] cpufreq: ACPI: add ITMT support when CPPC enabled The _CPC method can get per-core highest frequency. The highest frequency may varies between cores which mean cores can running at different max frequency, so can use it as a core priority and give a hint to scheduler in order to put critical task to the higher priority core. Signed-off-by: leoliu-oc --- arch/x86/kernel/itmt.c | 2 + drivers/cpufreq/acpi-cpufreq.c | 87 +++++++++++++++++++++++++++------- 2 files changed, 73 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index ee4fe8cdb857..b49ac8ecbbd6 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -122,6 +122,7 @@ int sched_set_itmt_support(void) return 0; } +EXPORT_SYMBOL_GPL(sched_set_itmt_support); /** * sched_clear_itmt_support() - Revoke platform's support of ITMT @@ -181,3 +182,4 @@ void sched_set_itmt_core_prio(int prio, int cpu) { per_cpu(sched_core_priority, cpu) = prio; } +EXPORT_SYMBOL_GPL(sched_set_itmt_core_prio); diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 4ac3a35dcd98..d34a8ca6187d 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -628,28 +628,35 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) #endif #ifdef CONFIG_ACPI_CPPC_LIB -static u64 get_max_boost_ratio(unsigned int cpu) +static bool cppc_highest_perf_diff; +static struct cpumask core_prior_mask; + +static void cppc_get_highest_nominal_perf(int cpu, u64 *highest_perf, u64 *nominal_perf) { struct cppc_perf_caps perf_caps; - u64 highest_perf, nominal_perf; int ret; + ret = cppc_get_perf_caps(cpu, &perf_caps); + if (ret) { + pr_debug("CPU%d: Unable to get performance capabilities (%d)\n", cpu, ret); + return; + } + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + *highest_perf = amd_get_highest_perf(); + else + *highest_perf = perf_caps.highest_perf; + + *nominal_perf = perf_caps.nominal_perf; +} + +static u64 get_max_boost_ratio(unsigned int cpu) +{ + u64 highest_perf, nominal_perf; + if (acpi_pstate_strict) return 0; - ret = cppc_get_perf_caps(cpu, &perf_caps); - if (ret) { - pr_debug("CPU%d: Unable to get performance capabilities (%d)\n", - cpu, ret); - return 0; - } - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - highest_perf = amd_get_highest_perf(); - else - highest_perf = perf_caps.highest_perf; - - nominal_perf = perf_caps.nominal_perf; + cppc_get_highest_nominal_perf(cpu, &highest_perf, &nominal_perf); if (!highest_perf || !nominal_perf) { pr_debug("CPU%d: highest or nominal performance missing\n", cpu); @@ -663,8 +670,51 @@ static u64 get_max_boost_ratio(unsigned int cpu) return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); } + +/* The work item is needed to avoid CPU hotplug locking issues */ +static void cpufreq_sched_itmt_work_fn(struct work_struct *work) +{ + sched_set_itmt_support(); +} + +static DECLARE_WORK(sched_itmt_work, cpufreq_sched_itmt_work_fn); + +static void cpufreq_set_itmt_prio(int cpu) +{ + u64 highest_perf, nominal_perf; + static u64 max_highest_perf = 0, min_highest_perf = U64_MAX; + + cppc_get_highest_nominal_perf(cpu, &highest_perf, &nominal_perf); + + sched_set_itmt_core_prio(highest_perf, cpu); + cpumask_set_cpu(cpu, &core_prior_mask); + + if (max_highest_perf <= min_highest_perf) { + if (highest_perf > max_highest_perf) + max_highest_perf = highest_perf; + + if (highest_perf < min_highest_perf) + min_highest_perf = highest_perf; + + if (max_highest_perf > min_highest_perf) { + /* + * This 
code can be run during CPU online under the + * CPU hotplug locks, so sched_set_itmt_support() + * cannot be called from here. Queue up a work item + * to invoke it. + */ + cppc_highest_perf_diff = true; + } + } + + if (cppc_highest_perf_diff && cpumask_equal(&core_prior_mask, cpu_online_mask)) { + pr_debug("queue a work to set itmt enabled\n"); + schedule_work(&sched_itmt_work); + } +} #else static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; } +static inline void cpufreq_set_itmt_prio(int cpu) { } #endif static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) @@ -677,7 +727,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int valid_states = 0; unsigned int result = 0; u64 max_boost_ratio; - unsigned int i; + unsigned int i, j; #ifdef CONFIG_SMP static int blacklisted; #endif @@ -741,6 +791,11 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) pr_info_once("overriding BIOS provided _PSD data\n"); } #endif + if (c->x86_vendor == X86_VENDOR_CENTAUR || c->x86_vendor == X86_VENDOR_ZHAOXIN) { + for_each_cpu(j, policy->cpus) { + cpufreq_set_itmt_prio(j); + } + } /* capability check */ if (perf->state_count <= 1) { From c090c94dbbf46c23f6c65dc55f75013c561ba426 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Wed, 3 Jan 2024 10:38:35 +0800 Subject: [PATCH 25/39] Set ASYM_PACKING Flag on Zhaoxin KH-40000 platform Set ASYM_PACKING Flag on Zhaoxin KH-40000 platform Signed-off-by: leoliu-oc --- kernel/sched/topology.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 1d5a86a1d779..4781021c3714 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -2485,6 +2485,17 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att } } +#if IS_ENABLED(CONFIG_X86) + if ((boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR || + boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) && + (boot_cpu_data.x86 == 7 && boot_cpu_data.x86_model == 0x5b)) { + for_each_cpu(i, cpu_map) { + for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) + sd->flags |= SD_ASYM_PACKING; + } + } +#endif + /* Calculate CPU capacity for physical packages and nodes */ for (i = nr_cpumask_bits-1; i >= 0; i--) { if (!cpumask_test_cpu(i, cpu_map)) From 7163102e0aaa3f66d264a6076436c717a13762a1 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Tue, 18 Jun 2024 18:53:26 +0800 Subject: [PATCH 26/39] ata: ahci: Add support for AHCI SGPIO Enclosure Management To monitor and control auxiliary services in a drive enclosure, the Zhaoxin AHCI controller adds enclosure management support for the SGPIO protocol with two message types: an LED message type and an SGPIO register interface message type. The LED message type uses the generic AHCI interface that the default AHCI driver already supports; the SGPIO register interface message type is vendor specific and based on SFF-8485. This patch adds support for it. Signed-off-by: leoliu-oc --- drivers/ata/Kconfig | 10 + drivers/ata/Makefile | 1 + drivers/ata/ahci_zhaoxin_sgpio.c | 706 +++++++++++++++++++++++++++++++ drivers/ata/ahci_zhaoxin_sgpio.h | 221 ++++++++++ drivers/ata/libahci.c | 6 + 5 files changed, 944 insertions(+) create mode 100644 drivers/ata/ahci_zhaoxin_sgpio.c create mode 100644 drivers/ata/ahci_zhaoxin_sgpio.h diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 42b51c9812a0..fe2d3e9ab256 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -115,6 +115,16 @@ config SATA_AHCI If unsure, say N.
+config AHCI_ZHAOXIN_SGPIO + tristate "zhaoxin AHCI SGPIO support" + depends on SATA_AHCI + default y + help + This option enables support for Zhaoxin AHCI SGPIO. + Add support SGPIO mode and SGPIO GP mode. + + If unsure, say N. + config SATA_MOBILE_LPM_POLICY int "Default SATA Link Power Management policy for low power chipsets" range 0 4 diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 20e6645ab737..7ab98075e887 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_AHCI_ST) += ahci_st.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_TEGRA) += ahci_tegra.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_XGENE) += ahci_xgene.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_QORIQ) += ahci_qoriq.o libahci.o libahci_platform.o +obj-$(CONFIG_AHCI_ZHAOXIN_SGPIO) += ahci_zhaoxin_sgpio.o # SFF w/ custom DMA obj-$(CONFIG_PDC_ADMA) += pdc_adma.o diff --git a/drivers/ata/ahci_zhaoxin_sgpio.c b/drivers/ata/ahci_zhaoxin_sgpio.c new file mode 100644 index 000000000000..ad0715bc389e --- /dev/null +++ b/drivers/ata/ahci_zhaoxin_sgpio.c @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ahci_zhaoxin_sgpio.c - Driver for Zhaoxin sgpio + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ahci.h" +#include "libata.h" +#include "ahci_zhaoxin_sgpio.h" + +static LIST_HEAD(sgpio_zhaoxin_list); + +static unsigned int zhaoxin_em_type __read_mostly = AHCI_EM_MSG_LED_MODE; /*LED protocol*/ +module_param(zhaoxin_em_type, int, 0644); +MODULE_PARM_DESC(zhaoxin_em_type, + "AHCI Enclosure Management Message type control (1 = led on, 2 = sgpio on,3 = sgpio gp on)"); + +int ahci_wait_em_reset(struct sgpio_zhaoxin *sgpio_zhaoxin, u32 retry) +{ + void __iomem *mmio = sgpio_zhaoxin->mmio; + u32 em_ctl; + + if (!sgpio_zhaoxin || retry == 0) { + pr_err("In ahci wait em reset, invalid param\n"); + return -EINVAL; + } + + while (retry--) { /*EM_CTL needs reset at least 64ms*/ + em_ctl = readl(mmio + HOST_EM_CTL); + if (em_ctl & EM_CTL_RST) + usleep_range(10000, 20000); /*EM_CTL still in reset, usleep 10ms*/ + else + break; + + if (!retry) + pr_err("Wait for EM_CTL reset, time out\n"); + } + + return 0; +} + +void ahci_zhaoxin_set_em_sgpio(struct sgpio_zhaoxin *sgpio_zhaoxin) +{ + void __iomem *mmio = sgpio_zhaoxin->mmio; + void __iomem *em_mmio = mmio + SGPIO_OFFSET; + + u32 read; + + sgpio_zhaoxin->sgpio_reg.cfg_0.enable = 1; + + sgpio_zhaoxin->sgpio_reg.cfg_1.blink_gen_a = 0x7; + sgpio_zhaoxin->sgpio_reg.cfg_1.blink_gen_b = 0x3; + sgpio_zhaoxin->sgpio_reg.cfg_1.blink_gen_c = 0x0; + sgpio_zhaoxin->sgpio_reg.cfg_1.stretch_act_on = 0; + sgpio_zhaoxin->sgpio_reg.cfg_1.stretch_act_off = 0; + sgpio_zhaoxin->sgpio_reg.cfg_1.max_act_on = 2; + sgpio_zhaoxin->sgpio_reg.cfg_1.force_act_off = 1; + + sgpio_zhaoxin->sgpio_reg.gp_transmit_cfg.sload = 0xf; + sgpio_zhaoxin->sgpio_reg.gp_transmit_cfg.count = 0x0; + + sgpio_zhaoxin->sgpio_reg.transmit_0.sgpio_tx_0 = 0; + sgpio_zhaoxin->sgpio_reg.transmit_1.sgpio_tx_1 = 0; + sgpio_zhaoxin->sgpio_reg.gp_transmit_reg.sgpio_tx_gp = 0; + + sgpio_zhaoxin->sgpio_reg.receive_reg.sgpio_rx = 0x07070707; + sgpio_zhaoxin->sgpio_reg.gp_receive_reg.sgpio_rx_gp = 0; + + /*Setup SGPIO type*/ + read = readl(mmio + sgpio_zhaoxin->em_loc); + read = read | SGPIO_MESSAGE_HEAD; /*LED register MSG_HEAD, select SGPIO*/ + writel(read, mmio + sgpio_zhaoxin->em_loc); + + /*Setup gp mode*/ + writel(sgpio_zhaoxin->sgpio_reg.gp_transmit_cfg.sgpio_tx_gp_cfg, 
em_mmio + 0x38); + + /*Initial SGPIO CFG1*/ + writel(sgpio_zhaoxin->sgpio_reg.cfg_1.sgpio_cfg_1, em_mmio + 0x4); + + /*Initial SGPIO CFG0*/ + read = readl(em_mmio); + read |= sgpio_zhaoxin->sgpio_reg.cfg_0.sgpio_cfg_0; + writel(read, em_mmio); +} + +void ahci_zhaoxin_set_em_sgpio_gpmode(struct sgpio_zhaoxin *sgpio_zhaoxin) +{ + void __iomem *mmio = sgpio_zhaoxin->mmio; + void __iomem *em_mmio = mmio + SGPIO_OFFSET; + u32 read; + + sgpio_zhaoxin->sgpio_reg.cfg_0.enable = 1; + + sgpio_zhaoxin->sgpio_reg.gp_transmit_cfg.sload = 0xf; + sgpio_zhaoxin->sgpio_reg.gp_transmit_cfg.count = 0xff; + + sgpio_zhaoxin->sgpio_reg.transmit_0.sgpio_tx_0 = 0; + sgpio_zhaoxin->sgpio_reg.transmit_1.sgpio_tx_1 = 0; + sgpio_zhaoxin->sgpio_reg.gp_transmit_reg.sgpio_tx_gp = 0; + + sgpio_zhaoxin->sgpio_reg.receive_reg.sgpio_rx = 0; + sgpio_zhaoxin->sgpio_reg.gp_receive_reg.sgpio_rx_gp = 0xff0f0000; + + /*Setup SGPIO type*/ + read = readl(mmio + sgpio_zhaoxin->em_loc); + read |= SGPIO_MESSAGE_HEAD; + writel(read, mmio + sgpio_zhaoxin->em_loc); + + /*Setup gp mode*/ + writel(sgpio_zhaoxin->sgpio_reg.gp_transmit_cfg.sgpio_tx_gp_cfg, em_mmio + 0x38); + + /*Enable SGPIO*/ + writel(sgpio_zhaoxin->sgpio_reg.cfg_0.sgpio_cfg_0, em_mmio); +} + +static ssize_t ahci_em_type_sys_show(struct sgpio_zhaoxin *sgpio_zhaoxin, char *buf) +{ + return sprintf(buf, "0x%x\n", zhaoxin_em_type); +} +static ssize_t ahci_em_type_sys_store(struct sgpio_zhaoxin *sgpio_zhaoxin, const char *buf, + size_t count) +{ + int code = 0; + int rc = 0; + + if (kstrtouint(buf, 0, &code)) + return count; + + if (code == AHCI_EM_MSG_LED_MODE) { + zhaoxin_em_type = code; + } else if (code == AHCI_EM_MSG_SGPIO_MODE) { + rc = ahci_wait_em_reset(sgpio_zhaoxin, 7); /*wait at least 64ms*/ + if (rc < 0) { + pr_err("ahci wait em reset failed!\n"); + return rc; + } + zhaoxin_em_type = code; + ahci_zhaoxin_set_em_sgpio(sgpio_zhaoxin); + } else if (code == AHCI_EM_MSG_SGPIO_GP_MODE) { + rc = ahci_wait_em_reset(sgpio_zhaoxin, 7); /*wait at least 64ms*/ + if (rc < 0) { + pr_err("ahci wait em reset failed!\n"); + return rc; + } + zhaoxin_em_type = code; + ahci_zhaoxin_set_em_sgpio_gpmode(sgpio_zhaoxin); + } else + pr_err("Incorrect value:1 = LED on, 2 = SGPIO normal on, 3 = SGPIO GP on)\n"); + + return count; +} + +static ssize_t ahci_transmit_sgpio_message(unsigned long port_num, + struct sgpio_zhaoxin *sgpio_zhaoxin, u16 state, + ssize_t size) +{ + void __iomem *mmio = sgpio_zhaoxin->mmio; + void __iomem *em_mmio = mmio + SGPIO_OFFSET; + unsigned long flags; + + if (!(sgpio_zhaoxin->em_msg_type & EM_MSG_TYPE_SGPIO)) + return -EINVAL; + + spin_lock_irqsave(&sgpio_zhaoxin->wr_lock, flags); + + switch (port_num) { + case 0: + writel(SGPIO_MESSAGE_HEAD, mmio + sgpio_zhaoxin->em_loc); + writew(state, em_mmio + 0x22); + sgpio_zhaoxin->sgpio_reg.transmit_0.sgpio_tx_0 &= 0x0000ffff; + sgpio_zhaoxin->sgpio_reg.transmit_0.drive_0_active = (state & 0x3c0) >> 6; + sgpio_zhaoxin->sgpio_reg.transmit_0.drive_0_locate = (state & 0x38) >> 3; + sgpio_zhaoxin->sgpio_reg.transmit_0.drive_0_error = state & 0x7; + break; + case 1: + writel(SGPIO_MESSAGE_HEAD, mmio + sgpio_zhaoxin->em_loc); + writew(state, em_mmio + 0x20); + sgpio_zhaoxin->sgpio_reg.transmit_0.sgpio_tx_0 &= 0xffff0000; + sgpio_zhaoxin->sgpio_reg.transmit_0.drive_1_active = (state & 0x3c0) >> 6; + sgpio_zhaoxin->sgpio_reg.transmit_0.drive_1_locate = (state & 0x38) >> 3; + sgpio_zhaoxin->sgpio_reg.transmit_0.drive_1_error = state & 0x7; + break; + case 2: + writel(SGPIO_MESSAGE_HEAD, mmio + sgpio_zhaoxin->em_loc); + 
writew(state, em_mmio + 0x26); + sgpio_zhaoxin->sgpio_reg.transmit_1.sgpio_tx_1 &= 0x0000ffff; + sgpio_zhaoxin->sgpio_reg.transmit_1.drive_2_active = (state & 0x3c0) >> 6; + sgpio_zhaoxin->sgpio_reg.transmit_1.drive_2_locate = (state & 0x38) >> 3; + sgpio_zhaoxin->sgpio_reg.transmit_1.drive_2_error = state & 0x7; + break; + case 3: + writel(SGPIO_MESSAGE_HEAD, mmio + sgpio_zhaoxin->em_loc); + writew(state, em_mmio + 0x24); + sgpio_zhaoxin->sgpio_reg.transmit_1.sgpio_tx_1 &= 0xffff0000; + sgpio_zhaoxin->sgpio_reg.transmit_1.drive_3_active = (state & 0x3c0) >> 6; + sgpio_zhaoxin->sgpio_reg.transmit_1.drive_3_locate = (state & 0x38) >> 3; + sgpio_zhaoxin->sgpio_reg.transmit_1.drive_3_error = state & 0x7; + break; + default: + pr_err("Unsupported port number in this controller\n"); + break; + } + + spin_unlock_irqrestore(&sgpio_zhaoxin->wr_lock, flags); + + return size; +} + +static ssize_t ahci_transmit_sgpio_indicator(unsigned long port_num, + struct sgpio_zhaoxin *sgpio_zhaoxin, + u8 indicator_code, enum SGPIO_INDICATOR type, + ssize_t size) +{ + void __iomem *mmio = sgpio_zhaoxin->mmio; + void __iomem *em_mmio = mmio + SGPIO_OFFSET; + u16 state; + + if (!(sgpio_zhaoxin->em_msg_type & EM_MSG_TYPE_SGPIO)) + return -EINVAL; + + if (get_ahci_em_messages() && (zhaoxin_em_type != AHCI_EM_MSG_SGPIO_MODE)) { + pr_err("Current setting not SGPIO normal mode, quit\n"); + return -EINVAL; + } + + switch (port_num) { + case 0: + state = readw(em_mmio + 0x22); + break; + case 1: + state = readw(em_mmio + 0x20); + break; + case 2: + state = readw(em_mmio + 0x26); + break; + case 3: + state = readw(em_mmio + 0x24); + break; + default: + return -EINVAL; + } + + if (type == SGPIO_ACTIVITY) { + state &= 0xfc3f; + state |= (indicator_code&0xf) << 6; + } else if (type == SGPIO_LOCATE) { + state &= 0xffc7; + state |= (indicator_code&0x7) << 3; + } else if (type == SGPIO_ERROR) { + state &= 0xfff8; + state |= indicator_code & 0x7; + } else { + return -EINVAL; + } + + return ahci_transmit_sgpio_message(port_num, sgpio_zhaoxin, state, size); +} + +static ssize_t ahci_transmit_sgpio_indicator_gp(unsigned long port_num, + struct sgpio_zhaoxin *sgpio_zhaoxin, + u8 indicator_code, enum SGPIO_INDICATOR type, + ssize_t size) +{ + void __iomem *mmio = sgpio_zhaoxin->mmio; + void __iomem *em_mmio = mmio + SGPIO_OFFSET; + union SGPIO_TX_GP state; + unsigned long flags; + + if (!(sgpio_zhaoxin->em_msg_type & EM_MSG_TYPE_SGPIO)) + return -EINVAL; + + if (get_ahci_em_messages() && (zhaoxin_em_type != AHCI_EM_MSG_SGPIO_GP_MODE)) { + pr_err("Current setting not SGPIO_GP mode, quit\n"); + return -EINVAL; + } + + spin_lock_irqsave(&sgpio_zhaoxin->wr_lock, flags); + + state.sgpio_tx_gp = readl(em_mmio + 0x3c); + switch (port_num) { + case 0: + if (type == SGPIO_ACTIVITY) + state.D00 = indicator_code & 0x1; + else if (type == SGPIO_LOCATE) + state.D01 = indicator_code & 0x1; + else if (type == SGPIO_ERROR) + state.D02 = indicator_code & 0x1; + break; + case 1: + if (type == SGPIO_ACTIVITY) + state.D10 = indicator_code & 0x1; + else if (type == SGPIO_LOCATE) + state.D11 = indicator_code & 0x1; + else if (type == SGPIO_ERROR) + state.D12 = indicator_code & 0x1; + break; + case 2: + if (type == SGPIO_ACTIVITY) + state.D20 = indicator_code & 0x1; + else if (type == SGPIO_LOCATE) + state.D21 = indicator_code & 0x1; + else if (type == SGPIO_ERROR) + state.D22 = indicator_code & 0x1; + break; + case 3: + if (type == SGPIO_ACTIVITY) + state.D30 = indicator_code & 0x1; + else if (type == SGPIO_LOCATE) + state.D31 = indicator_code & 0x1; + 
else if (type == SGPIO_ERROR) + state.D32 = indicator_code & 0x1; + break; + default: + return -EINVAL; + } + + writel(SGPIO_MESSAGE_HEAD, mmio + sgpio_zhaoxin->em_loc); + writel(state.sgpio_tx_gp, em_mmio + 0x3c); + sgpio_zhaoxin->sgpio_reg.gp_transmit_reg.sgpio_tx_gp = state.sgpio_tx_gp; + + spin_unlock_irqrestore(&sgpio_zhaoxin->wr_lock, flags); + return size; +} + +static ssize_t sgpio_activity_store(struct sgpio_zhaoxin *sgpio_zhaoxin, const char *buf, + size_t count) +{ + unsigned long val = 0; + unsigned long port_num = 0; + unsigned long code = 0; + + if (kstrtoul(buf, 0, &val)) + return count; + + port_num = val & 0xf; + code = val >> 4; + + if (sgpio_zhaoxin->em_msg_type & EM_MSG_TYPE_SGPIO) { + switch (code) { + case 0x0: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_DISABLE, SGPIO_ACTIVITY, 1); + break; + case 0x1: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_ENABLE, SGPIO_ACTIVITY, 1); + break; + case 0x2: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_GA_FON, SGPIO_ACTIVITY, 1); + break; + case 0x3: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_GA_FOFF, SGPIO_ACTIVITY, 1); + break; + case 0x4: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_BRIEF_EN_EOF, SGPIO_ACTIVITY, 1); + break; + case 0x5: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_BRIEF_EN_SOF, SGPIO_ACTIVITY, 1); + break; + case 0x6: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_GB_FON, SGPIO_ACTIVITY, 1); + break; + case 0x7: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_GB_FOFF, SGPIO_ACTIVITY, 1); + break; + case 0x8: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_GC_FON, SGPIO_ACTIVITY, 1); + break; + case 0x9: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + ACTIVITY_GC_FOFF, SGPIO_ACTIVITY, 1); + break; + case 0x10: + ahci_transmit_sgpio_indicator_gp(port_num, sgpio_zhaoxin, + GP_OFF, SGPIO_ACTIVITY, 1); + break; + case 0x11: + ahci_transmit_sgpio_indicator_gp(port_num, sgpio_zhaoxin, + GP_ON, SGPIO_ACTIVITY, 1); + break; + default: + pr_err("Unsupported command for activity indicator, cmd:0x%lx\n", val); + break; + } + + return count; + } + + return -EINVAL; +} + +static ssize_t sgpio_locate_store(struct sgpio_zhaoxin *sgpio_zhaoxin, const char *buf, + size_t count) +{ + unsigned long val = 0; + unsigned long port_num = 0; + unsigned long code = 0; + + if (kstrtoul(buf, 0, &val)) + return count; + + port_num = val & 0xf; + code = val >> 4; + + if (sgpio_zhaoxin->em_msg_type & EM_MSG_TYPE_SGPIO) { + switch (code) { + case 0x0: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_DISABLE, SGPIO_LOCATE, 1); + break; + case 0x1: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_ENABLE, SGPIO_LOCATE, 1); + break; + case 0x2: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GA_FON, SGPIO_LOCATE, 1); + break; + case 0x3: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GA_FOFF, SGPIO_LOCATE, 1); + break; + case 0x4: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GB_FON, SGPIO_LOCATE, 1); + break; + case 0x5: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GB_FOFF, SGPIO_LOCATE, 1); + break; + case 0x6: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GC_FON, SGPIO_LOCATE, 1); + break; + case 0x7: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + 
LOCATE_ERROR_GC_FOFF, SGPIO_LOCATE, 1); + break; + case 0x10: + ahci_transmit_sgpio_indicator_gp(port_num, sgpio_zhaoxin, + GP_OFF, SGPIO_LOCATE, 1); + break; + case 0x11: + ahci_transmit_sgpio_indicator_gp(port_num, sgpio_zhaoxin, GP_ON, + SGPIO_LOCATE, 1); + break; + default: + pr_err("Unsupported command for locate indicator, cmd:0x%lx\n", val); + break; + } + + return count; + } + return -EINVAL; +} + +static ssize_t sgpio_error_store(struct sgpio_zhaoxin *sgpio_zhaoxin, const char *buf, size_t count) +{ + unsigned long val = 0; + unsigned long port_num = 0; + unsigned long code = 0; + + if (kstrtoul(buf, 0, &val)) + return count; + + port_num = val & 0xf; + code = val >> 4; + + if (sgpio_zhaoxin->em_msg_type & EM_MSG_TYPE_SGPIO) { + switch (code) { + case 0x0: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_DISABLE, SGPIO_ERROR, 1); + break; + case 0x1: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_ENABLE, SGPIO_ERROR, 1); + break; + case 0x2: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GA_FON, SGPIO_ERROR, 1); + break; + case 0x3: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GA_FOFF, SGPIO_ERROR, 1); + break; + case 0x4: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GB_FON, SGPIO_ERROR, 1); + break; + case 0x5: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GB_FOFF, SGPIO_ERROR, 1); + break; + case 0x6: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GC_FON, SGPIO_ERROR, 1); + break; + case 0x7: + ahci_transmit_sgpio_indicator(port_num, sgpio_zhaoxin, + LOCATE_ERROR_GC_FOFF, SGPIO_ERROR, 1); + break; + case 0x10: + ahci_transmit_sgpio_indicator_gp(port_num, sgpio_zhaoxin, + GP_OFF, SGPIO_ERROR, 1); + break; + case 0x11: + ahci_transmit_sgpio_indicator_gp(port_num, sgpio_zhaoxin, + GP_ON, SGPIO_ERROR, 1); + break; + default: + pr_err("Unsupport command for error indicator, cmd:0x%lx\n", val); + break; + } + + return count; + } + + return -EINVAL; +} + +static struct sgpio_zhaoxin_sysfs_attr dev_attr_ahci_em_type_sys = + __ATTR(ahci_em_type_sys, 0644, ahci_em_type_sys_show, + ahci_em_type_sys_store); +static struct sgpio_zhaoxin_sysfs_attr dev_attr_sgpio_activity = + __ATTR(sgpio_activity, 0200, NULL, sgpio_activity_store); +static struct sgpio_zhaoxin_sysfs_attr dev_attr_sgpio_locate = + __ATTR(sgpio_locate, 0200, NULL, sgpio_locate_store); +static struct sgpio_zhaoxin_sysfs_attr dev_attr_sgpio_error = + __ATTR(sgpio_error, 0200, NULL, sgpio_error_store); + +struct attribute *sgpio_attrs[] = { + &dev_attr_ahci_em_type_sys.attr, + &dev_attr_sgpio_activity.attr, + &dev_attr_sgpio_locate.attr, + &dev_attr_sgpio_error.attr, + NULL +}; + +static const struct attribute_group sgpio_attrs_group = { + .attrs = sgpio_attrs +}; +const struct attribute_group *sgpio_groups[] = { + &sgpio_attrs_group, + NULL +}; + +static ssize_t sgpio_zhaoxin_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct sgpio_zhaoxin_sysfs_attr *sgpio_zhaoxin_sysfs_attr = to_sgpio_attr(attr); + struct sgpio_zhaoxin *sgpio_zhaoxin = to_sgpio_obj(kobj); + + if (!sgpio_zhaoxin_sysfs_attr->show) + return -EIO; + + return sgpio_zhaoxin_sysfs_attr->show(sgpio_zhaoxin, buf); +} + +static ssize_t sgpio_zhaoxin_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct sgpio_zhaoxin_sysfs_attr *sgpio_zhaoxin_sysfs_attr = to_sgpio_attr(attr); + struct sgpio_zhaoxin *sgpio_zhaoxin = 
to_sgpio_obj(kobj); + + if (!sgpio_zhaoxin_sysfs_attr->store) + return -EIO; + + return sgpio_zhaoxin_sysfs_attr->store(sgpio_zhaoxin, buf, len); +} + +const struct sysfs_ops sgpio_zhaoxin_sysfs_ops = { + .show = sgpio_zhaoxin_attr_show, + .store = sgpio_zhaoxin_attr_store, +}; + +const struct kobj_type sgpio_zhaoxin_ktype = { + .sysfs_ops = &sgpio_zhaoxin_sysfs_ops, + .default_groups = sgpio_groups, +}; + +void set_em_messages(struct sgpio_zhaoxin *sgpio_zhaoxin) +{ + void __iomem *mmio = sgpio_zhaoxin->mmio; + u32 em_loc = readl(mmio + HOST_EM_LOC); + u32 em_ctl = readl(mmio + HOST_EM_CTL); + u8 messages; + + if (!get_ahci_em_messages()) + return; + + messages = (em_ctl & EM_CTRL_MSG_TYPE) >> 16; + + if (messages) { + /* store em_loc */ + sgpio_zhaoxin->em_loc = ((em_loc >> 16) * 4); + sgpio_zhaoxin->em_buf_sz = ((em_loc & 0xff) * 4); + sgpio_zhaoxin->em_msg_type = messages; + } +} + +int add_sgpio_zhaoxin(void) +{ + struct pci_dev *pdev_cur = pci_get_device(PCI_VENDOR_ID_ZHAOXIN, 0x9083, NULL); + struct pci_dev *pdev_next = pdev_cur; + struct sgpio_zhaoxin *sgpio_zhaoxin; + int ret = 0; + + if (!get_ahci_em_messages()) + return 0; + + while (pdev_next) { + pdev_next = pci_get_device(PCI_VENDOR_ID_ZHAOXIN, 0x9083, pdev_cur); + + WARN_ON(MAX_TEST_RESULT_LEN <= 0); + + sgpio_zhaoxin = (struct sgpio_zhaoxin *)get_zeroed_page(GFP_KERNEL); + if (!sgpio_zhaoxin) + return -ENOMEM; + + list_add(&sgpio_zhaoxin->list, &sgpio_zhaoxin_list); + ret = kobject_init_and_add(&sgpio_zhaoxin->kobj, &sgpio_zhaoxin_ktype, + &(&pdev_cur->dev)->kobj, "zx_sgpio"); + if (ret) { + kobject_put(&sgpio_zhaoxin->kobj); + return -1; + } + + kobject_uevent(&sgpio_zhaoxin->kobj, KOBJ_ADD); + spin_lock_init(&sgpio_zhaoxin->wr_lock); + sgpio_zhaoxin->kobj_valid = 1; + sgpio_zhaoxin->mmio = pcim_iomap_table(pdev_cur)[5]; + set_em_messages(sgpio_zhaoxin); + ret = ahci_wait_em_reset(sgpio_zhaoxin, 7); /*wait at least 64ms*/ + if (ret < 0) { + pr_err("ahci wait em reset failed!\n"); + return ret; + } + + sgpio_zhaoxin->kobj_valid = 1; + + if (zhaoxin_em_type == AHCI_EM_MSG_SGPIO_GP_MODE) + ahci_zhaoxin_set_em_sgpio_gpmode(sgpio_zhaoxin); + else if (zhaoxin_em_type == AHCI_EM_MSG_SGPIO_MODE) + ahci_zhaoxin_set_em_sgpio(sgpio_zhaoxin); + + pdev_cur = pdev_next; + } + + return 0; +} + + +void remove_sgpio_zhaoxin(void) +{ + struct sgpio_zhaoxin *cur = NULL, *next = NULL; + + if (!get_ahci_em_messages()) + return; + + list_for_each_entry_safe(cur, next, &sgpio_zhaoxin_list, list) { + list_del(&cur->list); + if (cur->kobj_valid) + kobject_put(&cur->kobj); + + free_page((unsigned long)cur); + if (!next) + break; + } +} + +static int __init zhaoxin_sgpio_init(void) +{ + return add_sgpio_zhaoxin(); +} + +static void __exit zhaoxin_sgpio_exit(void) +{ + remove_sgpio_zhaoxin(); +} + +late_initcall(zhaoxin_sgpio_init); +module_exit(zhaoxin_sgpio_exit); + +MODULE_DESCRIPTION("Zhaoxin SGPIO driver"); +MODULE_AUTHOR("XanderChen"); +MODULE_LICENSE("GPL"); diff --git a/drivers/ata/ahci_zhaoxin_sgpio.h b/drivers/ata/ahci_zhaoxin_sgpio.h new file mode 100644 index 000000000000..b9fd7c665602 --- /dev/null +++ b/drivers/ata/ahci_zhaoxin_sgpio.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ACHI_ZHAOXIN_SGPIO_H +#define _ACHI_ZHAOXIN_SGPIO_H + +#define SGPIO_OFFSET 0x580 + +#define SGPIO_MESSAGE_HEAD 0x3000000 + +#define ACTIVITY_DISABLE 0x0 +#define ACTIVITY_ENABLE 0x1 +#define ACTIVITY_GA_FON 0x2 +#define ACTIVITY_GA_FOFF 0x3 +#define ACTIVITY_BRIEF_EN_EOF 0x4 +#define ACTIVITY_BRIEF_EN_SOF 0x5 +#define ACTIVITY_GB_FON 
0x6 +#define ACTIVITY_GB_FOFF 0x7 +#define ACTIVITY_GC_FON 0x8 +#define ACTIVITY_GC_FOFF 0x9 +#define LOCATE_ERROR_DISABLE 0x0 +#define LOCATE_ERROR_ENABLE 0x1 +#define LOCATE_ERROR_GA_FON 0x2 +#define LOCATE_ERROR_GA_FOFF 0x3 +#define LOCATE_ERROR_GB_FON 0x4 +#define LOCATE_ERROR_GB_FOFF 0x5 +#define LOCATE_ERROR_GC_FON 0x6 +#define LOCATE_ERROR_GC_FOFF 0x7 + +#define GP_OFF 0x10 +#define GP_ON 0x11 + +#define to_sgpio_attr(x) container_of(x, struct sgpio_zhaoxin_sysfs_attr, attr) +#define to_sgpio_obj(x) container_of(x, struct sgpio_zhaoxin, kobj) +#define MAX_TEST_RESULT_LEN (PAGE_SIZE - sizeof(struct sgpio_zhaoxin) - 8) + +//SGPIO module parameter: 0-off, 1-LED, 2-SGPIO, 3-SGPIO_GP +enum ahci_em_msg_modes { + AHCI_EM_MSG_OFF = 0, + AHCI_EM_MSG_LED_MODE, + AHCI_EM_MSG_SGPIO_MODE, + AHCI_EM_MSG_SGPIO_GP_MODE, + AHCI_EM_MSG_NULL, +}; + +enum SGPIO_INDICATOR { + SGPIO_ACTIVITY, + SGPIO_LOCATE, + SGPIO_ERROR +}; + +enum SGPIO_CFG1 { + STRETCH_ACTIVITY_OFF, + STRETCH_ACTIVITY_ON, + FORCE_ACTIVITY_OFF, + MAXIMUM_ACTIVITY_ON, + BLINK_GENERATIOR_RATE_B, + BLINK_GENERATIOR_RATE_A, + BLINK_GENERATIOR_RATE_C +}; + +union SGPIO_CFG_0 { + struct { + u32 reserved0 :8; + u32 version :4; + u32 reserved1 :4; + u32 gp_register_count :4; + u32 cfg_register_count :3; + u32 enable :1; + u32 supported_drive_count :8; + }; + u32 sgpio_cfg_0; +}; + +union SGPIO_CFG_1 { + struct { + u32 reserved0 :4; + u32 blink_gen_c :4; + u32 blink_gen_a :4; + u32 blink_gen_b :4; + u32 max_act_on :4; + u32 force_act_off :4; + u32 stretch_act_on :4; + u32 stretch_act_off :4; + }; + u32 sgpio_cfg_1; +}; + +union SGPIO_RX { + struct { + u32 drive_3_input :3; + u32 reserved3 :5; + u32 drive_2_input :3; + u32 reserved2 :5; + u32 drive_1_input :3; + u32 reserved1 :5; + u32 drive_0_input :3; + u32 reserved0 :5; + }; + u32 sgpio_rx; +}; + +union SGPIO_RX_GP_CFG { + struct { + u32 reserved0 :16; + u32 count :8; + u32 reserved1 :8; + }; + u32 sgpio_rx_gp_cfg; +}; +union SGPIO_RX_GP { + struct { + u32 reserved0 :16; + u32 D22 :1; + u32 D30 :1; + u32 D31 :1; + u32 D32 :1; + u32 reserved1:4; + u32 D00 :1; + u32 D01 :1; + u32 D02 :1; + u32 D10 :1; + u32 D11 :1; + u32 D12 :1; + u32 D20 :1; + u32 D21 :1; + }; + u32 sgpio_rx_gp; +}; + +union SGPIO_TX_0 { + struct { + u32 drive_1_error :3; + u32 drive_1_locate :3; + u32 drive_1_active :4; + u32 reserved1 :6; + u32 drive_0_error :3; + u32 drive_0_locate :3; + u32 drive_0_active :4; + u32 reserved0 :6; + }; + u32 sgpio_tx_0; +}; + +union SGPIO_TX_1 { + struct { + u32 drive_3_error :3; + u32 drive_3_locate :3; + u32 drive_3_active :4; + u32 reserved3 :6; + u32 drive_2_error :3; + u32 drive_2_locate :3; + u32 drive_2_active :4; + u32 reserved2 :6; + }; + u32 sgpio_tx_1; +}; + +union SGPIO_TX_GP_CFG { + struct { + u32 reserved0 :16; + u32 count :8; + u32 sload :4; + u32 reserved1 :4; + }; + u32 sgpio_tx_gp_cfg; +}; + +union SGPIO_TX_GP { + struct { + u32 reserved0 :16; + u32 D22 :1; + u32 D30 :1; + u32 D31 :1; + u32 D32 :1; + u32 reserved1:4; + u32 D00 :1; + u32 D01 :1; + u32 D02 :1; + u32 D10 :1; + u32 D11 :1; + u32 D12 :1; + u32 D20 :1; + u32 D21 :1; + }; + u32 sgpio_tx_gp; +}; + +struct AHCI_SGPIO_REG { + union SGPIO_CFG_0 cfg_0; + union SGPIO_CFG_1 cfg_1; + union SGPIO_RX receive_reg; + union SGPIO_RX_GP_CFG gp_receive_cfg; + union SGPIO_RX_GP gp_receive_reg; + union SGPIO_TX_0 transmit_0; + union SGPIO_TX_1 transmit_1; + union SGPIO_TX_GP_CFG gp_transmit_cfg; + union SGPIO_TX_GP gp_transmit_reg; +}; + +struct sgpio_zhaoxin { + struct kobject kobj; + struct list_head list; + unsigned int 
kobj_valid; + unsigned int index; + u32 em_loc; /* enclosure management location */ + u32 em_buf_sz; /* EM buffer size in byte */ + u32 em_msg_type; /* EM message type */ + void __iomem *mmio; + spinlock_t wr_lock; /* protects sgpio register */ + struct AHCI_SGPIO_REG sgpio_reg; /* saved sgpio register */ +}; + +struct sgpio_zhaoxin_sysfs_attr { + struct attribute attr; + ssize_t (*show)(struct sgpio_zhaoxin *sgpio_zhaoxin, char *buf); + ssize_t (*store)(struct sgpio_zhaoxin *sgpio_zhaoxin, const char *buf, size_t count); +}; + +int get_ahci_em_messages(void); + +#endif /* _ACHI_ZHAOXIN_SGPIO_H */ diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index f1263364fa97..6524c5a02648 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -207,6 +207,12 @@ static int devslp_idle_timeout __read_mostly = 1000; module_param(devslp_idle_timeout, int, 0644); MODULE_PARM_DESC(devslp_idle_timeout, "device sleep idle timeout"); +int get_ahci_em_messages(void) +{ + return ahci_em_messages; +} +EXPORT_SYMBOL_GPL(get_ahci_em_messages); + static void ahci_enable_ahci(void __iomem *mmio) { int i; From e123b1b27cb93cb5a8c21cf8755bc89fc386cd93 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 5 Aug 2024 11:34:45 +0800 Subject: [PATCH 27/39] anolis: efi: cper: Add Zhaoxin/Centaur ZDI/ZPI error decode ANBZ: #9439 ZPI is the interconnection interface between sockets; ZDI is the interconnection interface between dies. When a ZDI or ZPI error occurs, it triggers an SMI. The SMI handler reads the error information from the ZDI/ZPI configuration space, fills it into the CPER structure associated with the error, and raises an SCI or NMI to notify the OS; the OS driver then decodes the CPER structure so that users can analyze the error. Because the UEFI spec does not define a section type for ZDI/ZPI errors, Zhaoxin reports them using the format of the Generic Processor Error Section: when an error occurs, the BIOS fills in the corresponding Generic Processor Error Section structure from the SMI handler. However, the error information printed by the default APEI driver is not easy to read, so add decode logic that makes the ZDI/ZPI error information on Zhaoxin/Centaur CPUs easier to read.
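The decode keys off two fields of the Generic Processor Error Section: the low byte of requestor_id identifies the link (7 for ZPI between sockets, 6 for ZDI between dies, with the bus number in the next byte), and responder_id carries the error-type index into the string table added below. A minimal sketch of that classification (table trimmed here; the real driver also prints the bus/device numbers and the APIC id):

static const char * const zdi_zpi_types[] = {
	"No Error",
	"Training Error Status (PHY)",
	/* ... remaining entries as in the cper.c table below ... */
};

static void decode_zdi_zpi(u64 requestor_id, u64 responder_id)
{
	u8 link = requestor_id & 0xff;	/* 7 = ZPI, 6 = ZDI */
	u8 etype = responder_id;	/* index into the error-type strings */

	pr_info("%s error, type: %s\n",
		link == 7 ? "ZPI" : link == 6 ? "ZDI" : "unknown",
		etype < ARRAY_SIZE(zdi_zpi_types) ? zdi_zpi_types[etype] : "unknown error");
}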
Signed-off-by: leoliu-oc --- drivers/firmware/efi/cper.c | 55 +++++++++++++++++++++++++++++++++++++ include/linux/cper.h | 1 + 2 files changed, 56 insertions(+) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 35c37f667781..bcbcf4d37cce 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -141,6 +141,59 @@ static const char * const proc_flag_strs[] = { "corrected", }; +static const char *const zdi_zpi_err_type_strs[] = { + "No Error", + "Training Error Status (PHY)", + "Data Link Protocol Error Status (DLL)", + "Surprise Down Error Status", + "Flow Control Protocol Error Status (TL)", + "Receiver Overflow Status (TL)", + "Receiver Error Status (PHY)", + "Bad TLP Status (DLL)", + "Bad Data Link Layer Packet (DLLP) Status (DLL)", + "REPLAY_NUM Rollover Status (DLL)", + "Replay Timer Timeout Status (DLL)", + "X16 Link Width Unreliable Status", + "ZPI X8 Link Width Unreliable Status", + "ZPI X4 Link Width Unreliable Status", + "ZPI X2 Link Width Unreliable Status", + "ZPI Gen3 Link Speed Unreliable Status", + "ZPI Gen2 Link Speed Unreliable Status", + "ZDI Gen3 Link Speed Unreliable Status", + "ZDI Gen4 Link Speed Unreliable Status", +}; + +const char *cper_zdi_zpi_err_type_str(unsigned int etype) +{ + return etype < ARRAY_SIZE(zdi_zpi_err_type_strs) ? + zdi_zpi_err_type_strs[etype] : "unknown error"; +} +EXPORT_SYMBOL_GPL(cper_zdi_zpi_err_type_str); + +static void cper_print_proc_generic_zdi_zpi(const char *pfx, + const struct cper_sec_proc_generic *zdi_zpi) +{ +#if IS_ENABLED(CONFIG_X86) + u8 etype = zdi_zpi->responder_id; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { + if ((zdi_zpi->requestor_id & 0xff) == 7) { + pr_info("%s general processor error(zpi error)\n", pfx); + } else if ((zdi_zpi->requestor_id & 0xff) == 6) { + pr_info("%s general processor error(zdi error)\n", pfx); + } else { + pr_info("%s general processor error(unknown error)\n", pfx); + return; + } + pr_info("%s bus number %llx device number %llx function number 0\n", pfx, + ((zdi_zpi->requestor_id)>>8) & 0xff, zdi_zpi->requestor_id & 0xff); + pr_info("%s apic id %lld error_type: %s\n", pfx, zdi_zpi->proc_id, + cper_zdi_zpi_err_type_str(etype)); + } +#endif +} + static void cper_print_proc_generic(const char *pfx, const struct cper_sec_proc_generic *proc) { @@ -184,6 +237,8 @@ static void cper_print_proc_generic(const char *pfx, pfx, proc->responder_id); if (proc->validation_bits & CPER_PROC_VALID_IP) printk("%s""IP: 0x%016llx\n", pfx, proc->ip); + + cper_print_proc_generic_zdi_zpi(pfx, proc); } static const char * const mem_err_type_strs[] = { diff --git a/include/linux/cper.h b/include/linux/cper.h index c1a7dc325121..ba5ee2355370 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -578,4 +578,5 @@ void cper_estatus_print(const char *pfx, int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus); int cper_estatus_check(const struct acpi_hest_generic_status *estatus); +const char *cper_zdi_zpi_err_type_str(unsigned int etype); #endif From f45a2165bbabf8d57c7fcf73bdcf55d506127681 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Tue, 2 Jan 2024 19:00:15 +0800 Subject: [PATCH 28/39] i2c: smbus: Add support for Zhaoxin SMBUS controller The Zhaoxin platform implements the SMBUS controller on the hardware, enabling information exchange and collaboration between devices using the SMBus protocol. 
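From the register map in the driver below, the controller is an I/O-port device: the driver programs the address, command and data registers, starts the transaction through ZXSMB_CTL, and then detects completion either in the interrupt handler or by polling ZXSMB_STS. A rough sketch of the polling wait, mirroring zxsmb_wait_polling_finish() with the error handling simplified:

static int zxsmb_poll_done(u16 base)
{
	int timeout = ZXSMB_TIMEOUT * 10;
	u8 status;

	do {
		usleep_range(100, 200);
		status = inb(base + ZXSMB_STS);
	} while ((status & ZXSMB_BUSY) && --timeout);

	if (!timeout)
		return -EIO;

	/* The driver clears the latched status bits by writing them back. */
	outb(status, base + ZXSMB_STS);

	return (status & ZXSMB_CMD_CMPLET) ? 0 : -EIO;
}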
Signed-off-by: leoliu-oc --- drivers/i2c/busses/Kconfig | 11 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-zhaoxin-smbus.c | 385 +++++++++++++++++++++++++ 3 files changed, 397 insertions(+) create mode 100644 drivers/i2c/busses/i2c-zhaoxin-smbus.c diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index a57c6760fa20..e49b3137c596 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -360,6 +360,17 @@ config I2C_SCMI To compile this driver as a module, choose M here: the module will be called i2c-scmi. +config I2C_ZHAOXIN_SMBUS + tristate "Zhaoxin SMBus Interface" + depends on PCI || COMPILE_TEST + default m + help + If you say yes to this option, support will be included for the + ZHAOXIN SMBus interface + + This driver can also be built as a module. If so, the module + will be called i2c-zhaoxin-smbus. + endif # ACPI comment "Mac SMBus host controller drivers" diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index bef7c205433b..f8c8a3554427 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -139,6 +139,7 @@ obj-$(CONFIG_I2C_ROBOTFUZZ_OSIF) += i2c-robotfuzz-osif.o obj-$(CONFIG_I2C_TAOS_EVM) += i2c-taos-evm.o obj-$(CONFIG_I2C_TINY_USB) += i2c-tiny-usb.o obj-$(CONFIG_I2C_VIPERBOARD) += i2c-viperboard.o +obj-$(CONFIG_I2C_ZHAOXIN_SMBUS) += i2c-zhaoxin-smbus.o # Other I2C/SMBus bus drivers obj-$(CONFIG_I2C_ACORN) += i2c-acorn.o diff --git a/drivers/i2c/busses/i2c-zhaoxin-smbus.c b/drivers/i2c/busses/i2c-zhaoxin-smbus.c new file mode 100644 index 000000000000..52c689e928af --- /dev/null +++ b/drivers/i2c/busses/i2c-zhaoxin-smbus.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zhaoxin SMBus controller driver + * + * Copyright(c) 2023 Shanghai Zhaoxin Semiconductor Corporation. + * All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_VERSION "3.1.0" + +#define ZXSMB_NAME "smbus_zhaoxin" + +/* + * registers + */ +/* SMBus MMIO address offsets */ +#define ZXSMB_STS 0x00 +#define ZXSMB_BUSY BIT(0) +#define ZXSMB_CMD_CMPLET BIT(1) +#define ZXSMB_DEV_ERR BIT(2) +#define ZXSMB_BUS_CLSI BIT(3) +#define ZXSMB_FAIL_TRANS BIT(4) +#define ZXSMB_STS_MASK GENMASK(4, 0) +#define ZXSMB_NSMBSRST BIT(5) +#define ZXSMB_CTL 0x02 +#define ZXSMB_CMPLT_EN BIT(0) +#define ZXSMB_KILL_PRG BIT(1) +#define ZXSMB_START BIT(6) +#define ZXSMB_PEC_EN BIT(7) +#define ZXSMB_CMD 0x03 +#define ZXSMB_ADD 0x04 +#define ZXSMB_DAT0 0x05 +#define ZXSMB_DAT1 0x06 +#define ZXSMB_BLKDAT 0x07 + +/* + * platform related information + */ + /* protocol cmd constants */ +#define ZXSMB_QUICK 0x00 +#define ZXSMB_BYTE 0x04 +#define ZXSMB_BYTE_DATA 0x08 +#define ZXSMB_WORD_DATA 0x0C +#define ZXSMB_PROC_CALL 0x10 +#define ZXSMB_BLOCK_DATA 0x14 +#define ZXSMB_I2C_10_BIT_ADDR 0x18 +#define ZXSMB_I2C_PROC_CALL 0x30 +#define ZXSMB_I2C_BLOCK_DATA 0x34 +#define ZXSMB_I2C_7_BIT_ADDR 0x38 +#define ZXSMB_UNIVERSAL 0x3C + +#define ZXSMB_TIMEOUT 500 + +struct zxsmb { + struct device *dev; + struct i2c_adapter adap; + struct completion complete; + u16 base; + int irq; + u8 status; + int size; + u8 pec; +}; + +static irqreturn_t zxsmb_irq_handle(int irq, void *dev_id) +{ + struct zxsmb *smb = (struct zxsmb *)dev_id; + + smb->status = inb(smb->base + ZXSMB_STS); + if ((smb->status & ZXSMB_STS_MASK) == 0) + return IRQ_NONE; + + /* clear status */ + outb(smb->status, smb->base + ZXSMB_STS); + complete(&smb->complete); + + return IRQ_HANDLED; +} + +static int zxsmb_status_check(struct zxsmb *smb) +{ + if (smb->status & ZXSMB_CMD_CMPLET) + return 0; + + if (smb->status & ZXSMB_BUS_CLSI) { + dev_err(smb->dev, "Lost arbitration\n"); + outb(ZXSMB_KILL_PRG, smb->base + ZXSMB_CTL); + return -EAGAIN; + } + + dev_dbg(smb->dev, "Trans failed, status = 0x%X\n", smb->status); + + return -EIO; +} + +static int zxsmb_wait_interrput_finish(struct zxsmb *smb) +{ + int time_left; + + time_left = wait_for_completion_timeout(&smb->complete, msecs_to_jiffies(ZXSMB_TIMEOUT)); + if (time_left == 0) { + u8 status = inb(smb->base + ZXSMB_STS); + + /* some host's irq config not work well */ + if (status & ZXSMB_STS_MASK) { + outb(status, smb->base + ZXSMB_STS); + outb(ZXSMB_KILL_PRG, smb->base + ZXSMB_CTL); + devm_free_irq(smb->dev, smb->irq, smb); + smb->irq = 0; + dev_warn(smb->dev, "change to polling mode\n"); + + return -EAGAIN; + } + dev_dbg(smb->dev, "interrput timeout\n"); + return -EIO; + } + + return zxsmb_status_check(smb); +} + +static int zxsmb_wait_polling_finish(struct zxsmb *smb) +{ + int status; + int time_left = ZXSMB_TIMEOUT * 10; + + do { + usleep_range(100, 200); + status = inb(smb->base + ZXSMB_STS); + } while ((status & ZXSMB_BUSY) && (--time_left)); + + if (time_left == 0) { + dev_dbg(smb->dev, "polling timeout\n"); + return -EIO; + } + + /* clear status */ + outb(status, smb->base + ZXSMB_STS); + smb->status = status; + + return zxsmb_status_check(smb); +} + +static int zxsmb_trans_start(struct zxsmb *smb) +{ + u16 base = smb->base; + int tmp; + + /* Make sure the SMBus host is ready to start transmitting */ + tmp = inb(base + ZXSMB_STS); + if (tmp & ZXSMB_BUSY) { + outb(tmp, base + ZXSMB_STS); + usleep_range(1000, 5000); + tmp = inb(base + ZXSMB_STS); + if (tmp & ZXSMB_BUSY) { + dev_err(smb->dev, "SMBus reset failed! 
(0x%02x)\n", tmp); + return -EIO; + } + } + + tmp = ZXSMB_START | smb->size; + + if (smb->pec) + tmp |= ZXSMB_PEC_EN; + else + tmp &= (~ZXSMB_PEC_EN); + + if (smb->irq) + tmp |= ZXSMB_CMPLT_EN; + + reinit_completion(&smb->complete); + smb->status = 0; + outb(tmp, base + ZXSMB_CTL); + return 0; +} + +static int zxsmb_transaction(struct zxsmb *smb) +{ + int err; + + err = zxsmb_trans_start(smb); + if (err) + return err; + + if (smb->irq) + err = zxsmb_wait_interrput_finish(smb); + else + err = zxsmb_wait_polling_finish(smb); + + outb(0, smb->base + ZXSMB_CTL); + return err; +} + +static int zxsmb_smbus_xfer(struct i2c_adapter *adap, u16 addr, u16 flags, char read, u8 command, + int size, union i2c_smbus_data *data) +{ + int i; + int err; + u8 len; + struct zxsmb *smb = (struct zxsmb *)i2c_get_adapdata(adap); + u16 base = smb->base; + + switch (size) { + case I2C_SMBUS_QUICK: + size = ZXSMB_QUICK; + break; + case I2C_SMBUS_BYTE: + size = ZXSMB_BYTE; + if (!read) + outb(command, base + ZXSMB_CMD); + break; + case I2C_SMBUS_BYTE_DATA: + outb(command, base + ZXSMB_CMD); + if (!read) + outb(data->byte, base + ZXSMB_DAT0); + size = ZXSMB_BYTE_DATA; + break; + case I2C_SMBUS_PROC_CALL: + case I2C_SMBUS_WORD_DATA: + if (read && size == I2C_SMBUS_PROC_CALL) + goto exit_unsupported; + outb(command, base + ZXSMB_CMD); + if (!read) { + outb(data->word & 0xff, base + ZXSMB_DAT0); + outb((data->word & 0xff00) >> 8, base + ZXSMB_DAT1); + } + size = (size == I2C_SMBUS_PROC_CALL) ? + ZXSMB_PROC_CALL : ZXSMB_WORD_DATA; + break; + case I2C_SMBUS_I2C_BLOCK_DATA: + case I2C_SMBUS_BLOCK_DATA: + len = data->block[0]; + if (read && size == I2C_SMBUS_I2C_BLOCK_DATA) + outb(len, base + ZXSMB_DAT1); + outb(command, base + ZXSMB_CMD); + /* Reset ZXSMB_BLKDAT */ + inb(base + ZXSMB_CTL); + if (!read) { + outb(len, base + ZXSMB_DAT0); + outb(0, base + ZXSMB_DAT1); + for (i = 1; i <= len; i++) + outb(data->block[i], base + ZXSMB_BLKDAT); + } + size = (size == I2C_SMBUS_I2C_BLOCK_DATA) ? + ZXSMB_I2C_BLOCK_DATA : ZXSMB_BLOCK_DATA; + break; + default: + goto exit_unsupported; + } + + outb(((addr & 0x7f) << 1) | read, base + ZXSMB_ADD); + smb->size = size; + smb->pec = flags & I2C_CLIENT_PEC; + err = zxsmb_transaction(smb); + if (err) + return err; + + if ((read == I2C_SMBUS_WRITE) || (size == ZXSMB_QUICK)) { + if (unlikely(size == ZXSMB_PROC_CALL)) + goto prepare_read; + return 0; + } + +prepare_read: + switch (size) { + case ZXSMB_BYTE: + case ZXSMB_BYTE_DATA: + data->byte = inb(base + ZXSMB_DAT0); + break; + case ZXSMB_PROC_CALL: + case ZXSMB_WORD_DATA: + data->word = inb(base + ZXSMB_DAT0) + (inb(base + ZXSMB_DAT1) << 8); + break; + case ZXSMB_I2C_BLOCK_DATA: + case ZXSMB_BLOCK_DATA: + data->block[0] = inb(base + ZXSMB_DAT0); + if (data->block[0] > I2C_SMBUS_BLOCK_MAX) + data->block[0] = I2C_SMBUS_BLOCK_MAX; + /* Reset ZXSMB_BLKDAT */ + inb(base + ZXSMB_CTL); + for (i = 1; i <= data->block[0]; i++) + data->block[i] = inb(base + ZXSMB_BLKDAT); + break; + } + + return 0; + +exit_unsupported: + dev_err(smb->dev, "unsupported access, size:%x, dir:%s", size, read ? 
"read" : "write"); + return -EOPNOTSUPP; +} + +static u32 zxsmb_func(struct i2c_adapter *adapter) +{ + return I2C_FUNC_SMBUS_EMUL; +} + +static const struct i2c_algorithm smbus_algorithm = { + .smbus_xfer = zxsmb_smbus_xfer, + .functionality = zxsmb_func, +}; + +static int zxsmb_probe(struct platform_device *pdev) +{ + struct zxsmb *smb; + struct resource *res; + struct i2c_adapter *adap; + + smb = devm_kzalloc(&pdev->dev, sizeof(*smb), GFP_KERNEL); + if (!smb) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_IO, 0); + if (IS_ERR(res)) + return -ENODEV; + smb->base = res->start; + if (!devm_request_region(&pdev->dev, res->start, resource_size(res), pdev->name)) { + dev_err(&pdev->dev, "Can't get I/O resource\n"); + return -EBUSY; + } + + smb->irq = platform_get_irq(pdev, 0); + if (smb->irq < 0 || devm_request_irq(&pdev->dev, smb->irq, zxsmb_irq_handle, IRQF_SHARED, + pdev->name, smb)) { + dev_warn(&pdev->dev, "failed to request irq %d\n", smb->irq); + smb->irq = 0; + } else + init_completion(&smb->complete); + + smb->dev = &pdev->dev; + platform_set_drvdata(pdev, (void *)smb); + + adap = &smb->adap; + adap->algo = &smbus_algorithm; + adap->retries = 2; + adap->owner = THIS_MODULE; + adap->dev.parent = &pdev->dev; + ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); + snprintf(adap->name, sizeof(adap->name), "zhaoxin-%s-%s", dev_name(pdev->dev.parent), + dev_name(smb->dev)); + i2c_set_adapdata(&smb->adap, smb); + + return i2c_add_adapter(&smb->adap); +} + +static int zxsmb_remove(struct platform_device *pdev) +{ + struct zxsmb *smb = platform_get_drvdata(pdev); + + i2c_del_adapter(&(smb->adap)); + platform_set_drvdata(pdev, NULL); + devm_kfree(&pdev->dev, smb); + + return 0; +} + +static const struct acpi_device_id zxsmb_acpi_match[] = { + {"SMB3324", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, zxsmb_acpi_match); + +static struct platform_driver zxsmb_driver = { + .probe = zxsmb_probe, + .remove = zxsmb_remove, + .driver = { + .name = ZXSMB_NAME, + .acpi_match_table = ACPI_PTR(zxsmb_acpi_match), + }, +}; + +module_platform_driver(zxsmb_driver); + +MODULE_AUTHOR("hanshu@zhaoxin.com"); +MODULE_DESCRIPTION("Zhaoxin SMBus driver"); +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL"); From ce59ba758d54a087400e1a767121328e21e5f9d2 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Thu, 15 Aug 2024 14:43:15 +0800 Subject: [PATCH 29/39] x86/cpu: Remove pointless evaluation of x86_coreid_bits mainline inclusion from mainline-v6.6-rc1 commit <594957d723a0674ca15bfefb755b3403624b8239> ------------------- cpuinfo_x86::x86_coreid_bits is only used by the AMD numa topology code. No point in evaluating it on non AMD systems. No functional change. 
Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Reviewed-by: Arjan van de Ven Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230814085112.687588373@linutronix.de Signed-off-by: leoliu-oc --- arch/x86/kernel/cpu/intel.c | 13 ------------- arch/x86/kernel/cpu/zhaoxin.c | 13 ------------- 2 files changed, 26 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index aa3e7ed0eb3d..036f136dc423 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -398,19 +398,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_PGE); } - if (c->cpuid_level >= 0x00000001) { - u32 eax, ebx, ecx, edx; - - cpuid(0x00000001, &eax, &ebx, &ecx, &edx); - /* - * If HTT (EDX[28]) is set EBX[16:23] contain the number of - * apicids which are reserved per package. Store the resulting - * shift value for the package management code. - */ - if (edx & (1U << 28)) - c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); - } - check_memory_type_self_snoop_errata(c); /* diff --git a/drivers/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 8e4201ad1d23..80b3791240e4 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -66,19 +66,6 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); } - if (c->cpuid_level >= 0x00000001) { - u32 eax, ebx, ecx, edx; - - cpuid(0x00000001, &eax, &ebx, &ecx, &edx); - /* - * If HTT (EDX[28]) is set EBX[16:23] contain the number of - * apicids which are reserved per package. Store the resulting - * shift value for the package management code. - */ - if (edx & (1U << 28)) - c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); - } - /* * These CPUs declare support SSE4.2 instruction sets but * having low performance CRC32C instruction implementation. From 008053bcdeaad929fa3f96b75eee49da15ab0a04 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Thu, 15 Aug 2024 15:13:56 +0800 Subject: [PATCH 30/39] perf/x86/zhaoxin/uncore: update KX-7000 support zhaoxin inclusion category: feature ------------------- 1. Enhance perf KVM guest/host support so that either the host or the guest can be monitored independently. 2. Print the architecture name after the KX7000 PMC core driver loads successfully, indicating that the KX7000 architecture is shijidadao. 3. Rename KX8000 to KX7000 throughout the uncore driver. 4. Add logic_op support for the KX7000 uncore. 5. On the KX7000 platform, configure bit 16 (bsPMCDynamicEn_P) of MSR 1877h to 0 (it previously defaulted to 1) while the PMC driver loads, so that the KX7000 PMC HIF module can operate normally.
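Item 1 above wires the generic exclude_guest/exclude_host attribute bits into the new intel_ctrl_guest_mask/intel_ctrl_host_mask handling. As a hedged illustration (not part of the patch), the userspace sketch below restricts a counter to host-only execution through the standard perf_event_open() interface; it is what the perf tool expresses with the :H event modifier (:G for guest-only).

/*
 * Illustration only: count CPU cycles on one CPU while the host is running,
 * excluding guest execution.  Setting exclude_host = 1 instead would give
 * the guest-only view (perf's :G modifier).
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_host_only_cycles(int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.exclude_guest = 1;		/* count only while the host runs */

	/* system-wide on one CPU; needs the usual perf privileges */
	return syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
}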
Signed-off-by: leoliu-oc --- arch/x86/events/zhaoxin/core.c | 84 ++- arch/x86/events/zhaoxin/uncore.c | 1049 ++++++++++++++++-------------- arch/x86/events/zhaoxin/uncore.h | 77 +-- 3 files changed, 647 insertions(+), 563 deletions(-) diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c index 2957b416a6db..e493b176b336 100644 --- a/arch/x86/events/zhaoxin/core.c +++ b/arch/x86/events/zhaoxin/core.c @@ -259,7 +259,10 @@ static void zhaoxin_pmu_disable_all(void) static void zhaoxin_pmu_enable_all(int added) { - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, + x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); } static inline u64 zhaoxin_pmu_get_status(void) @@ -286,13 +289,31 @@ static inline void zxc_pmu_ack_status(u64 ack) zhaoxin_pmu_disable_all(); } -static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc) +static inline void zhaoxin_set_masks(struct perf_event *event, int idx) { - int idx = hwc->idx - INTEL_PMC_IDX_FIXED; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (event->attr.exclude_host) + __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask); + if (event->attr.exclude_guest) + __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask); +} + +static inline void zhaoxin_clear_masks(struct perf_event *event, int idx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask); + __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask); +} + +static void zhaoxin_pmu_disable_fixed(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; u64 ctrl_val, mask; + int idx = hwc->idx; - mask = 0xfULL << (idx * 4); - + mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4); rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; wrmsrl(hwc->config_base, ctrl_val); @@ -301,19 +322,23 @@ static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc) static void zhaoxin_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + zhaoxin_clear_masks(event, idx); if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - zhaoxin_pmu_disable_fixed(hwc); + zhaoxin_pmu_disable_fixed(event); return; } x86_pmu_disable_event(event); } -static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc) +static void zhaoxin_pmu_enable_fixed(struct perf_event *event) { - int idx = hwc->idx - INTEL_PMC_IDX_FIXED; - u64 ctrl_val, bits, mask; + struct hw_perf_event *hwc = &event->hw; + u64 ctrl_val, mask, bits = 0; + int idx = hwc->idx; /* * Enable IRQ generation (0x8), @@ -326,6 +351,7 @@ static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc) if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) bits |= 0x1; + idx -= INTEL_PMC_IDX_FIXED; bits <<= (idx * 4); mask = 0xfULL << (idx * 4); @@ -338,9 +364,12 @@ static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc) static void zhaoxin_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + zhaoxin_set_masks(event, idx); if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - zhaoxin_pmu_enable_fixed(hwc); + zhaoxin_pmu_enable_fixed(event); return; } @@ -456,6 +485,19 @@ static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config) return x86_event_sysfs_show(page, config, event); } +static struct perf_guest_switch_msr *zhaoxin_guest_get_msrs(int *nr, void *data) +{ + 
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; + + arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; + arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; + arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; + *nr = 1; + + return arr; +} + static const struct x86_pmu zhaoxin_pmu __initconst = { .name = "zhaoxin", .handle_irq = zhaoxin_pmu_handle_irq, @@ -478,6 +520,8 @@ static const struct x86_pmu zhaoxin_pmu __initconst = { .format_attrs = zx_arch_formats_attr, .events_sysfs_show = zhaoxin_event_sysfs_show, + + .guest_get_msrs = zhaoxin_guest_get_msrs, }; static const struct { int id; char *name; } zx_arch_events_map[] __initconst = { @@ -581,8 +625,8 @@ __init int zhaoxin_pmu_init(void) x86_pmu.event_constraints = wudaokou_event_constraints; - zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0515; - zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0x051a; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0515; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0x051a; zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700; zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709; @@ -595,8 +639,8 @@ __init int zhaoxin_pmu_init(void) x86_pmu.event_constraints = wudaokou_event_constraints; - zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0515; - zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0x051a; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0515; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0x051a; pr_cont("Lujiazui events, "); break; @@ -604,19 +648,22 @@ __init int zhaoxin_pmu_init(void) case 0x6b: zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = X86_CONFIG(.event = 0x02, .umask = 0x01, .inv = 0x01, - .cmask = 0x01); + .cmask = 0x01); memcpy(hw_cache_event_ids, lujiazui_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); + sizeof(hw_cache_event_ids)); x86_pmu.event_constraints = wudaokou_event_constraints; - zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0x051a; - zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0x051a; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0; if (boot_cpu_data.x86_model == 0x5b) pr_cont("Yongfeng events, "); + if (boot_cpu_data.x86_model == 0x6b) + pr_cont("Shijidadao events, "); + break; default: return -ENODEV; @@ -639,4 +686,3 @@ __init int zhaoxin_pmu_init(void) return 0; } - diff --git a/arch/x86/events/zhaoxin/uncore.c b/arch/x86/events/zhaoxin/uncore.c index 8d898a10d953..12f331334c40 100644 --- a/arch/x86/events/zhaoxin/uncore.c +++ b/arch/x86/events/zhaoxin/uncore.c @@ -7,7 +7,6 @@ static struct zhaoxin_uncore_type **uncore_msr_uncores = empty_uncore; static struct zhaoxin_uncore_type **uncore_pci_uncores = empty_uncore; static struct zhaoxin_uncore_type **uncore_mmio_uncores = empty_uncore; - static bool pcidrv_registered; static struct pci_driver *uncore_pci_driver; @@ -35,7 +34,7 @@ static int kh40000_pcibus_limit[KH40000_MAX_SUBNODE_NUMBER]; /* KX5000/KX6000 event control */ #define KX5000_UNC_CTL_EV_SEL_MASK 0x000000ff #define KX5000_UNC_CTL_UMASK_MASK 0x0000ff00 -#define KX5000_UNC_CTL_EDGE_DET (1 << 18) +#define KX5000_UNC_CTL_EDGE_DET (1 << 18) #define KX5000_UNC_CTL_EN (1 << 22) #define KX5000_UNC_CTL_INVERT (1 << 23) #define KX5000_UNC_CTL_CMASK_MASK 0x7000000 @@ -53,7 +52,7 @@ static int kh40000_pcibus_limit[KH40000_MAX_SUBNODE_NUMBER]; #define KX5000_UNC_FIXED_CTR_CTRL 0x395 /* KX5000/KX6000 uncore global control */ -#define 
KX5000_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 4) - 1) +#define KX5000_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 4) - 1) #define KX5000_UNC_GLOBAL_CTL_EN_FC (1ULL << 32) /* KX5000/KX6000 uncore register */ @@ -65,15 +64,14 @@ static int kh40000_pcibus_limit[KH40000_MAX_SUBNODE_NUMBER]; #define KH40000_PMON_CTL_UMASK_MASK 0x0000ff00 #define KH40000_PMON_CTL_RST (1 << 17) #define KH40000_PMON_CTL_EDGE_DET (1 << 18) -#define KH40000_PMON_CTL_EV_SEL_EXT (1 << 21) #define KH40000_PMON_CTL_EN (1 << 22) -#define KH40000_PMON_CTL_INVERT (1 << 23) -#define KH40000_PMON_CTL_TRESH_MASK 0xff000000 +#define KH40000_PMON_CTL_INVERT (1 << 23) +#define KH40000_PMON_CTL_THRESH_MASK 0xff000000 #define KH40000_PMON_RAW_EVENT_MASK (KH40000_PMON_CTL_EV_SEL_MASK | \ KH40000_PMON_CTL_UMASK_MASK | \ KH40000_PMON_CTL_EDGE_DET | \ KH40000_PMON_CTL_INVERT | \ - KH40000_PMON_CTL_TRESH_MASK) + KH40000_PMON_CTL_THRESH_MASK) /* KH40000 LLC register*/ #define KH40000_LLC_MSR_PMON_CTL0 0x1660 @@ -142,76 +140,94 @@ static int kh40000_pcibus_limit[KH40000_MAX_SUBNODE_NUMBER]; KH40000_PMON_BOX_CTL_RST_CTRS | \ KH40000_PMON_PCI_BOX_PMON_EN) -/* KX8000 LLC register*/ -#define KX8000_LLC_MSR_PMON_CTL0 0x1979 -#define KX8000_LLC_MSR_PMON_CTR0 0x1975 -#define KX8000_LLC_MSR_PMON_BLK_CTL 0x197e -/* KX8000 MESH register*/ -#define KX8000_MESH_MSR_PMON_CTL0 0x1983 -#define KX8000_MESH_MSR_PMON_CTR0 0x197f -#define KX8000_MESH_MSR_PMON_BLK_CTL 0x1987 +/* KX7000 event control */ +#define KX7000_PMON_CTL_EV_SEL_MASK 0x000000ff +#define KX7000_PMON_CTL_UMASK_MASK 0x0000ff00 +#define KX7000_PMON_CTL_RST (1 << 17) +#define KX7000_PMON_CTL_EDGE_DET (1 << 18) +#define KX7000_PMON_CTL_LOGIC_OP0 (1 << 19) +#define KX7000_PMON_CTL_LOGIC_OP1 (1 << 21) +#define KX7000_PMON_CTL_EN (1 << 22) +#define KX7000_PMON_CTL_INVERT (1 << 23) +#define KX7000_PMON_CTL_THRESH_MASK 0xff000000 +#define KX7000_PMON_RAW_EVENT_MASK (KX7000_PMON_CTL_EV_SEL_MASK | \ + KX7000_PMON_CTL_UMASK_MASK | \ + KX7000_PMON_CTL_EDGE_DET | \ + KX7000_PMON_CTL_LOGIC_OP0 | \ + KX7000_PMON_CTL_LOGIC_OP1 | \ + KX7000_PMON_CTL_INVERT | \ + KX7000_PMON_CTL_THRESH_MASK) -/* KX8000 HOMESTOP register*/ -#define KX8000_HOMESTOP_MSR_PMON_CTL0 0x196a -#define KX8000_HOMESTOP_MSR_PMON_CTR0 0x1966 -#define KX8000_HOMESTOP_MSR_PMON_BLK_CTL 0x196e -#define KX8000_HOMESTOP_MSR_PMON_FIXED_CTR 0x1970 -#define KX8000_HOMESTOP_MSR_PMON_FIXED_CTL 0x1971 +/* KX7000 LLC register*/ +#define KX7000_LLC_MSR_PMON_CTL0 0x1979 +#define KX7000_LLC_MSR_PMON_CTR0 0x1975 +#define KX7000_LLC_MSR_PMON_BLK_CTL 0x197e -/* KX8000 CCDie ZDI_PL register*/ -#define KX8000_CCD_ZDI_PL_MSR_PMON_CTL0 0x1960 -#define KX8000_CCD_ZDI_PL_MSR_PMON_CTR0 0x195c -#define KX8000_CCD_ZDI_PL_MSR_PMON_BLK_CTL 0x1964 +/* KX7000 MESH register*/ +#define KX7000_MESH_MSR_PMON_CTL0 0x1983 +#define KX7000_MESH_MSR_PMON_CTR0 0x197f +#define KX7000_MESH_MSR_PMON_BLK_CTL 0x1987 -/* KX8000 cIODie ZDI_PL register*/ -#define KX8000_IOD_ZDI_PL_MSR_PMON_CTL0 0x1894 -#define KX8000_IOD_ZDI_PL_MSR_PMON_CTR0 0x1890 -#define KX8000_IOD_ZDI_PL_MSR_PMON_BLK_CTL 0x1898 -#define KX8000_IOD_ZDI_PL_MSR_PMON_FIXED_CTR 0x189A -#define KX8000_IOD_ZDI_PL_MSR_PMON_FIXED_CTL 0x189B +/* KX7000 HOMESTOP register*/ +#define KX7000_HOMESTOP_MSR_PMON_CTL0 0x196a +#define KX7000_HOMESTOP_MSR_PMON_CTR0 0x1966 +#define KX7000_HOMESTOP_MSR_PMON_BLK_CTL 0x196e +#define KX7000_HOMESTOP_MSR_PMON_FIXED_CTR 0x1970 +#define KX7000_HOMESTOP_MSR_PMON_FIXED_CTL 0x1971 -/* KX8000 MC register*/ -#define KX8000_MC_A0_CHy_PMON_FIXED_CTL 0xe30 -#define KX8000_MC_A0_CHy_PMON_FIXED_CTR 0xe08 -#define 
KX8000_MC_A0_CHy_PMON_CTR0 0xe00 -#define KX8000_MC_A0_CHy_PMON_CTL0 0xe20 -#define KX8000_MC_A0_CHy_PMON_BLK_CTL 0xe34 +/* KX7000 CCDie ZDI_PL register*/ +#define KX7000_CCD_ZDI_PL_MSR_PMON_CTL0 0x1960 +#define KX7000_CCD_ZDI_PL_MSR_PMON_CTR0 0x195c +#define KX7000_CCD_ZDI_PL_MSR_PMON_BLK_CTL 0x1964 -#define KX8000_MC_A1_CHy_PMON_FIXED_CTL 0xe70 -#define KX8000_MC_A1_CHy_PMON_FIXED_CTR 0xe48 -#define KX8000_MC_A1_CHy_PMON_CTR0 0xe40 -#define KX8000_MC_A1_CHy_PMON_CTL0 0xe60 -#define KX8000_MC_A1_CHy_PMON_BLK_CTL 0xe74 +/* KX7000 cIODie ZDI_PL register*/ +#define KX7000_IOD_ZDI_PL_MSR_PMON_CTL0 0x1894 +#define KX7000_IOD_ZDI_PL_MSR_PMON_CTR0 0x1890 +#define KX7000_IOD_ZDI_PL_MSR_PMON_BLK_CTL 0x1898 +#define KX7000_IOD_ZDI_PL_MSR_PMON_FIXED_CTR 0x189A +#define KX7000_IOD_ZDI_PL_MSR_PMON_FIXED_CTL 0x189B -#define KX8000_MC_B0_CHy_PMON_FIXED_CTL 0xeb0 -#define KX8000_MC_B0_CHy_PMON_FIXED_CTR 0xe88 -#define KX8000_MC_B0_CHy_PMON_CTR0 0xe80 -#define KX8000_MC_B0_CHy_PMON_CTL0 0xea0 -#define KX8000_MC_B0_CHy_PMON_BLK_CTL 0xeb4 +/* KX7000 MC register*/ +#define KX7000_MC_A0_CHy_PMON_FIXED_CTL 0xe30 +#define KX7000_MC_A0_CHy_PMON_FIXED_CTR 0xe08 +#define KX7000_MC_A0_CHy_PMON_CTR0 0xe00 +#define KX7000_MC_A0_CHy_PMON_CTL0 0xe20 +#define KX7000_MC_A0_CHy_PMON_BLK_CTL 0xe34 -#define KX8000_MC_B1_CHy_PMON_FIXED_CTL 0xef0 -#define KX8000_MC_B1_CHy_PMON_FIXED_CTR 0xec8 -#define KX8000_MC_B1_CHy_PMON_CTR0 0xec0 -#define KX8000_MC_B1_CHy_PMON_CTL0 0xee0 -#define KX8000_MC_B1_CHy_PMON_BLK_CTL 0xef4 - -#define KX8000_ZDI_DL_MMIO_PMON_CTR0 0xf00 -#define KX8000_ZDI_DL_MMIO_PMON_CTL0 0xf28 -#define KX8000_ZDI_DL_MMIO_PMON_BLK_CTL 0xf44 -#define KX8000_IOD_ZDI_DL_MMIO_BASE_OFFSET 0x168 -#define KX8000_CCD_ZDI_DL_MMIO_BASE_OFFSET 0x170 -#define KX8000_ZDI_DL_MMIO_BASE_MASK 0x3fff -#define KX8000_ZDI_DL_MMIO_BASE_MASK 0x3fff -#define KX8000_ZDI_DL_MMIO_MEM0_MASK 0xfffff000 -#define KX8000_ZDI_DL_MMIO_SIZE 0x1000 +#define KX7000_MC_A1_CHy_PMON_FIXED_CTL 0xe70 +#define KX7000_MC_A1_CHy_PMON_FIXED_CTR 0xe48 +#define KX7000_MC_A1_CHy_PMON_CTR0 0xe40 +#define KX7000_MC_A1_CHy_PMON_CTL0 0xe60 +#define KX7000_MC_A1_CHy_PMON_BLK_CTL 0xe74 +#define KX7000_MC_B0_CHy_PMON_FIXED_CTL 0xeb0 +#define KX7000_MC_B0_CHy_PMON_FIXED_CTR 0xe88 +#define KX7000_MC_B0_CHy_PMON_CTR0 0xe80 +#define KX7000_MC_B0_CHy_PMON_CTL0 0xea0 +#define KX7000_MC_B0_CHy_PMON_BLK_CTL 0xeb4 +#define KX7000_MC_B1_CHy_PMON_FIXED_CTL 0xef0 +#define KX7000_MC_B1_CHy_PMON_FIXED_CTR 0xec8 +#define KX7000_MC_B1_CHy_PMON_CTR0 0xec0 +#define KX7000_MC_B1_CHy_PMON_CTL0 0xee0 +#define KX7000_MC_B1_CHy_PMON_BLK_CTL 0xef4 +#define KX7000_ZDI_DL_MMIO_PMON_CTR0 0xf00 +#define KX7000_ZDI_DL_MMIO_PMON_CTL0 0xf28 +#define KX7000_ZDI_DL_MMIO_PMON_BLK_CTL 0xf44 +#define KX7000_IOD_ZDI_DL_MMIO_BASE_OFFSET 0x168 +#define KX7000_CCD_ZDI_DL_MMIO_BASE_OFFSET 0x170 +#define KX7000_ZDI_DL_MMIO_BASE_MASK 0x3fff +#define KX7000_ZDI_DL_MMIO_BASE_MASK 0x3fff +#define KX7000_ZDI_DL_MMIO_MEM0_MASK 0xfffff000 +#define KX7000_ZDI_DL_MMIO_SIZE 0x1000 DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(logic_op0, logic_op0, "config:19"); +DEFINE_UNCORE_FORMAT_ATTR(logic_op1, logic_op1, "config:21"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(cmask3, cmask, "config:24-26"); DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); @@ -382,24 +398,24 @@ DEFINE_PER_CPU(cpumask_t, zx_subnode_core_bits); static void 
zx_gen_core_map(void) { - int i, nr, cpu; + int cpu, i; int cluster_id, subnode_id; for_each_present_cpu(cpu) { cluster_id = zx_topology_cluster_id(cpu); - for (i = 0; i < 4; i++) { - nr = (cluster_id << 2) + i; - cpumask_set_cpu(nr, &per_cpu(zx_cluster_core_bits, cpu)); + for_each_present_cpu(i) { + if (zx_topology_cluster_id(i) == cluster_id) + cpumask_set_cpu(i, &per_cpu(zx_cluster_core_bits, cpu)); } } for_each_present_cpu(cpu) { subnode_id = zx_topology_subnode_id(cpu); - for (i = 0; i < 8; i++) { - nr = (subnode_id << 3) + i; - cpumask_set_cpu(nr, &per_cpu(zx_subnode_core_bits, cpu)); + for_each_present_cpu(i) { + if (zx_topology_subnode_id(i) == subnode_id) + cpumask_set_cpu(i, &per_cpu(zx_subnode_core_bits, cpu)); } } } @@ -452,8 +468,8 @@ static u64 uncore_msr_read_counter(struct zhaoxin_uncore_box *box, struct perf_e return count; } -static void uncore_assign_hw_event(struct zhaoxin_uncore_box *box, - struct perf_event *event, int idx) +static void uncore_assign_hw_event(struct zhaoxin_uncore_box *box, struct perf_event *event, + int idx) { struct hw_perf_event *hwc = &event->hw; @@ -495,7 +511,7 @@ again: /*KX5000/KX6000 uncore ops start*/ static void kx5000_uncore_msr_disable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) + struct perf_event *event) { wrmsrl(event->hw.config_base, 0); } @@ -511,8 +527,7 @@ static void kx5000_uncore_msr_enable_box(struct zhaoxin_uncore_box *box) KX5000_UNC_GLOBAL_CTL_EN_PC_ALL | KX5000_UNC_GLOBAL_CTL_EN_FC); } -static void kx5000_uncore_msr_enable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static void kx5000_uncore_msr_enable_event(struct zhaoxin_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -550,7 +565,7 @@ static struct zhaoxin_uncore_ops kx5000_uncore_msr_ops = { static struct zhaoxin_uncore_type kx5000_uncore_box = { .name = "", - .num_counters = 4, + .num_counters = 4, .num_boxes = 1, .perf_ctr_bits = 48, .fixed_ctr_bits = 48, @@ -572,7 +587,7 @@ static struct zhaoxin_uncore_type *kx5000_msr_uncores[] = { /*KH40000 msr ops start*/ static void kh40000_uncore_msr_disable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) + struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -580,7 +595,7 @@ static void kh40000_uncore_msr_disable_event(struct zhaoxin_uncore_box *box, } static void kh40000_uncore_msr_enable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) + struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -650,57 +665,57 @@ static struct uncore_event_desc kh40000_uncore_zzi_box_events[] = { }; static struct zhaoxin_uncore_ops kh40000_uncore_msr_ops = { - .init_box = kh40000_uncore_msr_init_box, - .disable_box = kh40000_uncore_msr_disable_box, - .enable_box = kh40000_uncore_msr_enable_box, - .disable_event = kh40000_uncore_msr_disable_event, - .enable_event = kh40000_uncore_msr_enable_event, - .read_counter = uncore_msr_read_counter, + .init_box = kh40000_uncore_msr_init_box, + .disable_box = kh40000_uncore_msr_disable_box, + .enable_box = kh40000_uncore_msr_enable_box, + .disable_event = kh40000_uncore_msr_disable_event, + .enable_event = kh40000_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, }; static struct zhaoxin_uncore_type kh40000_uncore_llc_box = { - .name = "llc", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_ctl = KH40000_LLC_MSR_PMON_CTL0, - .perf_ctr = KH40000_LLC_MSR_PMON_CTR0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl 
= KH40000_LLC_MSR_PMON_BLK_CTL, - .event_descs = kh40000_uncore_llc_box_events, - .ops = &kh40000_uncore_msr_ops, - .format_group = &kh40000_uncore_format_group, + .name = "llc", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = KH40000_LLC_MSR_PMON_CTL0, + .perf_ctr = KH40000_LLC_MSR_PMON_CTR0, + .event_mask = KH40000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_LLC_MSR_PMON_BLK_CTL, + .event_descs = kh40000_uncore_llc_box_events, + .ops = &kh40000_uncore_msr_ops, + .format_group = &kh40000_uncore_format_group, }; static struct zhaoxin_uncore_type kh40000_uncore_hif_box = { - .name = "hif", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .event_ctl = KH40000_HIF_MSR_PMON_CTL0, - .perf_ctr = KH40000_HIF_MSR_PMON_CTR0, - .fixed_ctr = KH40000_HIF_MSR_PMON_FIXED_CTR, - .fixed_ctl = KH40000_HIF_MSR_PMON_FIXED_CTL, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KH40000_HIF_MSR_PMON_BLK_CTL, - .event_descs = kh40000_uncore_hif_box_events, - .ops = &kh40000_uncore_msr_ops, - .format_group = &kh40000_uncore_format_group, + .name = "hif", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .event_ctl = KH40000_HIF_MSR_PMON_CTL0, + .perf_ctr = KH40000_HIF_MSR_PMON_CTR0, + .fixed_ctr = KH40000_HIF_MSR_PMON_FIXED_CTR, + .fixed_ctl = KH40000_HIF_MSR_PMON_FIXED_CTL, + .event_mask = KH40000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_HIF_MSR_PMON_BLK_CTL, + .event_descs = kh40000_uncore_hif_box_events, + .ops = &kh40000_uncore_msr_ops, + .format_group = &kh40000_uncore_format_group, }; static struct zhaoxin_uncore_type kh40000_uncore_zzi_box = { - .name = "zzi", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_ctl = KH40000_ZZI_MSR_PMON_CTL0, - .perf_ctr = KH40000_ZZI_MSR_PMON_CTR0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KH40000_ZZI_MSR_PMON_BLK_CTL, - .event_descs = kh40000_uncore_zzi_box_events, - .ops = &kh40000_uncore_msr_ops, - .format_group = &kh40000_uncore_format_group, + .name = "zzi", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = KH40000_ZZI_MSR_PMON_CTL0, + .perf_ctr = KH40000_ZZI_MSR_PMON_CTR0, + .event_mask = KH40000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_ZZI_MSR_PMON_BLK_CTL, + .event_descs = kh40000_uncore_zzi_box_events, + .ops = &kh40000_uncore_msr_ops, + .format_group = &kh40000_uncore_format_group, }; static struct zhaoxin_uncore_type *kh40000_msr_uncores[] = { @@ -713,7 +728,7 @@ static struct zhaoxin_uncore_type *kh40000_msr_uncores[] = { /*KH40000 pci ops start*/ static void kh40000_uncore_pci_disable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) + struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; @@ -722,7 +737,7 @@ static void kh40000_uncore_pci_disable_event(struct zhaoxin_uncore_box *box, } static void kh40000_uncore_pci_enable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) + struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; @@ -754,8 +769,7 @@ static void kh40000_uncore_pci_enable_box(struct zhaoxin_uncore_box *box) } } -static u64 kh40000_uncore_pci_read_counter(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static u64 kh40000_uncore_pci_read_counter(struct zhaoxin_uncore_box *box, struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; @@ -796,102 +810,102 @@ static struct 
uncore_event_desc kh40000_uncore_pxptrf_events[] = { }; static struct zhaoxin_uncore_ops kh40000_uncore_pci_ops = { - .init_box = kh40000_uncore_pci_init_box, - .disable_box = kh40000_uncore_pci_disable_box, - .enable_box = kh40000_uncore_pci_enable_box, - .disable_event = kh40000_uncore_pci_disable_event, - .enable_event = kh40000_uncore_pci_enable_event, - .read_counter = kh40000_uncore_pci_read_counter + .init_box = kh40000_uncore_pci_init_box, + .disable_box = kh40000_uncore_pci_disable_box, + .enable_box = kh40000_uncore_pci_enable_box, + .disable_event = kh40000_uncore_pci_disable_event, + .enable_event = kh40000_uncore_pci_enable_event, + .read_counter = kh40000_uncore_pci_read_counter }; static struct zhaoxin_uncore_type kh40000_uncore_mc0 = { - .name = "mc0", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = KH40000_MC0_CHy_PMON_FIXED_CTR, - .fixed_ctl = KH40000_MC0_CHy_PMON_FIXED_CTL, - .event_descs = kh40000_uncore_imc_events, - .perf_ctr = KH40000_MC0_CHy_PMON_CTR0, - .event_ctl = KH40000_MC0_CHy_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KH40000_MC0_CHy_PMON_BLK_CTL, - .ops = &kh40000_uncore_pci_ops, - .format_group = &kh40000_uncore_format_group + .name = "mc0", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = KH40000_MC0_CHy_PMON_FIXED_CTR, + .fixed_ctl = KH40000_MC0_CHy_PMON_FIXED_CTL, + .event_descs = kh40000_uncore_imc_events, + .perf_ctr = KH40000_MC0_CHy_PMON_CTR0, + .event_ctl = KH40000_MC0_CHy_PMON_CTL0, + .event_mask = KH40000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_MC0_CHy_PMON_BLK_CTL, + .ops = &kh40000_uncore_pci_ops, + .format_group = &kh40000_uncore_format_group }; static struct zhaoxin_uncore_type kh40000_uncore_mc1 = { - .name = "mc1", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = KH40000_MC1_CHy_PMON_FIXED_CTR, - .fixed_ctl = KH40000_MC1_CHy_PMON_FIXED_CTL, - .event_descs = kh40000_uncore_imc_events, - .perf_ctr = KH40000_MC1_CHy_PMON_CTR0, - .event_ctl = KH40000_MC1_CHy_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KH40000_MC1_CHy_PMON_BLK_CTL, - .ops = &kh40000_uncore_pci_ops, - .format_group = &kh40000_uncore_format_group + .name = "mc1", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = KH40000_MC1_CHy_PMON_FIXED_CTR, + .fixed_ctl = KH40000_MC1_CHy_PMON_FIXED_CTL, + .event_descs = kh40000_uncore_imc_events, + .perf_ctr = KH40000_MC1_CHy_PMON_CTR0, + .event_ctl = KH40000_MC1_CHy_PMON_CTL0, + .event_mask = KH40000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_MC1_CHy_PMON_BLK_CTL, + .ops = &kh40000_uncore_pci_ops, + .format_group = &kh40000_uncore_format_group }; static struct zhaoxin_uncore_type kh40000_uncore_pci = { - .name = "pci", - .num_counters = 4, - .num_boxes = 10, - .perf_ctr_bits = 48, - .event_descs = kh40000_uncore_pci_events, - .perf_ctr = KH40000_PCI_PMON_CTR0, - .event_ctl = KH40000_PCI_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KH40000_PCI_PMON_BLK_CTL, - .ops = &kh40000_uncore_pci_ops, - .format_group = &kh40000_uncore_format_group + .name = "pci", + .num_counters = 4, + .num_boxes = 10, + .perf_ctr_bits = 48, + .event_descs = kh40000_uncore_pci_events, + .perf_ctr = KH40000_PCI_PMON_CTR0, + .event_ctl = KH40000_PCI_PMON_CTL0, + .event_mask = KH40000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_PCI_PMON_BLK_CTL, + .ops = &kh40000_uncore_pci_ops, + .format_group = 
&kh40000_uncore_format_group }; static struct zhaoxin_uncore_type kh40000_uncore_zpi_dll = { - .name = "zpi_dll", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_descs = kh40000_uncore_zpi_dll_events, - .perf_ctr = KH40000_ZPI_DLL_PMON_CTR0, - .event_ctl = KH40000_ZPI_DLL_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KH40000_ZPI_DLL_PMON_BLK_CTL, - .ops = &kh40000_uncore_pci_ops, - .format_group = &kh40000_uncore_format_group + .name = "zpi_dll", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_descs = kh40000_uncore_zpi_dll_events, + .perf_ctr = KH40000_ZPI_DLL_PMON_CTR0, + .event_ctl = KH40000_ZPI_DLL_PMON_CTL0, + .event_mask = KH40000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_ZPI_DLL_PMON_BLK_CTL, + .ops = &kh40000_uncore_pci_ops, + .format_group = &kh40000_uncore_format_group }; static struct zhaoxin_uncore_type kh40000_uncore_zdi_dll = { - .name = "zdi_dll", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_descs = kh40000_uncore_zdi_dll_events, - .perf_ctr = KH40000_ZDI_DLL_PMON_CTR0, - .event_ctl = KH40000_ZDI_DLL_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KH40000_ZDI_DLL_PMON_BLK_CTL, - .ops = &kh40000_uncore_pci_ops, - .format_group = &kh40000_uncore_format_group + .name = "zdi_dll", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_descs = kh40000_uncore_zdi_dll_events, + .perf_ctr = KH40000_ZDI_DLL_PMON_CTR0, + .event_ctl = KH40000_ZDI_DLL_PMON_CTL0, + .event_mask = KH40000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_ZDI_DLL_PMON_BLK_CTL, + .ops = &kh40000_uncore_pci_ops, + .format_group = &kh40000_uncore_format_group }; static struct zhaoxin_uncore_type kh40000_uncore_pxptrf = { - .name = "pxptrf", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_descs = kh40000_uncore_pxptrf_events, - .perf_ctr = KH40000_PXPTRF_PMON_CTR0, - .event_ctl = KH40000_PXPTRF_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KH40000_PXPTRF_PMON_BLK_CTL, - .ops = &kh40000_uncore_pci_ops, - .format_group = &kh40000_uncore_format_group + .name = "pxptrf", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_descs = kh40000_uncore_pxptrf_events, + .perf_ctr = KH40000_PXPTRF_PMON_CTR0, + .event_ctl = KH40000_PXPTRF_PMON_CTL0, + .event_mask = KH40000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_PXPTRF_PMON_BLK_CTL, + .ops = &kh40000_uncore_pci_ops, + .format_group = &kh40000_uncore_format_group }; enum { @@ -904,12 +918,12 @@ enum { }; static struct zhaoxin_uncore_type *kh40000_pci_uncores[] = { - [KH40000_PCI_UNCORE_MC0] = &kh40000_uncore_mc0, - [KH40000_PCI_UNCORE_MC1] = &kh40000_uncore_mc1, - [KH40000_PCI_UNCORE_PCI] = &kh40000_uncore_pci, - [KH40000_PCI_UNCORE_ZPI_DLL] = &kh40000_uncore_zpi_dll, - [KH40000_PCI_UNCORE_ZDI_DLL] = &kh40000_uncore_zdi_dll, - [KH40000_PCI_UNCORE_PXPTRF] = &kh40000_uncore_pxptrf, + [KH40000_PCI_UNCORE_MC0] = &kh40000_uncore_mc0, + [KH40000_PCI_UNCORE_MC1] = &kh40000_uncore_mc1, + [KH40000_PCI_UNCORE_PCI] = &kh40000_uncore_pci, + [KH40000_PCI_UNCORE_ZPI_DLL] = &kh40000_uncore_zpi_dll, + [KH40000_PCI_UNCORE_ZDI_DLL] = &kh40000_uncore_zdi_dll, + [KH40000_PCI_UNCORE_PXPTRF] = &kh40000_uncore_pxptrf, NULL, }; @@ -988,337 +1002,379 @@ static const struct pci_device_id kh40000_uncore_pci_ids[] = { }; static struct pci_driver kh40000_uncore_pci_driver = { - .name = "kh40000_uncore", - .id_table = kh40000_uncore_pci_ids, + .name = "kh40000_uncore", + .id_table = kh40000_uncore_pci_ids, }; /*KH40000 
pci ops end*/ - -/*KX8000 msr ops start*/ -static unsigned int kx8000_uncore_msr_offsets[] = { +/*KX7000 msr ops start*/ +static unsigned int kx7000_uncore_msr_offsets[] = { 0x0, 0x13, 0x27, 0x3b, 0x4f, 0x63, 0x77, 0x8b }; -static struct zhaoxin_uncore_type kx8000_uncore_mesh_box = { - .name = "mesh", - .num_counters = 4, - .num_boxes = 8, - .perf_ctr_bits = 48, - .event_ctl = KX8000_MESH_MSR_PMON_CTL0, - .perf_ctr = KX8000_MESH_MSR_PMON_CTR0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_MESH_MSR_PMON_BLK_CTL, - .msr_offsets = kx8000_uncore_msr_offsets, - .ops = &kh40000_uncore_msr_ops, - .format_group = &kh40000_uncore_format_group, -}; - -static struct zhaoxin_uncore_type kx8000_uncore_llc_box = { - .name = "llc", - .num_counters = 4, - .num_boxes = 8, - .perf_ctr_bits = 48, - .event_ctl = KX8000_LLC_MSR_PMON_CTL0, - .perf_ctr = KX8000_LLC_MSR_PMON_CTR0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_LLC_MSR_PMON_BLK_CTL, - .msr_offsets = kx8000_uncore_msr_offsets, - .ops = &kh40000_uncore_msr_ops, - .format_group = &kh40000_uncore_format_group, -}; - -static struct zhaoxin_uncore_type kx8000_uncore_homestop = { - .name = "homestop", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .event_ctl = KX8000_HOMESTOP_MSR_PMON_CTL0, - .perf_ctr = KX8000_HOMESTOP_MSR_PMON_CTR0, - .fixed_ctr = KX8000_HOMESTOP_MSR_PMON_FIXED_CTR, - .fixed_ctl = KX8000_HOMESTOP_MSR_PMON_FIXED_CTL, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_HOMESTOP_MSR_PMON_BLK_CTL, - .ops = &kh40000_uncore_msr_ops, - .format_group = &kh40000_uncore_format_group, -}; - -static struct zhaoxin_uncore_type kx8000_uncore_ccd_zdi_pl = { - .name = "ccd_zdi_pl", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .event_ctl = KX8000_CCD_ZDI_PL_MSR_PMON_CTL0, - .perf_ctr = KX8000_CCD_ZDI_PL_MSR_PMON_CTR0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_CCD_ZDI_PL_MSR_PMON_BLK_CTL, - .ops = &kh40000_uncore_msr_ops, - .format_group = &kh40000_uncore_format_group, -}; - -static struct zhaoxin_uncore_type kx8000_uncore_iod_zdi_pl = { - .name = "iod_zdi_pl", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .event_ctl = KX8000_IOD_ZDI_PL_MSR_PMON_CTL0, - .perf_ctr = KX8000_IOD_ZDI_PL_MSR_PMON_CTR0, - .fixed_ctr = KX8000_IOD_ZDI_PL_MSR_PMON_FIXED_CTR, - .fixed_ctl = KX8000_IOD_ZDI_PL_MSR_PMON_FIXED_CTL, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_IOD_ZDI_PL_MSR_PMON_BLK_CTL, - .ops = &kh40000_uncore_msr_ops, - .format_group = &kh40000_uncore_format_group, -}; - - -static struct zhaoxin_uncore_type *kx8000_msr_uncores[] = { - &kx8000_uncore_llc_box, - &kx8000_uncore_mesh_box, - &kh40000_uncore_hif_box, - &kx8000_uncore_homestop, - &kx8000_uncore_ccd_zdi_pl, - &kx8000_uncore_iod_zdi_pl, +static struct attribute *kx7000_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_logic_op0.attr, + &format_attr_logic_op1.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, NULL, }; -/*KX8000 msr ops end*/ -/*KX8000 pci ops start*/ -static unsigned int kx8000_mc_ctr_lh_offsets[] = { +static struct attribute_group kx7000_uncore_format_group = { + .name = "format", + .attrs = kx7000_uncore_formats_attr, +}; + +static struct zhaoxin_uncore_type kx7000_uncore_mesh_box = { + .name = "mesh", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .event_ctl = 
KX7000_MESH_MSR_PMON_CTL0, + .perf_ctr = KX7000_MESH_MSR_PMON_CTR0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_MESH_MSR_PMON_BLK_CTL, + .msr_offsets = kx7000_uncore_msr_offsets, + .ops = &kh40000_uncore_msr_ops, + .format_group = &kx7000_uncore_format_group, +}; + +static struct zhaoxin_uncore_type kx7000_uncore_llc_box = { + .name = "llc", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .event_ctl = KX7000_LLC_MSR_PMON_CTL0, + .perf_ctr = KX7000_LLC_MSR_PMON_CTR0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_LLC_MSR_PMON_BLK_CTL, + .msr_offsets = kx7000_uncore_msr_offsets, + .ops = &kh40000_uncore_msr_ops, + .format_group = &kx7000_uncore_format_group, +}; + +static struct zhaoxin_uncore_type kx7000_uncore_hif_box = { + .name = "hif", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .event_ctl = KH40000_HIF_MSR_PMON_CTL0, + .perf_ctr = KH40000_HIF_MSR_PMON_CTR0, + .fixed_ctr = KH40000_HIF_MSR_PMON_FIXED_CTR, + .fixed_ctl = KH40000_HIF_MSR_PMON_FIXED_CTL, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_HIF_MSR_PMON_BLK_CTL, + .ops = &kh40000_uncore_msr_ops, + .format_group = &kx7000_uncore_format_group, +}; + +static struct zhaoxin_uncore_type kx7000_uncore_homestop = { + .name = "homestop", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .event_ctl = KX7000_HOMESTOP_MSR_PMON_CTL0, + .perf_ctr = KX7000_HOMESTOP_MSR_PMON_CTR0, + .fixed_ctr = KX7000_HOMESTOP_MSR_PMON_FIXED_CTR, + .fixed_ctl = KX7000_HOMESTOP_MSR_PMON_FIXED_CTL, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_HOMESTOP_MSR_PMON_BLK_CTL, + .ops = &kh40000_uncore_msr_ops, + .format_group = &kx7000_uncore_format_group, +}; + +static struct zhaoxin_uncore_type kx7000_uncore_ccd_zdi_pl = { + .name = "ccd_zdi_pl", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .event_ctl = KX7000_CCD_ZDI_PL_MSR_PMON_CTL0, + .perf_ctr = KX7000_CCD_ZDI_PL_MSR_PMON_CTR0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_CCD_ZDI_PL_MSR_PMON_BLK_CTL, + .ops = &kh40000_uncore_msr_ops, + .format_group = &kx7000_uncore_format_group, +}; + +static struct zhaoxin_uncore_type kx7000_uncore_iod_zdi_pl = { + .name = "iod_zdi_pl", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .event_ctl = KX7000_IOD_ZDI_PL_MSR_PMON_CTL0, + .perf_ctr = KX7000_IOD_ZDI_PL_MSR_PMON_CTR0, + .fixed_ctr = KX7000_IOD_ZDI_PL_MSR_PMON_FIXED_CTR, + .fixed_ctl = KX7000_IOD_ZDI_PL_MSR_PMON_FIXED_CTL, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_IOD_ZDI_PL_MSR_PMON_BLK_CTL, + .ops = &kh40000_uncore_msr_ops, + .format_group = &kx7000_uncore_format_group, +}; + + +static struct zhaoxin_uncore_type *kx7000_msr_uncores[] = { + &kx7000_uncore_llc_box, + &kx7000_uncore_mesh_box, + &kx7000_uncore_hif_box, + &kx7000_uncore_homestop, + &kx7000_uncore_ccd_zdi_pl, + &kx7000_uncore_iod_zdi_pl, + NULL, +}; +/*KX7000 msr ops end*/ + +/*KX7000 pci ops start*/ +static unsigned int kx7000_mc_ctr_lh_offsets[] = { 0xc, 0xe, 0x10, 0x12, 0x14 }; -static u64 kx8000_uncore_pci_mc_read_counter(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static u64 kx7000_uncore_pci_mc_read_counter(struct zhaoxin_uncore_box *box, + struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; u64 count = 0; pci_read_config_word(pdev, hwc->event_base, (u16 *)&count + 3); - 
pci_read_config_dword(pdev, hwc->event_base + kx8000_mc_ctr_lh_offsets[hwc->idx], - (u32 *)&count); + pci_read_config_dword(pdev, hwc->event_base + kx7000_mc_ctr_lh_offsets[hwc->idx], + (u32 *)&count); return count; } -static struct zhaoxin_uncore_ops kx8000_uncore_pci_mc_ops = { - .init_box = kh40000_uncore_pci_init_box, - .disable_box = kh40000_uncore_pci_disable_box, - .enable_box = kh40000_uncore_pci_enable_box, - .disable_event = kh40000_uncore_pci_disable_event, - .enable_event = kh40000_uncore_pci_enable_event, - .read_counter = kx8000_uncore_pci_mc_read_counter +static struct zhaoxin_uncore_ops kx7000_uncore_pci_mc_ops = { + .init_box = kh40000_uncore_pci_init_box, + .disable_box = kh40000_uncore_pci_disable_box, + .enable_box = kh40000_uncore_pci_enable_box, + .disable_event = kh40000_uncore_pci_disable_event, + .enable_event = kh40000_uncore_pci_enable_event, + .read_counter = kx7000_uncore_pci_mc_read_counter }; -static struct zhaoxin_uncore_type kx8000_uncore_mc_a0 = { - .name = "mc_a0", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = KX8000_MC_A0_CHy_PMON_FIXED_CTR, - .fixed_ctl = KX8000_MC_A0_CHy_PMON_FIXED_CTL, - .perf_ctr = KX8000_MC_A0_CHy_PMON_CTR0, - .event_ctl = KX8000_MC_A0_CHy_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_MC_A0_CHy_PMON_BLK_CTL, - .ops = &kx8000_uncore_pci_mc_ops, - .format_group = &kh40000_uncore_format_group +static struct zhaoxin_uncore_type kx7000_uncore_mc_a0 = { + .name = "mc_a0", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = KX7000_MC_A0_CHy_PMON_FIXED_CTR, + .fixed_ctl = KX7000_MC_A0_CHy_PMON_FIXED_CTL, + .perf_ctr = KX7000_MC_A0_CHy_PMON_CTR0, + .event_ctl = KX7000_MC_A0_CHy_PMON_CTL0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_MC_A0_CHy_PMON_BLK_CTL, + .ops = &kx7000_uncore_pci_mc_ops, + .format_group = &kx7000_uncore_format_group, }; -static struct zhaoxin_uncore_type kx8000_uncore_mc_a1 = { - .name = "mc_a1", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = KX8000_MC_A1_CHy_PMON_FIXED_CTR, - .fixed_ctl = KX8000_MC_A1_CHy_PMON_FIXED_CTL, - .perf_ctr = KX8000_MC_A1_CHy_PMON_CTR0, - .event_ctl = KX8000_MC_A1_CHy_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_MC_A1_CHy_PMON_BLK_CTL, - .ops = &kx8000_uncore_pci_mc_ops, - .format_group = &kh40000_uncore_format_group +static struct zhaoxin_uncore_type kx7000_uncore_mc_a1 = { + .name = "mc_a1", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = KX7000_MC_A1_CHy_PMON_FIXED_CTR, + .fixed_ctl = KX7000_MC_A1_CHy_PMON_FIXED_CTL, + .perf_ctr = KX7000_MC_A1_CHy_PMON_CTR0, + .event_ctl = KX7000_MC_A1_CHy_PMON_CTL0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_MC_A1_CHy_PMON_BLK_CTL, + .ops = &kx7000_uncore_pci_mc_ops, + .format_group = &kx7000_uncore_format_group, }; -static struct zhaoxin_uncore_type kx8000_uncore_mc_b0 = { - .name = "mc_b0", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = KX8000_MC_B0_CHy_PMON_FIXED_CTR, - .fixed_ctl = KX8000_MC_B0_CHy_PMON_FIXED_CTL, - .perf_ctr = KX8000_MC_B0_CHy_PMON_CTR0, - .event_ctl = KX8000_MC_B0_CHy_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_MC_B0_CHy_PMON_BLK_CTL, - .ops = &kx8000_uncore_pci_mc_ops, - .format_group = &kh40000_uncore_format_group +static struct 
zhaoxin_uncore_type kx7000_uncore_mc_b0 = { + .name = "mc_b0", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = KX7000_MC_B0_CHy_PMON_FIXED_CTR, + .fixed_ctl = KX7000_MC_B0_CHy_PMON_FIXED_CTL, + .perf_ctr = KX7000_MC_B0_CHy_PMON_CTR0, + .event_ctl = KX7000_MC_B0_CHy_PMON_CTL0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_MC_B0_CHy_PMON_BLK_CTL, + .ops = &kx7000_uncore_pci_mc_ops, + .format_group = &kx7000_uncore_format_group, }; -static struct zhaoxin_uncore_type kx8000_uncore_mc_b1 = { - .name = "mc_b1", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = KX8000_MC_B1_CHy_PMON_FIXED_CTR, - .fixed_ctl = KX8000_MC_B1_CHy_PMON_FIXED_CTL, - .perf_ctr = KX8000_MC_B1_CHy_PMON_CTR0, - .event_ctl = KX8000_MC_B1_CHy_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_MC_B1_CHy_PMON_BLK_CTL, - .ops = &kx8000_uncore_pci_mc_ops, - .format_group = &kh40000_uncore_format_group +static struct zhaoxin_uncore_type kx7000_uncore_mc_b1 = { + .name = "mc_b1", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = KX7000_MC_B1_CHy_PMON_FIXED_CTR, + .fixed_ctl = KX7000_MC_B1_CHy_PMON_FIXED_CTL, + .perf_ctr = KX7000_MC_B1_CHy_PMON_CTR0, + .event_ctl = KX7000_MC_B1_CHy_PMON_CTL0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_MC_B1_CHy_PMON_BLK_CTL, + .ops = &kx7000_uncore_pci_mc_ops, + .format_group = &kx7000_uncore_format_group, }; -static struct zhaoxin_uncore_type kx8000_uncore_pci = { - .name = "pci", - .num_counters = 4, - .num_boxes = 17, - .perf_ctr_bits = 48, - .event_descs = kh40000_uncore_pci_events, - .perf_ctr = KH40000_PCI_PMON_CTR0, - .event_ctl = KH40000_PCI_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KH40000_PCI_PMON_BLK_CTL, - .ops = &kh40000_uncore_pci_ops, - .format_group = &kh40000_uncore_format_group +static struct zhaoxin_uncore_type kx7000_uncore_pci = { + .name = "pci", + .num_counters = 4, + .num_boxes = 17, + .perf_ctr_bits = 48, + .perf_ctr = KH40000_PCI_PMON_CTR0, + .event_ctl = KH40000_PCI_PMON_CTL0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_PCI_PMON_BLK_CTL, + .ops = &kh40000_uncore_pci_ops, + .format_group = &kx7000_uncore_format_group, }; +static struct zhaoxin_uncore_type kx7000_uncore_pxptrf = { + .name = "pxptrf", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_descs = kh40000_uncore_pxptrf_events, + .perf_ctr = KH40000_PXPTRF_PMON_CTR0, + .event_ctl = KH40000_PXPTRF_PMON_CTL0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KH40000_PXPTRF_PMON_BLK_CTL, + .ops = &kh40000_uncore_pci_ops, + .format_group = &kx7000_uncore_format_group, +}; enum { - KX8000_PCI_UNCORE_MC_A0, - KX8000_PCI_UNCORE_MC_A1, - KX8000_PCI_UNCORE_MC_B0, - KX8000_PCI_UNCORE_MC_B1, - KX8000_PCI_UNCORE_PCI, - KX8000_PCI_UNCORE_PXPTRF, + KX7000_PCI_UNCORE_MC_A0, + KX7000_PCI_UNCORE_MC_A1, + KX7000_PCI_UNCORE_MC_B0, + KX7000_PCI_UNCORE_MC_B1, + KX7000_PCI_UNCORE_PCI, + KX7000_PCI_UNCORE_PXPTRF, }; -static struct zhaoxin_uncore_type *kx8000_pci_uncores[] = { - [KX8000_PCI_UNCORE_MC_A0] = &kx8000_uncore_mc_a0, - [KX8000_PCI_UNCORE_MC_A1] = &kx8000_uncore_mc_a1, - [KX8000_PCI_UNCORE_MC_B0] = &kx8000_uncore_mc_b0, - [KX8000_PCI_UNCORE_MC_B1] = &kx8000_uncore_mc_b1, - [KX8000_PCI_UNCORE_PCI] = &kx8000_uncore_pci, - [KX8000_PCI_UNCORE_PXPTRF] = &kh40000_uncore_pxptrf, +static struct zhaoxin_uncore_type *kx7000_pci_uncores[] = { + 
[KX7000_PCI_UNCORE_MC_A0] = &kx7000_uncore_mc_a0, + [KX7000_PCI_UNCORE_MC_A1] = &kx7000_uncore_mc_a1, + [KX7000_PCI_UNCORE_MC_B0] = &kx7000_uncore_mc_b0, + [KX7000_PCI_UNCORE_MC_B1] = &kx7000_uncore_mc_b1, + [KX7000_PCI_UNCORE_PCI] = &kx7000_uncore_pci, + [KX7000_PCI_UNCORE_PXPTRF] = &kx7000_uncore_pxptrf, NULL, }; -static const struct pci_device_id kx8000_uncore_pci_ids[] = { +static const struct pci_device_id kx7000_uncore_pci_ids[] = { { /* MC Channe A0/A1/B0/B1 */ PCI_DEVICE(0x1D17, 0x31B2), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_MC_A0, 0), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_MC_A0, 0), }, { /* PCIE D2F0 */ PCI_DEVICE(0x1D17, 0x0717), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 0), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 0), }, { /* PCIE D2F1 */ PCI_DEVICE(0x1D17, 0x0718), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 1), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 1), }, { /* PCIE D2F2 */ PCI_DEVICE(0x1D17, 0x0733), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 2), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 2), }, { /* PCIE D2F3 */ PCI_DEVICE(0x1D17, 0x0734), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 3), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 3), }, { /* PCIE D3F0 */ PCI_DEVICE(0x1D17, 0x0719), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 4), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 4), }, { /* PCIE D3F1 */ PCI_DEVICE(0x1D17, 0x0735), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 5), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 5), }, { /* PCIE D3F2 */ PCI_DEVICE(0x1D17, 0x0739), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 6), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 6), }, { /* PCIE D3F3 */ PCI_DEVICE(0x1D17, 0x073A), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 7), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 7), }, { /* PCIE D4F0 */ PCI_DEVICE(0x1D17, 0x071B), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 8), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 8), }, { /* PCIE D4F1 */ PCI_DEVICE(0x1D17, 0x071C), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 9), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 9), }, { /* PCIE D4F2 */ PCI_DEVICE(0x1D17, 0x0736), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 10), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 10), }, { /* PCIE D4F3 */ PCI_DEVICE(0x1D17, 0x0737), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 11), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 11), }, { /* PCIE D4F4 */ PCI_DEVICE(0x1D17, 0x0738), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 12), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 12), }, { /* PCIE D5F0 */ PCI_DEVICE(0x1D17, 0x071D), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 13), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 13), }, { /* PCIE D5F1 */ PCI_DEVICE(0x1D17, 0x071E), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 14), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 14), }, { /* PCIE D5F2 */ PCI_DEVICE(0x1D17, 0x0732), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 15), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 15), }, { /* PCIE D5F3 */ PCI_DEVICE(0x1D17, 0x073B), - .driver_data = 
UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PCI, 16), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PCI, 16), }, { /* PXPTRF */ PCI_DEVICE(0x1D17, 0x31B4), - .driver_data = UNCORE_PCI_DEV_DATA(KX8000_PCI_UNCORE_PXPTRF, 0), + .driver_data = UNCORE_PCI_DEV_DATA(KX7000_PCI_UNCORE_PXPTRF, 0), }, { /* end: all zeroes */ } }; - -static struct pci_driver kx8000_uncore_pci_driver = { - .name = "kx8000_uncore", - .id_table = kx8000_uncore_pci_ids, +static struct pci_driver kx7000_uncore_pci_driver = { + .name = "kx7000_uncore", + .id_table = kx7000_uncore_pci_ids, }; -/*KX8000 pci ops end*/ +/*KX7000 pci ops end*/ -/*KX8000 mmio ops start*/ -static void kx8000_uncore_mmio_init_box(struct zhaoxin_uncore_box *box) +/*KX7000 mmio ops start*/ +static void kx7000_uncore_mmio_init_box(struct zhaoxin_uncore_box *box) { struct pci_dev *pdev = NULL; unsigned int box_ctl = uncore_mmio_box_ctl(box); @@ -1331,24 +1387,24 @@ static void kx8000_uncore_mmio_init_box(struct zhaoxin_uncore_box *box) return; if (!strcmp(box->pmu->name, "iod_zdi_dl")) - mmio_base_offset = KX8000_IOD_ZDI_DL_MMIO_BASE_OFFSET; + mmio_base_offset = KX7000_IOD_ZDI_DL_MMIO_BASE_OFFSET; else - mmio_base_offset = KX8000_CCD_ZDI_DL_MMIO_BASE_OFFSET; + mmio_base_offset = KX7000_CCD_ZDI_DL_MMIO_BASE_OFFSET; pci_read_config_dword(pdev, mmio_base_offset, &pci_dword); - addr = (u64)(pci_dword & KX8000_ZDI_DL_MMIO_BASE_MASK) << 32; + addr = (u64)(pci_dword & KX7000_ZDI_DL_MMIO_BASE_MASK) << 32; pci_read_config_dword(pdev, mmio_base_offset + 4, &pci_dword); - addr |= pci_dword & KX8000_ZDI_DL_MMIO_MEM0_MASK; + addr |= pci_dword & KX7000_ZDI_DL_MMIO_MEM0_MASK; - box->io_addr = ioremap(addr, KX8000_ZDI_DL_MMIO_SIZE); + box->io_addr = ioremap(addr, KX7000_ZDI_DL_MMIO_SIZE); if (!box->io_addr) return; writel(KH40000_PMON_PCI_BOX_CTL_INT, box->io_addr + box_ctl); } -static void kx8000_uncore_mmio_disable_box(struct zhaoxin_uncore_box *box) +static void kx7000_uncore_mmio_disable_box(struct zhaoxin_uncore_box *box) { u32 config; unsigned int box_ctl = uncore_mmio_box_ctl(box); @@ -1361,7 +1417,7 @@ static void kx8000_uncore_mmio_disable_box(struct zhaoxin_uncore_box *box) writel(config, box->io_addr + box_ctl); } -static void kx8000_uncore_mmio_enable_box(struct zhaoxin_uncore_box *box) +static void kx7000_uncore_mmio_enable_box(struct zhaoxin_uncore_box *box) { u32 config; unsigned int box_ctl = uncore_mmio_box_ctl(box); @@ -1374,8 +1430,8 @@ static void kx8000_uncore_mmio_enable_box(struct zhaoxin_uncore_box *box) writel(config, box->io_addr + box_ctl); } -static void kx8000_uncore_mmio_enable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static void kx7000_uncore_mmio_enable_event(struct zhaoxin_uncore_box *box, + struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -1385,8 +1441,8 @@ static void kx8000_uncore_mmio_enable_event(struct zhaoxin_uncore_box *box, writel(hwc->config | KH40000_PMON_CTL_EN, box->io_addr + hwc->config_base); } -static void kx8000_uncore_mmio_disable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static void kx7000_uncore_mmio_disable_event(struct zhaoxin_uncore_box *box, + struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -1402,8 +1458,7 @@ static void uncore_mmio_exit_box(struct zhaoxin_uncore_box *box) iounmap(box->io_addr); } -static u64 uncore_mmio_read_counter(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static u64 uncore_mmio_read_counter(struct zhaoxin_uncore_box *box, struct perf_event *event) { u64 count = 0; u64 
count_low = 0; @@ -1419,54 +1474,51 @@ static u64 uncore_mmio_read_counter(struct zhaoxin_uncore_box *box, return count; } -static struct zhaoxin_uncore_ops kx8000_uncore_mmio_ops = { - .init_box = kx8000_uncore_mmio_init_box, +static struct zhaoxin_uncore_ops kx7000_uncore_mmio_ops = { + .init_box = kx7000_uncore_mmio_init_box, .exit_box = uncore_mmio_exit_box, - .disable_box = kx8000_uncore_mmio_disable_box, - .enable_box = kx8000_uncore_mmio_enable_box, - .disable_event = kx8000_uncore_mmio_disable_event, - .enable_event = kx8000_uncore_mmio_enable_event, + .disable_box = kx7000_uncore_mmio_disable_box, + .enable_box = kx7000_uncore_mmio_enable_box, + .disable_event = kx7000_uncore_mmio_disable_event, + .enable_event = kx7000_uncore_mmio_enable_event, .read_counter = uncore_mmio_read_counter, }; -static struct zhaoxin_uncore_type kx8000_uncore_iod_zdi_dl = { +static struct zhaoxin_uncore_type kx7000_uncore_iod_zdi_dl = { .name = "iod_zdi_dl", .num_counters = 4, .num_boxes = 1, .perf_ctr_bits = 48, .fixed_ctr_bits = 48, - .perf_ctr = KX8000_ZDI_DL_MMIO_PMON_CTR0, - .event_ctl = KX8000_ZDI_DL_MMIO_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_ZDI_DL_MMIO_PMON_BLK_CTL, - .ops = &kx8000_uncore_mmio_ops, - .format_group = &kh40000_uncore_format_group, + .perf_ctr = KX7000_ZDI_DL_MMIO_PMON_CTR0, + .event_ctl = KX7000_ZDI_DL_MMIO_PMON_CTL0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_ZDI_DL_MMIO_PMON_BLK_CTL, + .ops = &kx7000_uncore_mmio_ops, + .format_group = &kx7000_uncore_format_group, }; -static struct zhaoxin_uncore_type kx8000_uncore_ccd_zdi_dl = { +static struct zhaoxin_uncore_type kx7000_uncore_ccd_zdi_dl = { .name = "ccd_zdi_dl", .num_counters = 4, .num_boxes = 1, .perf_ctr_bits = 48, .fixed_ctr_bits = 48, - .perf_ctr = KX8000_ZDI_DL_MMIO_PMON_CTR0, - .event_ctl = KX8000_ZDI_DL_MMIO_PMON_CTL0, - .event_mask = KH40000_PMON_RAW_EVENT_MASK, - .box_ctl = KX8000_ZDI_DL_MMIO_PMON_BLK_CTL, - .ops = &kx8000_uncore_mmio_ops, - .format_group = &kh40000_uncore_format_group, + .perf_ctr = KX7000_ZDI_DL_MMIO_PMON_CTR0, + .event_ctl = KX7000_ZDI_DL_MMIO_PMON_CTL0, + .event_mask = KX7000_PMON_RAW_EVENT_MASK, + .box_ctl = KX7000_ZDI_DL_MMIO_PMON_BLK_CTL, + .ops = &kx7000_uncore_mmio_ops, + .format_group = &kx7000_uncore_format_group, }; -static struct zhaoxin_uncore_type *kx8000_mmio_uncores[] = { - &kx8000_uncore_iod_zdi_dl, - &kx8000_uncore_ccd_zdi_dl, +static struct zhaoxin_uncore_type *kx7000_mmio_uncores[] = { + &kx7000_uncore_iod_zdi_dl, + &kx7000_uncore_ccd_zdi_dl, NULL, }; -/*KX8000 mmio ops end*/ - - - +/*KX7000 mmio ops end*/ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) { struct zhaoxin_uncore_box *box; @@ -1517,8 +1569,7 @@ static void uncore_pmu_init_hrtimer(struct zhaoxin_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -static struct zhaoxin_uncore_box *uncore_alloc_box(struct zhaoxin_uncore_type *type, - int node) +static struct zhaoxin_uncore_box *uncore_alloc_box(struct zhaoxin_uncore_type *type, int node) { int i, size, numshared = type->num_shared_regs; struct zhaoxin_uncore_box *box; @@ -1551,9 +1602,8 @@ static bool is_box_event(struct zhaoxin_uncore_box *box, struct perf_event *even return &box->pmu->pmu == event->pmu; } -static int -uncore_collect_events(struct zhaoxin_uncore_box *box, struct perf_event *leader, - bool dogrp) +static int uncore_collect_events(struct zhaoxin_uncore_box *box, struct perf_event *leader, + bool dogrp) { struct perf_event *event; int n, max_count; @@ -1589,8 
+1639,8 @@ uncore_collect_events(struct zhaoxin_uncore_box *box, struct perf_event *leader, return n; } -static struct event_constraint * -uncore_get_event_constraint(struct zhaoxin_uncore_box *box, struct perf_event *event) +static struct event_constraint *uncore_get_event_constraint(struct zhaoxin_uncore_box *box, + struct perf_event *event) { struct zhaoxin_uncore_type *type = box->pmu->type; struct event_constraint *c; @@ -1614,8 +1664,7 @@ uncore_get_event_constraint(struct zhaoxin_uncore_box *box, struct perf_event *e return &type->unconstrainted; } -static void uncore_put_event_constraint(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static void uncore_put_event_constraint(struct zhaoxin_uncore_box *box, struct perf_event *event) { if (box->pmu->type->ops->put_constraint) box->pmu->type->ops->put_constraint(box, event); @@ -1812,8 +1861,7 @@ static void uncore_pmu_event_read(struct perf_event *event) uncore_perf_event_update(box, event); } -static int uncore_validate_group(struct zhaoxin_uncore_pmu *pmu, - struct perf_event *event) +static int uncore_validate_group(struct zhaoxin_uncore_pmu *pmu, struct perf_event *event) { struct perf_event *leader = event->group_leader; struct zhaoxin_uncore_box *fake_box; @@ -1950,8 +1998,7 @@ static void uncore_pmu_disable(struct pmu *pmu) uncore_pmu->type->ops->disable_box(box); } -static ssize_t cpumask_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { cpumask_t *active_mask; struct pmu *pmu; @@ -1968,6 +2015,7 @@ static ssize_t cpumask_show(struct device *dev, } else { active_mask = &uncore_cpu_mask; } + return cpumap_print_to_pagebuf(true, buf, active_mask); } static DEVICE_ATTR_RO(cpumask); @@ -2018,6 +2066,7 @@ static int uncore_pmu_register(struct zhaoxin_uncore_pmu *pmu) ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); if (!ret) pmu->registered = true; + return ret; } @@ -2136,8 +2185,7 @@ err: return -ENOMEM; } -static int __init -uncore_types_init(struct zhaoxin_uncore_type **types, bool setid) +static int __init uncore_types_init(struct zhaoxin_uncore_type **types, bool setid) { int ret; @@ -2173,7 +2221,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id strscpy(mc_dev, "mc0", sizeof("mc0")); if (!strcmp(type->name, mc_dev)) loop = 2; - } else if (boot_cpu_data.x86_model == ZHAOXIN_FAM7_KX8000) { + } else if (boot_cpu_data.x86_model == ZHAOXIN_FAM7_KX7000) { strscpy(mc_dev, "mc_a0", sizeof("mc_a0")); if (!strcmp(type->name, mc_dev)) loop = 4; @@ -2252,14 +2300,13 @@ static void uncore_pci_remove(struct pci_dev *pdev) loop = 2; else loop = 1; - } else if (boot_cpu_data.x86_model == ZHAOXIN_FAM7_KX8000) { + } else if (boot_cpu_data.x86_model == ZHAOXIN_FAM7_KX7000) { if (!strcmp(boxes[0]->pmu->type->name, "mc_a0")) loop = 4; else loop = 1; } - for (i = 0; i < loop; i++) { box = boxes[i]; pmu = box->pmu; @@ -2313,8 +2360,7 @@ static void uncore_pci_exit(void) } } -static void uncore_change_type_ctx(struct zhaoxin_uncore_type *type, int old_cpu, - int new_cpu) +static void uncore_change_type_ctx(struct zhaoxin_uncore_type *type, int old_cpu, int new_cpu) { struct zhaoxin_uncore_pmu *pmu = type->pmus; struct zhaoxin_uncore_box *box; @@ -2360,8 +2406,7 @@ static void uncore_change_type_ctx(struct zhaoxin_uncore_type *type, int old_cpu } } -static void uncore_change_context(struct zhaoxin_uncore_type **uncores, - int old_cpu, int new_cpu) +static void uncore_change_context(struct 
zhaoxin_uncore_type **uncores, int old_cpu, int new_cpu) { for (; *uncores; uncores++) uncore_change_type_ctx(*uncores, old_cpu, new_cpu); @@ -2467,7 +2512,6 @@ static void kh40000_event_cpu_offline(int cpu) } else { uncore_box_unref(uncore_msr_subnode_uncores, subnode_id); } - } static int uncore_event_cpu_offline(unsigned int cpu) @@ -2484,8 +2528,8 @@ static int uncore_event_cpu_offline(unsigned int cpu) return 0; } -static int kx5000_allocate_boxes(struct zhaoxin_uncore_type **types, - unsigned int id, unsigned int cpu) +static int kx5000_allocate_boxes(struct zhaoxin_uncore_type **types, unsigned int id, + unsigned int cpu) { struct zhaoxin_uncore_box *box, *tmp; struct zhaoxin_uncore_type *type; @@ -2525,8 +2569,8 @@ cleanup: return -ENOMEM; } -static int kh40000_allocate_boxes(struct zhaoxin_uncore_type **types, - unsigned int id, unsigned int cpu) +static int kh40000_allocate_boxes(struct zhaoxin_uncore_type **types, unsigned int id, + unsigned int cpu) { struct zhaoxin_uncore_box *box, *tmp; struct zhaoxin_uncore_type *type; @@ -2568,8 +2612,7 @@ cleanup: return -ENOMEM; } -static int uncore_box_ref(struct zhaoxin_uncore_type **types, - int id, unsigned int cpu) +static int uncore_box_ref(struct zhaoxin_uncore_type **types, int id, unsigned int cpu) { struct zhaoxin_uncore_type *type; struct zhaoxin_uncore_pmu *pmu; @@ -2789,39 +2832,49 @@ static const struct zhaoxin_uncore_init_fun kh40000_uncore_init __initconst = { .pci_init = kh40000_uncore_pci_init, }; -void kx8000_uncore_cpu_init(void) +void kx7000_uncore_cpu_init(void) { - uncore_msr_uncores = kx8000_msr_uncores; + u64 val; + int cpu; + + uncore_msr_uncores = kx7000_msr_uncores; + + /* clear bit 16 of MSR 0x1877 so that HIF can work normally */ + for_each_present_cpu(cpu) { + rdmsrl_on_cpu(cpu, 0x1877, &val); + val = val & 0xfffffffffffeffffULL; + wrmsrl_on_cpu(cpu, 0x1877, val); + } } -int kx8000_uncore_pci_init(void) +int kx7000_uncore_pci_init(void) { - uncore_pci_uncores = kx8000_pci_uncores; - uncore_pci_driver = &kx8000_uncore_pci_driver; + uncore_pci_uncores = kx7000_pci_uncores; + uncore_pci_driver = &kx7000_uncore_pci_driver; return 0; } -void kx8000_uncore_mmio_init(void) +void kx7000_uncore_mmio_init(void) { - uncore_mmio_uncores = kx8000_mmio_uncores; + uncore_mmio_uncores = kx7000_mmio_uncores; } -static const struct zhaoxin_uncore_init_fun kx8000_uncore_init __initconst = { - .cpu_init = kx8000_uncore_cpu_init, - .pci_init = kx8000_uncore_pci_init, - .mmio_init = kx8000_uncore_mmio_init, +static const struct zhaoxin_uncore_init_fun kx7000_uncore_init __initconst = { + .cpu_init = kx7000_uncore_cpu_init, + .pci_init = kx7000_uncore_pci_init, + .mmio_init = kx7000_uncore_mmio_init, }; static const struct x86_cpu_id zhaoxin_uncore_match[] __initconst = { X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, ZHAOXIN_FAM7_KX5000, &kx5000_uncore_init), X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, ZHAOXIN_FAM7_KX6000, &kx5000_uncore_init), X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, ZHAOXIN_FAM7_KH40000, &kh40000_uncore_init), - X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, ZHAOXIN_FAM7_KX8000, &kx8000_uncore_init), + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, ZHAOXIN_FAM7_KX7000, &kx7000_uncore_init), X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, ZHAOXIN_FAM7_KX5000, &kx5000_uncore_init), X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, ZHAOXIN_FAM7_KX6000, &kx5000_uncore_init), X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, ZHAOXIN_FAM7_KH40000, &kh40000_uncore_init), - X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, ZHAOXIN_FAM7_KX8000, &kx8000_uncore_init), + 
X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, ZHAOXIN_FAM7_KX7000, &kx7000_uncore_init), {}, }; MODULE_DEVICE_TABLE(x86cpu, zhaoxin_uncore_match); diff --git a/arch/x86/events/zhaoxin/uncore.h b/arch/x86/events/zhaoxin/uncore.h index 5d09696f8bc7..43ea06364175 100644 --- a/arch/x86/events/zhaoxin/uncore.h +++ b/arch/x86/events/zhaoxin/uncore.h @@ -9,10 +9,8 @@ #define ZHAOXIN_FAM7_KX5000 0x1b #define ZHAOXIN_FAM7_KX6000 0x3b -#define ZHAOXIN_FAM7_KH40000 0x5b -#define ZHAOXIN_FAM7_KX8000 0x6b - - +#define ZHAOXIN_FAM7_KH40000 0x5b +#define ZHAOXIN_FAM7_KX7000 0x6b #define UNCORE_PMU_NAME_LEN 32 #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) @@ -82,14 +80,14 @@ struct zhaoxin_uncore_ops { }; struct zhaoxin_uncore_pmu { - struct pmu pmu; - char name[UNCORE_PMU_NAME_LEN]; - int pmu_idx; - int func_id; - bool registered; - atomic_t activeboxes; - struct zhaoxin_uncore_type *type; - struct zhaoxin_uncore_box **boxes; + struct pmu pmu; + char name[UNCORE_PMU_NAME_LEN]; + int pmu_idx; + int func_id; + bool registered; + atomic_t activeboxes; + struct zhaoxin_uncore_type *type; + struct zhaoxin_uncore_box **boxes; }; struct zhaoxin_uncore_extra_reg { @@ -123,7 +121,7 @@ struct zhaoxin_uncore_box { struct zhaoxin_uncore_extra_reg shared_regs[]; }; -#define UNCORE_BOX_FLAG_INITIATED 0 +#define UNCORE_BOX_FLAG_INITIATED 0 struct uncore_event_desc { struct device_attribute attr; @@ -135,8 +133,7 @@ struct hw_info { u64 active_state; }; -ssize_t zx_uncore_event_show(struct device *dev, - struct device_attribute *attr, char *buf); +ssize_t zx_uncore_event_show(struct device *dev, struct device_attribute *attr, char *buf); #define ZHAOXIN_UNCORE_EVENT_DESC(_name, _config) \ { \ @@ -160,8 +157,7 @@ static inline bool uncore_pmc_fixed(int idx) return idx == UNCORE_PMC_IDX_FIXED; } -static inline -unsigned int uncore_mmio_box_ctl(struct zhaoxin_uncore_box *box) +static inline unsigned int uncore_mmio_box_ctl(struct zhaoxin_uncore_box *box) { return box->pmu->type->box_ctl + box->pmu->type->mmio_offset * box->pmu->pmu_idx; @@ -182,14 +178,12 @@ static inline unsigned int uncore_pci_fixed_ctr(struct zhaoxin_uncore_box *box) return box->pmu->type->fixed_ctr; } -static inline -unsigned int uncore_pci_event_ctl(struct zhaoxin_uncore_box *box, int idx) +static inline unsigned int uncore_pci_event_ctl(struct zhaoxin_uncore_box *box, int idx) { return idx * 4 + box->pmu->type->event_ctl; } -static inline -unsigned int uncore_pci_perf_ctr(struct zhaoxin_uncore_box *box, int idx) +static inline unsigned int uncore_pci_perf_ctr(struct zhaoxin_uncore_box *box, int idx) { if (!strncmp(box->pmu->type->name, "mc_", 3)) return idx * 2 + box->pmu->type->perf_ctr; @@ -225,24 +219,21 @@ static inline unsigned int uncore_msr_fixed_ctr(struct zhaoxin_uncore_box *box) return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box); } -static inline -unsigned int uncore_msr_event_ctl(struct zhaoxin_uncore_box *box, int idx) +static inline unsigned int uncore_msr_event_ctl(struct zhaoxin_uncore_box *box, int idx) { return box->pmu->type->event_ctl + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + uncore_msr_box_offset(box); } -static inline -unsigned int uncore_msr_perf_ctr(struct zhaoxin_uncore_box *box, int idx) +static inline unsigned int uncore_msr_perf_ctr(struct zhaoxin_uncore_box *box, int idx) { return box->pmu->type->perf_ctr + (box->pmu->type->pair_ctr_ctl ? 
2 * idx : idx) + uncore_msr_box_offset(box); } -static inline -unsigned int uncore_fixed_ctl(struct zhaoxin_uncore_box *box) +static inline unsigned int uncore_fixed_ctl(struct zhaoxin_uncore_box *box) { if (box->pci_dev) return uncore_pci_fixed_ctl(box); @@ -250,8 +241,7 @@ unsigned int uncore_fixed_ctl(struct zhaoxin_uncore_box *box) return uncore_msr_fixed_ctl(box); } -static inline -unsigned int uncore_fixed_ctr(struct zhaoxin_uncore_box *box) +static inline unsigned int uncore_fixed_ctr(struct zhaoxin_uncore_box *box) { if (box->pci_dev) return uncore_pci_fixed_ctr(box); @@ -259,17 +249,17 @@ unsigned int uncore_fixed_ctr(struct zhaoxin_uncore_box *box) return uncore_msr_fixed_ctr(box); } -static inline -unsigned int uncore_event_ctl(struct zhaoxin_uncore_box *box, int idx) -{ if (box->pci_dev || box->io_addr) +static inline unsigned int uncore_event_ctl(struct zhaoxin_uncore_box *box, int idx) +{ + if (box->pci_dev || box->io_addr) return uncore_pci_event_ctl(box, idx); else return uncore_msr_event_ctl(box, idx); } -static inline -unsigned int uncore_perf_ctr(struct zhaoxin_uncore_box *box, int idx) -{ if (box->pci_dev || box->io_addr) +static inline unsigned int uncore_perf_ctr(struct zhaoxin_uncore_box *box, int idx) +{ + if (box->pci_dev || box->io_addr) return uncore_pci_perf_ctr(box, idx); else return uncore_msr_perf_ctr(box, idx); @@ -302,20 +292,17 @@ static inline void uncore_enable_box(struct zhaoxin_uncore_box *box) box->pmu->type->ops->enable_box(box); } -static inline void uncore_disable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static inline void uncore_disable_event(struct zhaoxin_uncore_box *box, struct perf_event *event) { box->pmu->type->ops->disable_event(box, event); } -static inline void uncore_enable_event(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static inline void uncore_enable_event(struct zhaoxin_uncore_box *box, struct perf_event *event) { box->pmu->type->ops->enable_event(box, event); } -static inline u64 uncore_read_counter(struct zhaoxin_uncore_box *box, - struct perf_event *event) +static inline u64 uncore_read_counter(struct zhaoxin_uncore_box *box, struct perf_event *event) { return box->pmu->type->ops->read_counter(box, event); } @@ -351,12 +338,10 @@ static inline struct zhaoxin_uncore_box *uncore_event_to_box(struct perf_event * return event->pmu_private; } - static struct zhaoxin_uncore_box *uncore_pmu_to_box(struct zhaoxin_uncore_pmu *pmu, int cpu); static u64 uncore_msr_read_counter(struct zhaoxin_uncore_box *box, struct perf_event *event); static void uncore_mmio_exit_box(struct zhaoxin_uncore_box *box); -static u64 uncore_mmio_read_counter(struct zhaoxin_uncore_box *box, - struct perf_event *event); +static u64 uncore_mmio_read_counter(struct zhaoxin_uncore_box *box, struct perf_event *event); static void uncore_pmu_start_hrtimer(struct zhaoxin_uncore_box *box); static void uncore_pmu_cancel_hrtimer(struct zhaoxin_uncore_box *box); static void uncore_pmu_event_start(struct perf_event *event, int flags); @@ -365,7 +350,7 @@ static int uncore_pmu_event_add(struct perf_event *event, int flags); static void uncore_pmu_event_del(struct perf_event *event, int flags); static void uncore_pmu_event_read(struct perf_event *event); static void uncore_perf_event_update(struct zhaoxin_uncore_box *box, struct perf_event *event); -struct event_constraint * -uncore_get_constraint(struct zhaoxin_uncore_box *box, struct perf_event *event); +struct event_constraint *uncore_get_constraint(struct zhaoxin_uncore_box *box, 
+ struct perf_event *event); void uncore_put_constraint(struct zhaoxin_uncore_box *box, struct perf_event *event); u64 uncore_shared_reg_config(struct zhaoxin_uncore_box *box, int idx); From 69099183e8f3f1854d839f9ee2f1666fb0c1717f Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Thu, 15 Aug 2024 15:25:52 +0800 Subject: [PATCH 31/39] x86/mce: Set bios_cmci_threshold for CMCI threshold zhaoxin inclusion category: feature ------------------- In the Linux kernel, the CMCI threshold is set to 1 by default. This patch prevents Linux from overwriting the CMCI threshold set by the bios. With this patch, the CMCI threshold can be set through the BIOS, which can also avoid CMCI storms, on Zhaoxin/Centaur CPUs. Signed-off-by: leoliu-oc --- arch/x86/kernel/cpu/mce/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 2cafc35f3b7b..c9c9ebbb3268 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1946,6 +1946,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (cfg->monarch_timeout < 0) cfg->monarch_timeout = USEC_PER_SEC; } + mca_cfg.bios_cmci_threshold = 1; } if (cfg->monarch_timeout < 0) From a74c780bbd53d4c863bc0237759b1abc65e27c23 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Thu, 7 Mar 2024 16:10:15 +0800 Subject: [PATCH 32/39] anolis: Add support for Zhaoxin Serial ATA IDE. With this driver, Serial ATA device can run in IDE mode on Zhaoxin CPUs. Signed-off-by: leoliu-oc Signed-off-by: Jianping Liu --- drivers/ata/Kconfig | 9 + drivers/ata/Makefile | 1 + drivers/ata/sata_zhaoxin.c | 390 +++++++++++++++++++++++++++++++++++++ 3 files changed, 400 insertions(+) create mode 100644 drivers/ata/sata_zhaoxin.c diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 42b51c9812a0..0fd5a5bce3e4 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -553,6 +553,15 @@ config SATA_VITESSE If unsure, say N. +config SATA_ZHAOXIN + tristate "ZhaoXin SATA support" + depends on PCI + select SATA_HOST + help + This option enables support for ZhaoXin Serial ATA. + + If unsure, say N. 
+ comment "PATA SFF controllers with BMDMA" config PATA_ALI diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 20e6645ab737..4b846692e365 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_SATA_SIL) += sata_sil.o obj-$(CONFIG_SATA_SIS) += sata_sis.o obj-$(CONFIG_SATA_SVW) += sata_svw.o obj-$(CONFIG_SATA_ULI) += sata_uli.o +obj-$(CONFIG_SATA_ZHAOXIN) += sata_zhaoxin.o obj-$(CONFIG_SATA_VIA) += sata_via.o obj-$(CONFIG_SATA_VITESSE) += sata_vsc.o diff --git a/drivers/ata/sata_zhaoxin.c b/drivers/ata/sata_zhaoxin.c new file mode 100644 index 000000000000..53c3e2ab6095 --- /dev/null +++ b/drivers/ata/sata_zhaoxin.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * sata_zhaoxin.c - ZhaoXin Serial ATA controllers + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "sata_zx" +#define DRV_VERSION "2.6.1" + +#define PCI_DEVICE_ID_ZHAOXIN_DUAL_CHANNEL 9002 +#define PCI_DEVICE_ID_ZHAOXIN_SING_CHANNEL 9003 + +enum board_ids_enum { + zx100s, +}; + +enum { + SATA_CHAN_ENAB = 0x40, /* SATA channel enable */ + SATA_INT_GATE = 0x41, /* SATA interrupt gating */ + SATA_NATIVE_MODE = 0x42, /* Native mode enable */ + PATA_UDMA_TIMING = 0xB3, /* PATA timing for DMA/ cable detect */ + PATA_PIO_TIMING = 0xAB, /* PATA timing register */ + + PORT0 = (1 << 1), + PORT1 = (1 << 0), + ALL_PORTS = PORT0 | PORT1, + + NATIVE_MODE_ALL = (1 << 7) | (1 << 6) | (1 << 5) | (1 << 4), + + SATA_EXT_PHY = (1 << 6), /* 0==use PATA, 1==ext phy */ +}; + +static int zx_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); +static int zx_scr_read(struct ata_link *link, unsigned int scr, u32 *val); +static int zx_scr_write(struct ata_link *link, unsigned int scr, u32 val); +static int zx_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); + +static void zx_tf_load(struct ata_port *ap, const struct ata_taskfile *tf); + +static const struct pci_device_id zx_pci_tbl[] = { + { PCI_VDEVICE(ZHAOXIN, PCI_DEVICE_ID_ZHAOXIN_DUAL_CHANNEL), zx100s }, + { PCI_VDEVICE(ZHAOXIN, PCI_DEVICE_ID_ZHAOXIN_SING_CHANNEL), zx100s }, + + { } /* terminate list */ +}; + +static struct pci_driver zx_pci_driver = { + .name = DRV_NAME, + .id_table = zx_pci_tbl, + .probe = zx_init_one, +#ifdef CONFIG_PM_SLEEP + .suspend = ata_pci_device_suspend, + .resume = ata_pci_device_resume, +#endif + .remove = ata_pci_remove_one, +}; + +static struct scsi_host_template zx_sht = { + ATA_BMDMA_SHT(DRV_NAME), +}; + +static struct ata_port_operations zx_base_ops = { + .inherits = &ata_bmdma_port_ops, + .sff_tf_load = zx_tf_load, +}; + +static struct ata_port_operations zx_ops = { + .inherits = &zx_base_ops, + .hardreset = zx_hardreset, + .scr_read = zx_scr_read, + .scr_write = zx_scr_write, +}; + +static struct ata_port_info zx100s_port_info = { + .flags = ATA_FLAG_SATA | ATA_FLAG_SLAVE_POSS, + .pio_mask = ATA_PIO4, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA6, + .port_ops = &zx_ops, +}; + + +static int zx_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + int rc; + + rc = sata_std_hardreset(link, class, deadline); + if (!rc || rc == -EAGAIN) { + struct ata_port *ap = link->ap; + int pmp = link->pmp; + int tmprc; + + if (pmp) { + ap->ops->sff_dev_select(ap, pmp); + tmprc = ata_sff_wait_ready(&ap->link, deadline); + } else { + tmprc = ata_sff_wait_ready(link, deadline); + } + if (tmprc) + ata_link_err(link, "COMRESET failed for wait (errno=%d)\n", + rc); + 
else + ata_link_err(link, "wait for bsy success\n"); + + ata_link_err(link, "COMRESET success (errno=%d) ap=%d link %d\n", + rc, link->ap->port_no, link->pmp); + } else { + ata_link_err(link, "COMRESET failed (errno=%d) ap=%d link %d\n", + rc, link->ap->port_no, link->pmp); + } + return rc; +} + +static int zx_scr_read(struct ata_link *link, unsigned int scr, u32 *val) +{ + static const u8 ipm_tbl[] = { 1, 2, 6, 0 }; + struct pci_dev *pdev = to_pci_dev(link->ap->host->dev); + int slot = 2 * link->ap->port_no + link->pmp; + u32 v = 0; + u8 raw; + + switch (scr) { + case SCR_STATUS: + pci_read_config_byte(pdev, 0xA0 + slot, &raw); + + /* read the DET field, bit0 and 1 of the config byte */ + v |= raw & 0x03; + + /* read the SPD field, bit4 of the configure byte */ + v |= raw & 0x30; + + /* read the IPM field, bit2 and 3 of the config byte */ + v |= ((ipm_tbl[(raw >> 2) & 0x3])<<8); + break; + + case SCR_ERROR: + /* devices other than 5287 uses 0xA8 as base */ + WARN_ON(pdev->device != PCI_DEVICE_ID_ZHAOXIN_DUAL_CHANNEL && + pdev->device != PCI_DEVICE_ID_ZHAOXIN_SING_CHANNEL); + pci_write_config_byte(pdev, 0x42, slot); + pci_read_config_dword(pdev, 0xA8, &v); + break; + + case SCR_CONTROL: + pci_read_config_byte(pdev, 0xA4 + slot, &raw); + + /* read the DET field, bit0 and bit1 */ + v |= ((raw & 0x02) << 1) | (raw & 0x01); + + /* read the IPM field, bit2 and bit3 */ + v |= ((raw >> 2) & 0x03) << 8; + + break; + + default: + return -EINVAL; + } + + *val = v; + return 0; +} + +static int zx_scr_write(struct ata_link *link, unsigned int scr, u32 val) +{ + struct pci_dev *pdev = to_pci_dev(link->ap->host->dev); + int slot = 2 * link->ap->port_no + link->pmp; + u32 v = 0; + + WARN_ON(pdev == NULL); + + switch (scr) { + case SCR_ERROR: + /* devices PCI_DEVICE_ID_ZHAOXIN_DUAL_CHANNEL uses 0xA8 as base */ + WARN_ON(pdev->device != PCI_DEVICE_ID_ZHAOXIN_DUAL_CHANNEL && + pdev->device != PCI_DEVICE_ID_ZHAOXIN_SING_CHANNEL); + pci_write_config_byte(pdev, 0x42, slot); + pci_write_config_dword(pdev, 0xA8, val); + return 0; + + case SCR_CONTROL: + /* set the DET field */ + v |= ((val & 0x4) >> 1) | (val & 0x1); + + /* set the IPM field */ + v |= ((val >> 8) & 0x3) << 2; + + + pci_write_config_byte(pdev, 0xA4 + slot, v); + + + return 0; + + default: + return -EINVAL; + } +} + + +/** + * zx_tf_load - send taskfile registers to host controller + * @ap: Port to which output is sent + * @tf: ATA taskfile register set + * + * Outputs ATA taskfile to standard ATA host controller. + * + * This is to fix the internal bug of zx chipsets, which will + * reset the device register after changing the IEN bit on ctl + * register. 
+ */ +static void zx_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) +{ + struct ata_taskfile ttf; + + if (tf->ctl != ap->last_ctl) { + ttf = *tf; + ttf.flags |= ATA_TFLAG_DEVICE; + tf = &ttf; + } + ata_sff_tf_load(ap, tf); +} + +static const unsigned int zx_bar_sizes[] = { + 8, 4, 8, 4, 16, 256 +}; + +static const unsigned int zx100s_bar_sizes0[] = { + 8, 4, 8, 4, 16, 0 +}; + +static const unsigned int zx100s_bar_sizes1[] = { + 8, 4, 0, 0, 16, 0 +}; + +static int zx_prepare_host(struct pci_dev *pdev, struct ata_host **r_host) +{ + const struct ata_port_info *ppi0[] = { + &zx100s_port_info, NULL + }; + const struct ata_port_info *ppi1[] = { + &zx100s_port_info, &ata_dummy_port_info + }; + struct ata_host *host; + int i, rc; + + if (pdev->device == PCI_DEVICE_ID_ZHAOXIN_DUAL_CHANNEL) + rc = ata_pci_bmdma_prepare_host(pdev, ppi0, &host); + else if (pdev->device == PCI_DEVICE_ID_ZHAOXIN_SING_CHANNEL) + rc = ata_pci_bmdma_prepare_host(pdev, ppi1, &host); + else + rc = -EINVAL; + + if (rc) + return rc; + + *r_host = host; + + /* 9002 hosts four sata ports as M/S of the two channels */ + /* 9003 hosts two sata ports as M/S of the one channel */ + for (i = 0; i < host->n_ports; i++) + ata_slave_link_init(host->ports[i]); + + return 0; +} + +static void zx_configure(struct pci_dev *pdev, int board_id) +{ + u8 tmp8; + + pci_read_config_byte(pdev, PCI_INTERRUPT_LINE, &tmp8); + dev_info(&pdev->dev, "routed to hard irq line %d\n", + (int) (tmp8 & 0xf0) == 0xf0 ? 0 : tmp8 & 0x0f); + + /* make sure SATA channels are enabled */ + pci_read_config_byte(pdev, SATA_CHAN_ENAB, &tmp8); + if ((tmp8 & ALL_PORTS) != ALL_PORTS) { + dev_dbg(&pdev->dev, "enabling SATA channels (0x%x)\n", + (int)tmp8); + tmp8 |= ALL_PORTS; + pci_write_config_byte(pdev, SATA_CHAN_ENAB, tmp8); + } + + /* make sure interrupts for each channel sent to us */ + pci_read_config_byte(pdev, SATA_INT_GATE, &tmp8); + if ((tmp8 & ALL_PORTS) != ALL_PORTS) { + dev_dbg(&pdev->dev, "enabling SATA channel interrupts (0x%x)\n", + (int) tmp8); + tmp8 |= ALL_PORTS; + pci_write_config_byte(pdev, SATA_INT_GATE, tmp8); + } + + /* make sure native mode is enabled */ + pci_read_config_byte(pdev, SATA_NATIVE_MODE, &tmp8); + if ((tmp8 & NATIVE_MODE_ALL) != NATIVE_MODE_ALL) { + dev_dbg(&pdev->dev, + "enabling SATA channel native mode (0x%x)\n", + (int) tmp8); + tmp8 |= NATIVE_MODE_ALL; + pci_write_config_byte(pdev, SATA_NATIVE_MODE, tmp8); + } +} + +static int zx_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + unsigned int i; + int rc; + struct ata_host *host = NULL; + int board_id = (int) ent->driver_data; + const unsigned int *bar_sizes; + int legacy_mode = 0; + + ata_print_version_once(&pdev->dev, DRV_VERSION); + + if (pdev->device == PCI_DEVICE_ID_ZHAOXIN_DUAL_CHANNEL || + pdev->device == PCI_DEVICE_ID_ZHAOXIN_SING_CHANNEL) { + if ((pdev->class >> 8) == PCI_CLASS_STORAGE_IDE) { + u8 tmp8, mask; + + /* TODO: What if one channel is in native mode ... 
*/ + pci_read_config_byte(pdev, PCI_CLASS_PROG, &tmp8); + mask = (1 << 2) | (1 << 0); + if ((tmp8 & mask) != mask) + legacy_mode = 1; + } + if (legacy_mode) + return -EINVAL; + } + + rc = pcim_enable_device(pdev); + if (rc) + return rc; + + if (board_id == zx100s && pdev->device == PCI_DEVICE_ID_ZHAOXIN_DUAL_CHANNEL) + bar_sizes = &zx100s_bar_sizes0[0]; + else if (board_id == zx100s && pdev->device == PCI_DEVICE_ID_ZHAOXIN_SING_CHANNEL) + bar_sizes = &zx100s_bar_sizes1[0]; + else + bar_sizes = &zx_bar_sizes[0]; + + for (i = 0; i < ARRAY_SIZE(zx_bar_sizes); i++) { + if ((pci_resource_start(pdev, i) == 0) || + (pci_resource_len(pdev, i) < bar_sizes[i])) { + if (bar_sizes[i] == 0) + continue; + + dev_err(&pdev->dev, + "invalid PCI BAR %u (sz 0x%llx, val 0x%llx)\n", + i, + (unsigned long long)pci_resource_start(pdev, i), + (unsigned long long)pci_resource_len(pdev, i)); + + return -ENODEV; + } + } + + switch (board_id) { + case zx100s: + rc = zx_prepare_host(pdev, &host); + break; + default: + rc = -EINVAL; + } + if (rc) + return rc; + + zx_configure(pdev, board_id); + + pci_set_master(pdev); + return ata_host_activate(host, pdev->irq, ata_bmdma_interrupt, + IRQF_SHARED, &zx_sht); +} + +module_pci_driver(zx_pci_driver); + +MODULE_AUTHOR("Yanchen:YanchenSun@zhaoxin.com"); +MODULE_DESCRIPTION("SCSI low-level driver for ZX SATA controllers"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, zx_pci_tbl); +MODULE_VERSION(DRV_VERSION); From 03e0070ab695570ac9f4174b7fecb3011f017ab8 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Thu, 15 Aug 2024 17:20:19 +0800 Subject: [PATCH 33/39] hwmon: Add support for Zhaoxin core temperature monitoring Add support for the temperature sensor inside CPU. Supported are all known variants of the Zhaoxin processors. Signed-off-by: leoliu-oc Signed-off-by: Jianping Liu --- MAINTAINERS | 6 + arch/x86/configs/tencent.config | 1 + drivers/hwmon/Kconfig | 13 ++ drivers/hwmon/Makefile | 1 + drivers/hwmon/via-cputemp.c | 1 - drivers/hwmon/zhaoxin-cputemp.c | 305 ++++++++++++++++++++++++++++++++ 6 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 drivers/hwmon/zhaoxin-cputemp.c diff --git a/MAINTAINERS b/MAINTAINERS index 8ee519362a63..35979644a2e9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23908,6 +23908,12 @@ L: linux-kernel@vger.kernel.org S: Maintained F: arch/x86/kernel/cpu/zhaoxin.c +ZHAOXIN TEMPERATURE MONITORING DRIVERS +M: Leoliu-oc +L: linux-hwmon@vger.kernel.org +S: Maintained +F: drivers/hwmon/zhaoxin-cputemp.c + ZONEFS FILESYSTEM M: Damien Le Moal M: Naohiro Aota diff --git a/arch/x86/configs/tencent.config b/arch/x86/configs/tencent.config index a02ae967ea93..f385b32346be 100644 --- a/arch/x86/configs/tencent.config +++ b/arch/x86/configs/tencent.config @@ -1265,6 +1265,7 @@ CONFIG_SENSORS_TMP102=m CONFIG_SENSORS_TMP401=m CONFIG_SENSORS_TMP421=m CONFIG_SENSORS_VIA_CPUTEMP=m +CONFIG_SENSORS_ZHAOXIN_CPUTEMP=m CONFIG_SENSORS_VIA686A=m CONFIG_SENSORS_VT1211=m CONFIG_SENSORS_VT8231=m diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index ec38c8892158..b9f3e18a3fda 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -2161,6 +2161,19 @@ config SENSORS_VIA_CPUTEMP sensor inside your CPU. Supported are all known variants of the VIA C7 and Nano. +config SENSORS_ZHAOXIN_CPUTEMP + tristate "Zhaoxin CPU temperature sensor" + depends on X86 + default m + select HWMON_VID + help + If you say yes here you get support for the temperature + sensor inside your CPU. Supported are all known variants of + the Zhaoxin processors. 
+ + This driver can also be built as a module. If so, the module + will be called zhaoxin-cputemp. + config SENSORS_VIA686A tristate "VIA686A" depends on PCI diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 4ac9452b5430..cab312e74d3c 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -211,6 +211,7 @@ obj-$(CONFIG_SENSORS_TMP464) += tmp464.o obj-$(CONFIG_SENSORS_TMP513) += tmp513.o obj-$(CONFIG_SENSORS_VEXPRESS) += vexpress-hwmon.o obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o +obj-$(CONFIG_SENSORS_ZHAOXIN_CPUTEMP)+= zhaoxin-cputemp.o obj-$(CONFIG_SENSORS_VIA686A) += via686a.o obj-$(CONFIG_SENSORS_VT1211) += vt1211.o obj-$(CONFIG_SENSORS_VT8231) += vt8231.o diff --git a/drivers/hwmon/via-cputemp.c b/drivers/hwmon/via-cputemp.c index e5d18dac8ee7..0a5057dbe51a 100644 --- a/drivers/hwmon/via-cputemp.c +++ b/drivers/hwmon/via-cputemp.c @@ -273,7 +273,6 @@ static const struct x86_cpu_id __initconst cputemp_ids[] = { X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_A, NULL), X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_D, NULL), X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_NANO, NULL), - X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, X86_MODEL_ANY, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, cputemp_ids); diff --git a/drivers/hwmon/zhaoxin-cputemp.c b/drivers/hwmon/zhaoxin-cputemp.c new file mode 100644 index 000000000000..751d2c5a868a --- /dev/null +++ b/drivers/hwmon/zhaoxin-cputemp.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * zhaoxin-cputemp.c - Driver for Zhaoxin CPU core temperature monitoring + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRVNAME "zhaoxin_cputemp" + +enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME, SHOW_CRIT, SHOW_MAX }; + +/* Functions declaration */ + +struct zhaoxin_cputemp_data { + struct device *hwmon_dev; + const char *name; + u32 id; + u32 msr_temp; + u32 msr_crit; + u32 msr_max; +}; + +/* Sysfs stuff */ + +static ssize_t name_show(struct device *dev, struct device_attribute *devattr, char *buf) +{ + int ret; + struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); + struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev); + + if (attr->index == SHOW_NAME) + ret = sprintf(buf, "%s\n", data->name); + else /* show label */ + ret = sprintf(buf, "Core %d\n", data->id); + return ret; +} + +static ssize_t temp_show(struct device *dev, struct device_attribute *devattr, char *buf) +{ + struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev); + u32 eax, edx; + int err; + + err = rdmsr_safe_on_cpu(data->id, data->msr_temp, &eax, &edx); + if (err) + return -EAGAIN; + + return sprintf(buf, "%lu\n", ((unsigned long)eax & 0xffffff) * 1000); +} + +static ssize_t crit_show(struct device *dev, struct device_attribute *devattr, char *buf) +{ + struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev); + u32 eax, edx; + int err; + + err = rdmsr_safe_on_cpu(data->id, data->msr_crit, &eax, &edx); + if (err) + return -EAGAIN; + + return sprintf(buf, "%lu\n", ((unsigned long)eax & 0xff) * 1000); +} + +static ssize_t max_show(struct device *dev, struct device_attribute *devattr, char *buf) +{ + struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev); + u32 eax, edx; + int err; + + err = rdmsr_safe_on_cpu(data->id, data->msr_max, &eax, &edx); + if (err) + return -EAGAIN; + + return sprintf(buf, "%lu\n", ((unsigned long)eax & 0xff) * 1000); +} 
+ +static SENSOR_DEVICE_ATTR_RO(temp1_input, temp, SHOW_TEMP); +static SENSOR_DEVICE_ATTR_RO(temp1_label, name, SHOW_LABEL); +static SENSOR_DEVICE_ATTR_RO(name, name, SHOW_NAME); +static SENSOR_DEVICE_ATTR_RO(temp1_crit, crit, SHOW_CRIT); +static SENSOR_DEVICE_ATTR_RO(temp1_max, max, SHOW_MAX); + +static struct attribute *zhaoxin_cputemp_attributes[] = { + &sensor_dev_attr_name.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_crit.dev_attr.attr, + &sensor_dev_attr_temp1_max.dev_attr.attr, + NULL +}; + +static const struct attribute_group zhaoxin_cputemp_group = { + .attrs = zhaoxin_cputemp_attributes, +}; + +static int zhaoxin_cputemp_probe(struct platform_device *pdev) +{ + struct zhaoxin_cputemp_data *data; + int err; + u32 eax, edx; + struct cpuinfo_x86 *c = &cpu_data(pdev->id); + + data = devm_kzalloc(&pdev->dev, sizeof(struct zhaoxin_cputemp_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->id = pdev->id; + data->name = "zhaoxin_cputemp"; + data->msr_temp = 0x1423; + if (c->x86_model == 0x6b) { + data->msr_crit = 0x175b; + data->msr_max = 0x175a; + } else { + data->msr_crit = 0x1416; + data->msr_max = 0x1415; + } + + /* test if we can access the TEMPERATURE MSR */ + err = rdmsr_safe_on_cpu(data->id, data->msr_temp, &eax, &edx); + if (err) { + dev_err(&pdev->dev, "Unable to access TEMPERATURE MSR, giving up\n"); + return err; + } + + platform_set_drvdata(pdev, data); + + err = sysfs_create_group(&pdev->dev.kobj, &zhaoxin_cputemp_group); + if (err) + return err; + + data->hwmon_dev = hwmon_device_register_for_thermal(&pdev->dev, data->name, data); + if (IS_ERR(data->hwmon_dev)) { + err = PTR_ERR(data->hwmon_dev); + dev_err(&pdev->dev, "Class registration failed (%d)\n", err); + goto exit_remove; + } + + return 0; + +exit_remove: + sysfs_remove_group(&pdev->dev.kobj, &zhaoxin_cputemp_group); + return err; +} + +static int zhaoxin_cputemp_remove(struct platform_device *pdev) +{ + struct zhaoxin_cputemp_data *data = platform_get_drvdata(pdev); + + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&pdev->dev.kobj, &zhaoxin_cputemp_group); + return 0; +} + +static struct platform_driver zhaoxin_cputemp_driver = { + .driver = { + .name = DRVNAME, + }, + .probe = zhaoxin_cputemp_probe, + .remove = zhaoxin_cputemp_remove, +}; + +struct pdev_entry { + struct list_head list; + struct platform_device *pdev; + unsigned int cpu; +}; + +static LIST_HEAD(pdev_list); +static DEFINE_MUTEX(pdev_list_mutex); + +static int zhaoxin_cputemp_online(unsigned int cpu) +{ + int err; + struct platform_device *pdev; + struct pdev_entry *pdev_entry; + + pdev = platform_device_alloc(DRVNAME, cpu); + if (!pdev) { + err = -ENOMEM; + pr_err("Device allocation failed\n"); + goto exit; + } + + pdev_entry = kzalloc(sizeof(struct pdev_entry), GFP_KERNEL); + if (!pdev_entry) { + err = -ENOMEM; + goto exit_device_put; + } + + err = platform_device_add(pdev); + if (err) { + pr_err("Device addition failed (%d)\n", err); + goto exit_device_free; + } + + pdev_entry->pdev = pdev; + pdev_entry->cpu = cpu; + mutex_lock(&pdev_list_mutex); + list_add_tail(&pdev_entry->list, &pdev_list); + mutex_unlock(&pdev_list_mutex); + + return 0; + +exit_device_free: + kfree(pdev_entry); +exit_device_put: + platform_device_put(pdev); +exit: + return err; +} + +static int zhaoxin_cputemp_down_prep(unsigned int cpu) +{ + struct pdev_entry *p; + + mutex_lock(&pdev_list_mutex); + list_for_each_entry(p, &pdev_list, list) { + if (p->cpu == cpu) { 
+ platform_device_unregister(p->pdev); + list_del(&p->list); + mutex_unlock(&pdev_list_mutex); + kfree(p); + return 0; + } + } + mutex_unlock(&pdev_list_mutex); + return 0; +} + +static const struct x86_cpu_id cputemp_ids[] __initconst = { + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, 0x3b, NULL), + X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, 0x3b, NULL), + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, 0x5b, NULL), + X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, 0x5b, NULL), + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, 0x6b, NULL), + X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, 0x6b, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, cputemp_ids); + +static enum cpuhp_state zhaoxin_temp_online; + +static int __init zhaoxin_cputemp_init(void) +{ + int err; + + if (!x86_match_cpu(cputemp_ids)) + return -ENODEV; + + err = platform_driver_register(&zhaoxin_cputemp_driver); + if (err) + goto exit; + + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/zhaoxin:online", + zhaoxin_cputemp_online, zhaoxin_cputemp_down_prep); + if (err < 0) + goto exit_driver_unreg; + + zhaoxin_temp_online = err; + +#ifndef CONFIG_HOTPLUG_CPU + if (list_empty(&pdev_list)) { + err = -ENODEV; + goto exit_hp_unreg; + } +#endif + return 0; + +#ifndef CONFIG_HOTPLUG_CPU +exit_hp_unreg: + cpuhp_remove_state_nocalls(zhaoxin_temp_online); +#endif +exit_driver_unreg: + platform_driver_unregister(&zhaoxin_cputemp_driver); +exit: + return err; +} + +static void __exit zhaoxin_cputemp_exit(void) +{ + cpuhp_remove_state(zhaoxin_temp_online); + platform_driver_unregister(&zhaoxin_cputemp_driver); +} + +MODULE_DESCRIPTION("Zhaoxin CPU temperature monitor"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(HWMON_THERMAL); + +module_init(zhaoxin_cputemp_init) +module_exit(zhaoxin_cputemp_exit) From db9e472492505c478133224bd43fe576fdb65054 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 4 Mar 2024 10:12:17 +0800 Subject: [PATCH 34/39] USB: Fix kernel NULL pointer when unbinding UHCI from vfio-pci This bug was found on a Zhaoxin platform, but it is a common code bug. Failure sequence: step1: Unbind the UHCI controller from its native driver; step2: Bind the UHCI controller to vfio-pci, which puts the UHCI controller in a vfio group's device list and sets UHCI's dev->driver_data to the vfio-pci private structure (for UHCI); step3: Unbind the EHCI controller from its native driver, which tries to tell the UHCI native driver that it is being removed by setting companion_hcd->self.hs_companion to NULL. However, companion_hcd is taken from UHCI's dev->driver_data, which has already been modified by vfio-pci, so the vfio-pci structure gets damaged! step4: Bind the EHCI controller to the vfio-pci driver, which puts the EHCI controller in the same vfio group as the UHCI controller; ... ... step5: Unbind the UHCI controller from vfio-pci, which deletes UHCI from the vfio group device list that was damaged in step 3. The delete operation can therefore randomly result in a NULL pointer dereference with the stack dump below. step6: Bind the UHCI controller back to its native driver; step7: Unbind the EHCI controller from vfio-pci, which tries to remove the EHCI controller from the vfio group; step8: Bind the EHCI controller back to its native driver; [ 929.114641] uhci_hcd 0000:00:10.0: remove, state 1 [ 929.114652] usb usb1: USB disconnect, device number 1 [ 929.114655] usb 1-1: USB disconnect, device number 2 [ 929.270313] usb 1-2: USB disconnect, device number 3 [ 929.318404] uhci_hcd 0000:00:10.0: USB bus 1 deregistered [ 929.343029] uhci_hcd 0000:00:10.1: remove, state 4 [ 929.343045] usb usb3: USB disconnect, device number 1 [ 929.343685] uhci_hcd 0000:00:10.1: USB bus 3 deregistered [ 929.369087] ehci-pci 0000:00:10.7: remove, state 4 [ 929.369102] usb usb4: USB disconnect, device number 1 [ 929.370325] ehci-pci 0000:00:10.7: USB bus 4 deregistered [ 932.398494] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 932.398496] PGD 42a67d067 P4D 42a67d067 PUD 42a65f067 PMD 0 [ 932.398502] Oops: 0002 [#2] SMP NOPTI [ 932.398505] CPU: 2 PID: 7824 Comm: vfio_unbind.sh Tainted: P D 4.19.65-2020051917-rainos #1 [ 932.398506] Hardware name: Shanghai Zhaoxin Semiconductor Co., Ltd. HX002EH/HX002EH, BIOS HX002EH0_01_R480_R_200408 04/08/2020 [ 932.398513] RIP: 0010:vfio_device_put+0x31/0xa0 [vfio] [ 932.398515] Code: 89 e5 41 54 53 4c 8b 67 18 48 89 fb 49 8d 74 24 30 e8 e3 0e f3 de 84 c0 74 67 48 8b 53 20 48 8b 43 28 48 8b 7b 18 48 89 42 08 <48> 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 43 20 48 b8 00 02 00 [ 932.398516] RSP: 0018:ffffbbfd04cffc18 EFLAGS: 00010202 [ 932.398518] RAX: 0000000000000000 RBX: ffff92c7ea717880 RCX: 0000000000000000 [ 932.398519] RDX: ffff92c7ea713620 RSI: ffff92c7ea713630 RDI: ffff92c7ea713600 [ 932.398521] RBP: ffffbbfd04cffc28 R08: ffff92c7f02a8080 R09: ffff92c7efc03980 [ 932.398522] R10: ffffbbfd04cff9a8 R11: 0000000000000000 R12: ffff92c7ea713600 [ 932.398523] R13: ffff92c7ed8bb0a8 R14: ffff92c7ea717880 R15: 0000000000000000 [ 932.398525] FS: 00007f3031500740(0000) GS:ffff92c7f0280000(0000) knlGS:0000000000000000 [ 932.398526] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 932.398527] CR2: 0000000000000000 CR3: 0000000428626004 CR4: 0000000000160ee0 [ 932.398528] Call Trace: [ 932.398534] vfio_del_group_dev+0xe8/0x2a0 [vfio] [ 932.398539] ? __blocking_notifier_call_chain+0x52/0x60 [ 932.398542] ? do_wait_intr_irq+0x90/0x90 [ 932.398546] ? iommu_bus_notifier+0x75/0x100 [ 932.398551] vfio_pci_remove+0x20/0xa0 [vfio_pci] [ 932.398554] pci_device_remove+0x3e/0xc0 [ 932.398557] device_release_driver_internal+0x17a/0x240 [ 932.398560] device_release_driver+0x12/0x20 [ 932.398561] unbind_store+0xee/0x180 [ 932.398564] drv_attr_store+0x27/0x40 [ 932.398567] sysfs_kf_write+0x3c/0x50 [ 932.398568] kernfs_fop_write+0x125/0x1a0 [ 932.398572] __vfs_write+0x3a/0x190 [ 932.398575] ? apparmor_file_permission+0x1a/0x20 [ 932.398577] ? security_file_permission+0x3b/0xc0 [ 932.398581] ? _cond_resched+0x1a/0x50 [ 932.398582] vfs_write+0xb8/0x1b0 [ 932.398584] ksys_write+0x5c/0xe0 [ 932.398586] __x64_sys_write+0x1a/0x20 [ 932.398589] do_syscall_64+0x5a/0x110 [ 932.398592] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Using virt-manager/qemu to boot a guest OS shows the same failure sequence! Fix this by determining whether the PCI driver of the USB controller is a kernel native driver. If it is not, do not let it modify UHCI's dev->driver_data.
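In short, the added check amounts to the sketch below (an illustrative helper only; the hunk that follows open-codes the same test directly in for_each_companion()):

	/* Only treat the companion's driver_data as a struct usb_hcd when the
	 * device is still bound to a native HCD driver; when it is bound to
	 * vfio-pci, driver_data points at vfio-pci private state instead.
	 */
	static bool companion_is_native_hcd(struct pci_dev *companion)
	{
		struct pci_driver *drv = companion->driver;

		return drv &&
		       (!strncmp(drv->name, "uhci_hcd", sizeof("uhci_hcd") - 1) ||
			!strncmp(drv->name, "ohci-pci", sizeof("ohci-pci") - 1) ||
			!strncmp(drv->name, "ehci-pci", sizeof("ehci-pci") - 1));
	}

A companion that fails this check is simply skipped, so a device owned by vfio-pci is never dereferenced as a usb_hcd.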
Signed-off-by: leoliu-oc Signed-off-by: Jianping Liu --- drivers/usb/core/hcd-pci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 990280688b25..df8f91e6a2c7 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -48,6 +48,9 @@ static void for_each_companion(struct pci_dev *pdev, struct usb_hcd *hcd, struct pci_dev *companion; struct usb_hcd *companion_hcd; unsigned int slot = PCI_SLOT(pdev->devfn); +#if IS_ENABLED(CONFIG_X86) + struct pci_driver *drv; +#endif /* * Iterate through other PCI functions in the same slot. @@ -60,6 +63,18 @@ static void for_each_companion(struct pci_dev *pdev, struct usb_hcd *hcd, PCI_SLOT(companion->devfn) != slot) continue; +#if IS_ENABLED(CONFIG_X86) + if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR || + boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) { + drv = companion->driver; + if (drv && + strncmp(drv->name, "uhci_hcd", sizeof("uhci_hcd") - 1) && + strncmp(drv->name, "ohci-pci", sizeof("ohci-pci") - 1) && + strncmp(drv->name, "ehci-pci", sizeof("ehci-pci") - 1)) + continue; + } +#endif + /* * Companion device should be either UHCI,OHCI or EHCI host * controller, otherwise skip. From a4ebe11fd23496f0678120ee33e29f6c180ea4f0 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Fri, 16 Aug 2024 10:51:04 +0800 Subject: [PATCH 35/39] x86/mce: Add NMIs setup in machine_check func Handling #MC exceptions without NMI entry setup can lead to a console_owner_lock issue and an HPET dead-loop issue. For example, the HPET dead-loop issue on CPU x: read_hpet() arch_spin_trylock(&hpet.lock) [CPU x got the hpet.lock] #MC happens do_machine_check() mce_panic() panic() kmsg_dump() pstore_dump() pstore_record_init() ktime_get_real_fast_ns() read_hpet() [dead loop] This results in a read_hpet() dead loop, since CPU x already holds hpet.lock when the panic path calls read_hpet() again. The console_owner_lock issue is similar. To avoid these issues, add NMI entry/exit setup when handling #MC exceptions. Signed-off-by: leoliu-oc --- arch/x86/kernel/cpu/mce/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 2cafc35f3b7b..4375286a20bb 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2121,11 +2121,17 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) static __always_inline void exc_machine_check_user(struct pt_regs *regs) { + irqentry_state_t irq_state; + + irq_state = irqentry_nmi_enter(regs); + irqentry_enter_from_user_mode(regs); do_machine_check(regs); irqentry_exit_to_user_mode(regs); + + irqentry_nmi_exit(regs, irq_state); } #ifdef CONFIG_X86_64 From 18e7175498c21442a65f86418ce83f2ca2c3cea1 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Fri, 16 Aug 2024 11:03:31 +0800 Subject: [PATCH 36/39] i2c/zhaoxin: I2C controller driver enhancement and optimization zhaoxin inclusion category: feature ------------------- 1. Enhanced detection of system stalls longer than 200 ms: The method for detecting scenarios where the system is stuck or hung for more than 200 milliseconds has been improved, allowing faster identification of potential issues and helping to troubleshoot and prevent instability. 2. Added a way to install the driver into the initramfs: The driver can now be installed into the initramfs (initial RAM filesystem), which the kernel loads before the root filesystem, so the driver is available during early boot and system initialization performance can improve. 3.
Fixed an Error Related to Byte Mode NACK Handling: An error that was occurring during the handling of NACK (Negative Acknowledgment) signals in byte mode communication has been addressed. This ensures proper error handling and communication stability in byte mode operations. 4. Moved Majority of Per-Message Processing to Interrupt Context: Most of the processing related to individual messages has been shifted to the interrupt context. This optimization improves system responsiveness and reduces latency by performing message-related tasks more efficiently within the interrupt service routine. Signed-off-by: leoliu-oc --- drivers/i2c/busses/i2c-zhaoxin.c | 410 +++++++++++++++---------------- 1 file changed, 201 insertions(+), 209 deletions(-) diff --git a/drivers/i2c/busses/i2c-zhaoxin.c b/drivers/i2c/busses/i2c-zhaoxin.c index 15360b9ce441..695f5f231f36 100644 --- a/drivers/i2c/busses/i2c-zhaoxin.c +++ b/drivers/i2c/busses/i2c-zhaoxin.c @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Copyright(c) 2023 Shanghai Zhaoxin Semiconductor Corporation. + * Copyright(c) 2024 Shanghai Zhaoxin Semiconductor Corporation. * All rights reserved. */ -#define DRIVER_VERSION "1.5.2" +#define DRIVER_VERSION "1.6.0" #include #include @@ -17,7 +17,7 @@ #include #include -#define ZX_I2C_NAME "i2c_zhaoxin" +#define ZX_I2C_NAME "i2c_zhaoxin" /* REG_CR Bit fields */ #define ZXI2C_REG_CR 0x00 @@ -32,7 +32,7 @@ /* REG_TCR Bit fields */ #define ZXI2C_REG_TCR 0x02 #define ZXI2C_TCR_HS_MODE BIT(13) -#define ZXI2C_TCR_MASTER_READ BIT(14) +#define ZXI2C_TCR_READ BIT(14) #define ZXI2C_TCR_FAST BIT(15) /* REG_CSR Bit fields */ @@ -48,9 +48,7 @@ #define ZXI2C_ISR_MASK_ALL GENMASK(2, 0) #define ZXI2C_IRQ_FIFOEND BIT(3) #define ZXI2C_IRQ_FIFONACK BIT(4) -#define ZXI2C_IRQ_MASK (ZXI2C_ISR_MASK_ALL \ - | ZXI2C_IRQ_FIFOEND \ - | ZXI2C_IRQ_FIFONACK) +#define ZXI2C_IRQ_MASK (ZXI2C_ISR_MASK_ALL | ZXI2C_IRQ_FIFOEND | ZXI2C_IRQ_FIFONACK) /* REG_IMR Bit fields */ #define ZXI2C_REG_IMR 0x08 @@ -75,22 +73,30 @@ #define ZXI2C_REG_TR 0x0C #define ZXI2C_REG_MCR 0x0E +enum { + ZXI2C_BYTE_MODE, + ZXI2C_FIFO_MODE +}; + struct zxi2c { - struct i2c_adapter adapter; - struct completion complete; - struct device *dev; - void __iomem *base; - struct clk *clk; - u16 tcr; - int irq; - u16 cmd_status; - u16 tr; - u16 mcr; - u16 csr; - u8 fstp; - u8 hrv; - ktime_t ti; - ktime_t to; + struct i2c_adapter adapter; + struct completion complete; + struct device *dev; + void __iomem *base; + struct clk *clk; + struct i2c_msg *msg; + int irq; + int ret; + u16 tcr; + u16 tr; + u16 mcr; + u16 csr; + u8 fstp; + u8 hrv; + bool last; + u16 xfer_len; + u16 xfered_len; + unsigned int mode; }; /* parameters Constants */ @@ -115,8 +121,8 @@ static int zxi2c_wait_bus_ready(struct zxi2c *i2c) dev_warn(i2c->dev, "timeout waiting for bus ready\n"); return -EBUSY; } - tmp = ioread16(i2c->base + ZXI2C_REG_CR); - iowrite16(tmp | ZXI2C_CR_END_MASK, i2c->base + ZXI2C_REG_CR); + tmp = ioread16(base + ZXI2C_REG_CR); + iowrite16(tmp | ZXI2C_CR_END_MASK, base + ZXI2C_REG_CR); msleep(20); } @@ -124,270 +130,253 @@ static int zxi2c_wait_bus_ready(struct zxi2c *i2c) return 0; } -static int zxi2c_wait_status(struct zxi2c *i2c, u8 status) +static int zxi2c_irq_xfer(struct zxi2c *i2c) { - unsigned long time_left; - - time_left = wait_for_completion_timeout(&i2c->complete, msecs_to_jiffies(ZXI2C_TIMEOUT)); - if (!time_left) { - dev_err(i2c->dev, "bus transfer timeout\n"); - return -EIO; - } - - /* - * During each byte access, the host performs clock stretching. 
- * In this case, the thread may be interrupted by preemption, - * resulting in a long stretching time. - * However, some touchpad can only tolerate host clock stretching - * of no more than 200 ms. We reduce the impact of this through - * a retransmission mechanism. - */ - local_irq_disable(); - i2c->to = ktime_get(); - if (ktime_to_ms(ktime_sub(i2c->to, i2c->ti)) > ZXI2C_TIMEOUT) { - local_irq_enable(); - dev_warn(i2c->dev, "thread has been blocked for a while\n"); - return -EAGAIN; - } - i2c->ti = i2c->to; - local_irq_enable(); - - if (i2c->cmd_status & status) - return 0; - - return -EIO; -} - -static irqreturn_t zxi2c_isr(int irq, void *data) -{ - struct zxi2c *i2c = data; - - /* save the status and write-clear it */ - i2c->cmd_status = readw(i2c->base + ZXI2C_REG_ISR); - if (!i2c->cmd_status) - return IRQ_NONE; - - writew(i2c->cmd_status, i2c->base + ZXI2C_REG_ISR); - - complete(&i2c->complete); - - return IRQ_HANDLED; -} - -static int zxi2c_write(struct zxi2c *i2c, struct i2c_msg *msg, bool last) -{ - u16 val, tcr_val = i2c->tcr; - int xfer_len = 0; + u16 val; + struct i2c_msg *msg = i2c->msg; + u8 read = msg->flags & I2C_M_RD; void __iomem *base = i2c->base; - writew(msg->buf[0] & 0xFF, base + ZXI2C_REG_CDR); - reinit_completion(&i2c->complete); - writew(tcr_val | msg->addr, base + ZXI2C_REG_TCR); + if (read) { + msg->buf[i2c->xfered_len] = readw(base + ZXI2C_REG_CDR) >> 8; - while (xfer_len < msg->len) { - int err; - - err = zxi2c_wait_status(i2c, ZXI2C_ISR_BYTE_END); - if (err) - return err; - - xfer_len++; + val = readw(base + ZXI2C_REG_CR) | ZXI2C_CR_CPU_RDY; + if (i2c->xfered_len == msg->len - 2) + val |= ZXI2C_CR_RX_END; + writew(val, base + ZXI2C_REG_CR); + } else { val = readw(base + ZXI2C_REG_CSR); if (val & ZXI2C_CSR_RCV_NOT_ACK) { - dev_dbg(i2c->dev, "write RCV NACK error\n"); + dev_dbg_ratelimited(i2c->dev, "write RCV NACK error\n"); return -EIO; } if (msg->len == 0) { val = ZXI2C_CR_TX_END | ZXI2C_CR_CPU_RDY | ZXI2C_CR_ENABLE; writew(val, base + ZXI2C_REG_CR); - break; + return 0; } - if (xfer_len == msg->len) { - if (last) + if ((i2c->xfered_len + 1) == msg->len) { + if (i2c->last) writeb(ZXI2C_CR_TX_END, base + ZXI2C_REG_CR); } else { - writew(msg->buf[xfer_len] & 0xFF, base + ZXI2C_REG_CDR); + writew(msg->buf[i2c->xfered_len + 1] & 0xFF, base + ZXI2C_REG_CDR); writew(ZXI2C_CR_CPU_RDY | ZXI2C_CR_ENABLE, base + ZXI2C_REG_CR); } } + i2c->xfered_len++; + + return i2c->xfered_len == msg->len; +} + +/* 'irq == true' means in interrupt context */ +int zxi2c_fifo_irq_xfer(struct zxi2c *i2c, bool irq) +{ + u16 i; + u8 tmp; + struct i2c_msg *msg = i2c->msg; + void __iomem *base = i2c->base; + bool read = !!(msg->flags & I2C_M_RD); + + if (irq) { + /* get the received data */ + if (read) + for (i = 0; i < i2c->xfer_len; i++) + msg->buf[i2c->xfered_len + i] = ioread8(base + ZXI2C_REG_HRDR); + + i2c->xfered_len += i2c->xfer_len; + if (i2c->xfered_len == msg->len) + return 1; + } + + /* reset fifo buffer */ + tmp = ioread8(base + ZXI2C_REG_HCR); + iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR); + + /* set xfer len */ + i2c->xfer_len = min_t(u16, msg->len - i2c->xfered_len, ZXI2C_FIFO_SIZE); + if (read) { + iowrite8(i2c->xfer_len - 1, base + ZXI2C_REG_HRLR); + } else { + iowrite8(i2c->xfer_len - 1, base + ZXI2C_REG_HTLR); + /* set write data */ + for (i = 0; i < i2c->xfer_len; i++) + iowrite8(msg->buf[i2c->xfered_len + i], base + ZXI2C_REG_HTDR); + } + + /* prepare to stop transmission */ + if (i2c->hrv && msg->len == (i2c->xfered_len + i2c->xfer_len)) { + tmp = 
ioread8(base + ZXI2C_REG_CR); + tmp |= read ? ZXI2C_CR_RX_END : ZXI2C_CR_TX_END; + iowrite8(tmp, base + ZXI2C_REG_CR); + } + + if (irq) { + /* continue transmission */ + tmp = ioread8(base + ZXI2C_REG_CR); + iowrite8(tmp |= ZXI2C_CR_CPU_RDY, base + ZXI2C_REG_CR); + } else { + u16 tcr_val = i2c->tcr; + + /* start transmission */ + tcr_val |= read ? ZXI2C_TCR_READ : 0; + writew(tcr_val | msg->addr, base + ZXI2C_REG_TCR); + } + return 0; } +static irqreturn_t zxi2c_isr(int irq, void *data) +{ + struct zxi2c *i2c = data; + u8 status; + + /* save the status and write-clear it */ + status = readw(i2c->base + ZXI2C_REG_ISR); + if (!status) + return IRQ_NONE; + + writew(status, i2c->base + ZXI2C_REG_ISR); + + i2c->ret = 0; + if (status & ZXI2C_ISR_NACK_ADDR) + i2c->ret = -EIO; + + if (!i2c->ret) { + if (i2c->mode == ZXI2C_BYTE_MODE) + i2c->ret = zxi2c_irq_xfer(i2c); + else + i2c->ret = zxi2c_fifo_irq_xfer(i2c, true); + } + + if (i2c->ret) + complete(&i2c->complete); + + return IRQ_HANDLED; +} + +static int zxi2c_write(struct zxi2c *i2c, struct i2c_msg *msg, int last) +{ + u16 tcr_val = i2c->tcr; + + i2c->last = last; + + writew(msg->buf[0] & 0xFF, i2c->base + ZXI2C_REG_CDR); + + reinit_completion(&i2c->complete); + + tcr_val |= msg->addr & 0x7f; + + writew(tcr_val, i2c->base + ZXI2C_REG_TCR); + + if (!wait_for_completion_timeout(&i2c->complete, ZXI2C_TIMEOUT)) + return -ETIMEDOUT; + + return i2c->ret; +} + static int zxi2c_read(struct zxi2c *i2c, struct i2c_msg *msg, bool first) { u16 val, tcr_val = i2c->tcr; - u32 xfer_len = 0; - void __iomem *base = i2c->base; - val = readw(base + ZXI2C_REG_CR); + val = readw(i2c->base + ZXI2C_REG_CR); val &= ~(ZXI2C_CR_TX_END | ZXI2C_CR_RX_END); if (msg->len == 1) val |= ZXI2C_CR_RX_END; - writew(val, base + ZXI2C_REG_CR); + writew(val, i2c->base + ZXI2C_REG_CR); reinit_completion(&i2c->complete); - tcr_val |= ZXI2C_TCR_MASTER_READ | msg->addr; + tcr_val |= ZXI2C_TCR_READ | (msg->addr & 0x7f); - writew(tcr_val, base + ZXI2C_REG_TCR); + writew(tcr_val, i2c->base + ZXI2C_REG_TCR); if (!first) { - val = readw(base + ZXI2C_REG_CR); + val = readw(i2c->base + ZXI2C_REG_CR); val |= ZXI2C_CR_CPU_RDY; - writew(val, base + ZXI2C_REG_CR); + writew(val, i2c->base + ZXI2C_REG_CR); } - while (xfer_len < msg->len) { - int err; + if (!wait_for_completion_timeout(&i2c->complete, ZXI2C_TIMEOUT)) + return -ETIMEDOUT; - err = zxi2c_wait_status(i2c, ZXI2C_ISR_BYTE_END); - if (err) - return err; - - msg->buf[xfer_len] = readw(base + ZXI2C_REG_CDR) >> 8; - xfer_len++; - - val = readw(base + ZXI2C_REG_CR) | ZXI2C_CR_CPU_RDY; - if (xfer_len == msg->len - 1) - val |= ZXI2C_CR_RX_END; - writew(val, base + ZXI2C_REG_CR); - } - - return 0; + return i2c->ret; } -static int zxi2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) +int zxi2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) { struct i2c_msg *msg; int i; int ret = 0; struct zxi2c *i2c = i2c_get_adapdata(adap); + i2c->mode = ZXI2C_BYTE_MODE; for (i = 0; ret >= 0 && i < num; i++) { - msg = &msgs[i]; - if (msg->len == 0) { - dev_dbg(i2c->dev, "zero len unsupported\n"); - return -ENODEV; - } + i2c->msg = msg = &msgs[i]; + i2c->xfered_len = 0; + if (msg->len == 0) + return -EIO; + if (msg->flags & I2C_M_RD) ret = zxi2c_read(i2c, msg, i == 0); else - ret = zxi2c_write(i2c, msg, i == (num - 1)); + ret = zxi2c_write(i2c, msg, (i + 1) == num); } return (ret < 0) ? 
ret : i; } -static int zxi2c_fifo_xfer(struct zxi2c *i2c, struct i2c_msg *msg) -{ - u16 xfered_len = 0; - u16 byte_left = msg->len; - u16 tcr_val = i2c->tcr; - void __iomem *base = i2c->base; - bool read = !!(msg->flags & I2C_M_RD); - - while (byte_left) { - u16 i; - u8 tmp; - int error; - u16 xfer_len = min_t(u16, byte_left, ZXI2C_FIFO_SIZE); - - byte_left -= xfer_len; - - /* reset fifo buffer */ - tmp = ioread8(base + ZXI2C_REG_HCR); - iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR); - - /* set xfer len */ - if (read) - iowrite8(xfer_len - 1, base + ZXI2C_REG_HRLR); - else { - iowrite8(xfer_len - 1, base + ZXI2C_REG_HTLR); - /* set write data */ - for (i = 0; i < xfer_len; i++) - iowrite8(msg->buf[xfered_len + i], base + ZXI2C_REG_HTDR); - } - - /* prepare to stop transmission */ - if (i2c->hrv && !byte_left) { - tmp = ioread8(i2c->base + ZXI2C_REG_CR); - tmp |= read ? ZXI2C_CR_RX_END : ZXI2C_CR_TX_END; - iowrite8(tmp, base + ZXI2C_REG_CR); - } - - reinit_completion(&i2c->complete); - - if (xfered_len) { - /* continue transmission */ - tmp = ioread8(i2c->base + ZXI2C_REG_CR); - iowrite8(tmp |= ZXI2C_CR_CPU_RDY, i2c->base + ZXI2C_REG_CR); - } else { - /* start transmission */ - tcr_val |= (read ? ZXI2C_TCR_MASTER_READ : 0); - writew(tcr_val | msg->addr, base + ZXI2C_REG_TCR); - } - - error = zxi2c_wait_status(i2c, ZXI2C_IRQ_FIFOEND); - if (error) - return error; - - /* get the received data */ - if (read) - for (i = 0; i < xfer_len; i++) - msg->buf[xfered_len + i] = - ioread8(base + ZXI2C_REG_HRDR); - - xfered_len += xfer_len; - } - - return 1; -} - static int zxi2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { u8 tmp; int ret; struct zxi2c *i2c = (struct zxi2c *)i2c_get_adapdata(adap); + void __iomem *base = i2c->base; ret = zxi2c_wait_bus_ready(i2c); if (ret) return ret; - tmp = ioread8(i2c->base + ZXI2C_REG_CR); + tmp = ioread8(base + ZXI2C_REG_CR); tmp &= ~(ZXI2C_CR_RX_END | ZXI2C_CR_TX_END); - i2c->ti = ktime_get(); if (num == 1 && msgs->len >= 2 && (i2c->hrv || msgs->len <= ZXI2C_FIFO_SIZE)) { /* enable fifo mode */ - iowrite16(ZXI2C_CR_FIFO_MODE | tmp, i2c->base + ZXI2C_REG_CR); + iowrite16(ZXI2C_CR_FIFO_MODE | tmp, base + ZXI2C_REG_CR); /* clear irq status */ - iowrite8(ZXI2C_IRQ_MASK, i2c->base + ZXI2C_REG_ISR); + iowrite8(ZXI2C_IRQ_MASK, base + ZXI2C_REG_ISR); /* enable fifo irq */ - iowrite8(ZXI2C_ISR_NACK_ADDR | ZXI2C_IRQ_FIFOEND, i2c->base + ZXI2C_REG_IMR); + iowrite8(ZXI2C_ISR_NACK_ADDR | ZXI2C_IRQ_FIFOEND, base + ZXI2C_REG_IMR); - ret = zxi2c_fifo_xfer(i2c, msgs); + i2c->msg = msgs; + i2c->mode = ZXI2C_FIFO_MODE; + i2c->xfer_len = i2c->xfered_len = 0; + + zxi2c_fifo_irq_xfer(i2c, 0); + + if (!wait_for_completion_timeout(&i2c->complete, ZXI2C_TIMEOUT)) + return -ETIMEDOUT; + + ret = i2c->ret; } else { /* enable byte mode */ - iowrite16(tmp, i2c->base + ZXI2C_REG_CR); + iowrite16(tmp, base + ZXI2C_REG_CR); /* clear irq status */ - iowrite8(ZXI2C_IRQ_MASK, i2c->base + ZXI2C_REG_ISR); + iowrite8(ZXI2C_IRQ_MASK, base + ZXI2C_REG_ISR); /* enable byte irq */ - iowrite8(ZXI2C_ISR_NACK_ADDR | ZXI2C_IMR_BYTE, i2c->base + ZXI2C_REG_IMR); + iowrite8(ZXI2C_ISR_NACK_ADDR | ZXI2C_IMR_BYTE, base + ZXI2C_REG_IMR); ret = zxi2c_xfer(adap, msgs, num); - if (ret < 0) - iowrite16(tmp | ZXI2C_CR_END_MASK, i2c->base + ZXI2C_REG_CR); - /* make sure the state machine is stopped */ - usleep_range(1, 2); + if (ret == -ETIMEDOUT) + iowrite16(tmp | ZXI2C_CR_END_MASK, base + ZXI2C_REG_CR); } /* dis interrupt */ - iowrite8(0, i2c->base + ZXI2C_REG_IMR); + iowrite8(0, base 
+ ZXI2C_REG_IMR); return ret; } @@ -412,7 +401,7 @@ static const u32 zxi2c_speed_params_table[][3] = { { I2C_MAX_FAST_MODE_FREQ, ZXI2C_TCR_FAST, ZXI2C_GOLD_FSTP_400K }, { I2C_MAX_FAST_MODE_PLUS_FREQ, ZXI2C_TCR_FAST, ZXI2C_GOLD_FSTP_1M }, { I2C_MAX_HIGH_SPEED_MODE_FREQ, ZXI2C_TCR_HS_MODE | ZXI2C_TCR_FAST, - ZXI2C_GOLD_FSTP_3400K }, + ZXI2C_GOLD_FSTP_3400K }, /* never reached, keep for debug. freq src is 27M mode */ { I2C_MAX_STANDARD_MODE_FREQ, 0, 0x83 }, { I2C_MAX_FAST_MODE_FREQ, ZXI2C_TCR_FAST, 0x1e }, @@ -449,10 +438,11 @@ static void zxi2c_get_bus_speed(struct zxi2c *i2c) * use golden value and warn user */ dev_warn(i2c->dev, "speed:%d, fstp:0x%x, golden:0x%x\n", params[0], fstp, - params[2]); + params[2]); i2c->tr = params[2] | 0xff00; - } else + } else { i2c->tr = fstp | 0xff00; + } i2c->tcr = params[1]; i2c->mcr = ioread16(i2c->base + ZXI2C_REG_MCR); @@ -497,6 +487,7 @@ static int zxi2c_init(struct platform_device *pdev, struct zxi2c **pi2c) platform_set_drvdata(pdev, i2c); *pi2c = i2c; + return 0; } @@ -517,12 +508,13 @@ static int zxi2c_probe(struct platform_device *pdev) adap = &i2c->adapter; adap->owner = THIS_MODULE; adap->algo = &zxi2c_algorithm; - adap->retries = 2; + adap->quirks = &zxi2c_quirks; + adap->dev.parent = &pdev->dev; ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); snprintf(adap->name, sizeof(adap->name), "zhaoxin-%s-%s", dev_name(pdev->dev.parent), - dev_name(i2c->dev)); + dev_name(i2c->dev)); i2c_set_adapdata(adap, i2c); error = i2c_add_adapter(adap); @@ -530,7 +522,7 @@ static int zxi2c_probe(struct platform_device *pdev) return error; dev_info(i2c->dev, "adapter /dev/i2c-%d registered. version %s\n", adap->nr, - DRIVER_VERSION); + DRIVER_VERSION); return 0; } From 6ed88a56c790431e74e664fb45f9fe91bd4dd338 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Fri, 16 Aug 2024 11:07:10 +0800 Subject: [PATCH 37/39] x86/hpet: Read HPET directly if panic in progress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit zhaoxin inclusion category: other ------------------- When the clocksource of the system is HPET,a CPU executing read_hpet might be interrupted by #GP/#PF to executing the panic,this may lead to read_hpet dead loops: CPU x CPU x ---- ---- read_hpet() arch_spin_trylock(&hpet.lock) [CPU x got the hpet.lock] #GP/#PF happened panic() kmsg_dump() pstore_dump() pstore_record_init() ktime_get_real_fast_ns() read_hpet() [dead loops] To avoid this dead loops, read HPET directly if panic in progress. Signed-off-by: leoliu-oc --- arch/x86/kernel/hpet.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 046bc9d57e99..2626fa052b45 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -804,6 +804,12 @@ static u64 read_hpet(struct clocksource *cs) if (in_nmi()) return (u64)hpet_readl(HPET_COUNTER); + /* + * Read HPET directly if panic in progress. + */ + if (unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID)) + return (u64)hpet_readl(HPET_COUNTER); + /* * Read the current state of the lock and HPET value atomically. */ From 1e425ba9127d344998738688c024490d496c0771 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Thu, 15 Aug 2024 15:36:38 +0800 Subject: [PATCH 38/39] x86/mce/zhaoxin: Enable mcelog to decode PCIE, ZDI/ZPI, and DRAM errors zhaoxin inclusion category: feature ------------------- The mcelog cannot decode PCIE, ZDI/ZPI, and DRAM errors in the FFM (Firmware First Mode). 
The purpose of this patch is to enable mcelog to decode PCIE, ZDI/ZPI, and DRAM errors that occur on Zhaoxin processors, so that the cause of these errors can be quickly located. Signed-off-by: leoliu-oc --- arch/x86/include/asm/mce.h | 6 ++ arch/x86/kernel/acpi/apei.c | 28 +++++- arch/x86/kernel/cpu/mce/apei.c | 167 +++++++++++++++++++++++++++++++++ drivers/acpi/apei/apei-base.c | 11 +++ drivers/acpi/apei/ghes.c | 33 ++++++- include/acpi/apei.h | 2 + 6 files changed, 242 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 180b1cbfcc4e..a02d2215a79f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -289,6 +289,12 @@ struct cper_sec_mem_err; extern void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err); +extern void zx_apei_mce_report_mem_error(struct cper_sec_mem_err *mem_err); +struct cper_sec_pcie; +extern void zx_apei_mce_report_pcie_error(int corrected, struct cper_sec_pcie *pcie_err); +struct cper_sec_proc_generic; +extern void zx_apei_mce_report_zdi_error(struct cper_sec_proc_generic *zdi_err); + /* * Enumerate new IP types and HWID values in AMD processors which support * Scalable MCA. diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c index 0916f00a992e..e3782035d7c3 100644 --- a/arch/x86/kernel/acpi/apei.c +++ b/arch/x86/kernel/acpi/apei.c @@ -40,10 +40,36 @@ int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data) void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) { #ifdef CONFIG_X86_MCE - apei_mce_report_mem_error(sev, mem_err); + if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) + zx_apei_mce_report_mem_error(mem_err); + else + apei_mce_report_mem_error(sev, mem_err); #endif } +void arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err) +{ +#ifdef CONFIG_X86_MCE + if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) + zx_apei_mce_report_pcie_error(sev, pcie_err); +#endif +} + +bool arch_apei_report_zdi_error(guid_t *sec_type, struct cper_sec_proc_generic *zdi_err) +{ +#ifdef CONFIG_X86_MCE + if ((boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR || + boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) && + (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC))) { + zx_apei_mce_report_zdi_error(zdi_err); + return true; + } +#endif + return false; +} + int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id) { return apei_smca_report_x86_error(ctx_info, lapic_id); diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c index 8ed341714686..c77cffffc696 100644 --- a/arch/x86/kernel/cpu/mce/apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -63,6 +63,173 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) } EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); +void zx_apei_mce_report_mem_error(struct cper_sec_mem_err *mem_err) +{ + struct mce m; + int apei_error = 0; + + if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91) + return; + + if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) + return; + + mce_setup(&m); + m.misc = 0; + m.misc = mem_err->module; + m.addr = mem_err->physical_addr; + if (mem_err->card == 0) + m.bank = 9; + else + m.bank = 10; + + switch (mem_err->error_type) { + case 2: + m.status = 0x9c20004000010080; + break; + case 3: + m.status = 0xbe40000000020090; + apei_error = apei_write_mce(&m); + break; + case 8: + if 
(mem_err->requestor_id == 2) { + m.status = 0x98200040000400b0; + } else if (mem_err->requestor_id == 3) { + m.status = 0xba400000000600a0; + apei_error = apei_write_mce(&m); + } else if (mem_err->requestor_id == 4) { + m.status = 0x98200100000300b0; + } else if (mem_err->requestor_id == 5) { + m.status = 0xba000000000500b0; + apei_error = apei_write_mce(&m); + } else { + pr_info("Undefined Parity error\n"); + } + break; + case 10: + if (mem_err->requestor_id == 6) { + m.status = 0xba400000000700a0; + apei_error = apei_write_mce(&m); + } else if (mem_err->requestor_id == 7) { + m.status = 0xba000000000800b0; + apei_error = apei_write_mce(&m); + } else { + pr_info("Undefined dvad error\n"); + } + break; + case 13: + m.status = 0x9c200040000100c0; + break; + case 14: + m.status = 0xbd000000000200c0; + apei_error = apei_write_mce(&m); + break; + } + mce_log(&m); +} +EXPORT_SYMBOL_GPL(zx_apei_mce_report_mem_error); + +void zx_apei_mce_report_pcie_error(int severity, struct cper_sec_pcie *pcie_err) +{ + struct mce m; + int apei_error = 0; + + if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91) + return; + + mce_setup(&m); + m.addr = 0; + m.misc = 0; + m.misc |= (u64)pcie_err->device_id.segment << 32; + m.misc |= pcie_err->device_id.bus << 24; + m.misc |= pcie_err->device_id.device << 19; + m.misc |= pcie_err->device_id.function << 16; + m.bank = 6; + + switch (severity) { + case 1: + m.status = 0x9820004000020e0b; + break; + case 2: + m.status = 0xba20000000010e0b; + break; + case 3: + m.status = 0xbd20000000000e0b; + apei_error = apei_write_mce(&m); + break; + default: + pr_info("Undefine pcie error\n"); + break; + } + mce_log(&m); +} +EXPORT_SYMBOL_GPL(zx_apei_mce_report_pcie_error); + +void zx_apei_mce_report_zdi_error(struct cper_sec_proc_generic *zdi_err) +{ + struct mce m; + int apei_error = 0; + + if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91) + return; + + mce_setup(&m); + m.misc = 0; + m.misc |= (zdi_err->requestor_id & 0xff) << 19; + m.misc |= ((zdi_err->requestor_id & 0xff00) >> 8) >> 24; + m.bank = 5; + switch (zdi_err->responder_id) { + case 2: + m.status = 0xba00000000040e0f; + apei_error = apei_write_mce(&m); + break; + case 3: + m.status = 0xba00000000030e0f; + apei_error = apei_write_mce(&m); + break; + case 4: + m.status = 0xba00000000020e0f; + apei_error = apei_write_mce(&m); + break; + case 5: + m.status = 0xba00000000010e0f; + apei_error = apei_write_mce(&m); + break; + case 6: + m.status = 0x9820004000090e0f; + break; + case 7: + m.status = 0x9820004000080e0f; + break; + case 8: + m.status = 0x9820004000070e0f; + break; + case 9: + m.status = 0x9820004000060e0f; + break; + case 10: + m.status = 0x9820004000050e0f; + break; + case 11: + case 12: + case 13: + case 14: + case 15: + m.status = 0x98200040000b0e0f; + break; + case 16: + case 17: + case 18: + m.status = 0x98200040000c0e0f; + break; + default: + pr_info("Undefined ZDI Error\n"); + break; + } + mce_log(&m); +} +EXPORT_SYMBOL_GPL(zx_apei_mce_report_zdi_error); + int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id) { const u64 *i_mce = ((const u64 *) (ctx_info + 1)); diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index c7c26872f4ce..ad8d5d5e97cc 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -773,6 +773,17 @@ void __weak arch_apei_report_mem_error(int sev, } EXPORT_SYMBOL_GPL(arch_apei_report_mem_error); +void __weak arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err) +{ +} 
+EXPORT_SYMBOL_GPL(arch_apei_report_pcie_error); + +bool __weak arch_apei_report_zdi_error(guid_t *sec_type, struct cper_sec_proc_generic *zdi_err) +{ + return false; +} +EXPORT_SYMBOL_GPL(arch_apei_report_zdi_error); + int apei_osc_setup(void) { static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ab2a82cb1b0b..7eb59b4f2794 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -703,6 +703,9 @@ static bool ghes_do_proc(struct ghes *ghes, queued = ghes_handle_memory_failure(gdata, sev, sync); } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { + struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); + + arch_apei_report_pcie_error(sec_sev, pcie_err); ghes_handle_aer(gdata); } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { @@ -710,10 +713,13 @@ static bool ghes_do_proc(struct ghes *ghes, } else { void *err = acpi_hest_get_payload(gdata); - ghes_defer_non_standard_event(gdata, sev); - log_non_standard_event(sec_type, fru_id, fru_text, - sec_sev, err, - gdata->error_data_length); + if (!arch_apei_report_zdi_error(sec_type, + (struct cper_sec_proc_generic *)err)) { + ghes_defer_non_standard_event(gdata, sev); + log_non_standard_event(sec_type, fru_id, fru_text, + sec_sev, err, + gdata->error_data_length); + } } } @@ -1091,6 +1097,8 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, u32 len, node_len; u64 buf_paddr; int sev, rc; + struct acpi_hest_generic_data *gdata; + guid_t *sec_type; if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) return -EOPNOTSUPP; @@ -1126,6 +1134,23 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, sev = ghes_severity(estatus->error_severity); if (sev >= GHES_SEV_PANIC) { + apei_estatus_for_each_section(estatus, gdata) { + sec_type = (guid_t *)gdata->section_type; + if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); + + arch_apei_report_mem_error(sev, mem_err); + } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { + struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); + + arch_apei_report_pcie_error(sev, pcie_err); + } else if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) { + struct cper_sec_proc_generic *zdi_err = + acpi_hest_get_payload(gdata); + + arch_apei_report_zdi_error(sec_type, zdi_err); + } + } ghes_print_queued_estatus(); __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); } diff --git a/include/acpi/apei.h b/include/acpi/apei.h index dc60f7db5524..808cfa7d16b1 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -52,6 +52,8 @@ int erst_clear(u64 record_id); int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data); void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err); +void arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err); +bool arch_apei_report_zdi_error(guid_t *sec_type, struct cper_sec_proc_generic *zdi_err); #endif #endif From 8ab16a0e2f64b81510da553408e4c10e45810fc2 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 19 Aug 2024 14:37:35 +0800 Subject: [PATCH 39/39] iommu/dma: Fix not fully traversing iova reservations issue For multiple devices in the same iommu group, sorted later devices (based on Bus:Dev.Func) have the RMRR. Sorted earlier device (without RMRR) initialized the iova domain causing the sorted later device goto done_unlock. 
Then the later-sorted device (with the RMRR) never executes iova_reserve_iommu_regions to reserve the RMRR in the group's iova domain, so other devices of the same group are still allowed to allocate IOVAs inside the RMRR, and those DMA iova addresses conflict with the RMRR. Every device of the group must therefore run the iova reservation.

Split the reservation into iova_reserve_pci_regions (reserves the PCI windows) and iova_reserve_iommu_regions (reserves the resv-regions, such as the RMRR and the MSI range), and make the early "if (iovad->start_pfn)" path goto iova_reserve_iommu_regions instead of done_unlock, so the reservation is no longer skipped for later devices of the group.

Signed-off-by: leoliu-oc
---
 drivers/iommu/dma-iommu.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4a91275bc221..b3a70eb8a349 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -581,6 +581,18 @@ int iova_reserve_domain_addr(struct iommu_domain *domain, dma_addr_t start, dma_
 }
 EXPORT_SYMBOL_GPL(iova_reserve_domain_addr);
 
+static int iova_reserve_pci_regions(struct device *dev, struct iommu_domain *domain)
+{
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
+	int ret = 0;
+
+	if (dev_is_pci(dev))
+		ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
+
+	return ret;
+}
+
 static int iova_reserve_iommu_regions(struct device *dev,
 		struct iommu_domain *domain)
 {
@@ -590,12 +602,6 @@ static int iova_reserve_iommu_regions(struct device *dev,
 	LIST_HEAD(resv_regions);
 	int ret = 0;
 
-	if (dev_is_pci(dev)) {
-		ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
-		if (ret)
-			return ret;
-	}
-
 	iommu_get_resv_regions(dev, &resv_regions);
 	list_for_each_entry(region, &resv_regions, list) {
 		unsigned long lo, hi;
@@ -727,7 +733,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 		}
 
 		ret = 0;
-		goto done_unlock;
+		goto iova_reserve_iommu;
 	}
 
 	init_iova_domain(iovad, 1UL << order, base_pfn);
@@ -742,6 +748,11 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	    (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
 		domain->type = IOMMU_DOMAIN_DMA;
 
+	ret = iova_reserve_pci_regions(dev, domain);
+	if (ret)
+		goto done_unlock;
+
+iova_reserve_iommu:
 	ret = iova_reserve_iommu_regions(dev, domain);
 
 done_unlock:
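
For reference, a condensed sketch of the resulting iommu_dma_init_domain() control flow is shown below. It is illustrative only, not the literal driver code: mutex handling, cookie validation and the granule/base_pfn computation are elided, and only the calls relevant to the reservation ordering are kept.

    static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
                                     dma_addr_t limit, struct device *dev)
    {
            struct iommu_dma_cookie *cookie = domain->iova_cookie;
            struct iova_domain *iovad = &cookie->iovad;
            int ret;

            if (iovad->start_pfn) {
                    /*
                     * An earlier device of the group already initialized the
                     * iova domain; skip re-initialization but still reserve
                     * this device's resv-regions (RMRR, MSI range) below.
                     */
                    ret = 0;
                    goto iova_reserve_iommu;
            }

            init_iova_domain(iovad, 1UL << order, base_pfn); /* order/base_pfn elided */

            /* The PCI windows only need to be reserved once per iova domain. */
            ret = iova_reserve_pci_regions(dev, domain);
            if (ret)
                    goto done_unlock;

    iova_reserve_iommu:
            /* Reached by every device of the group, so RMRRs are always reserved. */
            ret = iova_reserve_iommu_regions(dev, domain);

    done_unlock:
            return ret;
    }

With this structure, the "iovad->start_pfn already set" path no longer bypasses the resv-region reservation, which is what the hunks above implement.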