Merge Intel thermal control driver changes for v6.2

 - Add Raptor Lake-S support to the intel_tcc_cooling driver (Zhang
   Rui).

 - Make the intel_tcc_cooling driver detect TCC locking (Zhang Rui).

 - Address Coverity warning in intel_hfi_process_event() (Ricardo Neri).

 - Prevent accidental clearing of HFI in the package thermal interrupt
   status (Srinivas Pandruvada).

 - Protect the clearing of status bits in MSR_IA32_PACKAGE_THERM_STATUS
   and MSR_IA32_THERM_STATUS (Srinivas Pandruvada).

 - Allow the HFI interrupt handler to ACK an event for the same
   timestamp (Srinivas Pandruvada).

* thermal-intel:
  thermal: intel: hfi: ACK HFI for the same timestamp
  thermal: intel: Protect clearing of thermal status bits
  thermal: intel: Prevent accidental clearing of HFI status
  thermal: intel: intel_tcc_cooling: Add TCC cooling support for RaptorLake-S
  thermal: intel: intel_tcc_cooling: Detect TCC lock bit
  thermal: intel: hfi: Improve the type of hfi_features::nr_table_pages
This commit is contained in:
Rafael J. Wysocki 2022-12-02 19:39:07 +01:00
commit 7d4b19ab6b
5 changed files with 52 additions and 31 deletions

View File

@ -42,9 +42,7 @@
#include "../thermal_core.h"
#include "intel_hfi.h"
#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \
BIT(9) | BIT(11) | BIT(26))
#include "thermal_interrupt.h"
/* Hardware Feedback Interface MSR configuration bits */
#define HW_FEEDBACK_PTR_VALID_BIT BIT(0)
@ -137,7 +135,7 @@ struct hfi_instance {
* Parameters and supported features that are common to all HFI instances
*/
struct hfi_features {
unsigned int nr_table_pages;
size_t nr_table_pages;
unsigned int cpu_stride;
unsigned int hdr_size;
};
@ -252,7 +250,7 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
struct hfi_instance *hfi_instance;
int cpu = smp_processor_id();
struct hfi_cpu_info *info;
u64 new_timestamp;
u64 new_timestamp, msr, hfi;
if (!pkg_therm_status_msr_val)
return;
@ -281,9 +279,21 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
if (!raw_spin_trylock(&hfi_instance->event_lock))
return;
/* Skip duplicated updates. */
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
if (!hfi) {
raw_spin_unlock(&hfi_instance->event_lock);
return;
}
/*
* Ack duplicate update. Since there is an active HFI
* status from HW, it must be a new event, not a case
* where a lagging CPU entered the locked region.
*/
new_timestamp = *(u64 *)hfi_instance->hw_table;
if (*hfi_instance->timestamp == new_timestamp) {
thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
raw_spin_unlock(&hfi_instance->event_lock);
return;
}
@ -297,16 +307,14 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
memcpy(hfi_instance->local_table, hfi_instance->hw_table,
hfi_features.nr_table_pages << PAGE_SHIFT);
raw_spin_unlock(&hfi_instance->table_lock);
raw_spin_unlock(&hfi_instance->event_lock);
/*
* Let hardware know that we are done reading the HFI table and it is
* free to update it again.
*/
pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK &
~PACKAGE_THERM_STATUS_HFI_UPDATED;
wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val);
thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
raw_spin_unlock(&hfi_instance->table_lock);
raw_spin_unlock(&hfi_instance->event_lock);
queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
HFI_UPDATE_INTERVAL);

View File

@ -14,6 +14,7 @@
#define TCC_SHIFT 24
#define TCC_MASK (0x3fULL<<24)
#define TCC_PROGRAMMABLE BIT(30)
#define TCC_LOCKED BIT(31)
static struct thermal_cooling_device *tcc_cdev;
@ -84,6 +85,7 @@ static const struct x86_cpu_id tcc_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL),
{}
};
@ -108,6 +110,15 @@ static int __init tcc_cooling_init(void)
if (!(val & TCC_PROGRAMMABLE))
return -ENODEV;
err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
if (err)
return err;
if (val & TCC_LOCKED) {
pr_info("TCC Offset locked\n");
return -ENODEV;
}
pr_info("Programmable TCC Offset detected\n");
tcc_cdev =

View File

@ -190,32 +190,33 @@ static const struct attribute_group thermal_attr_group = {
};
#endif /* CONFIG_SYSFS */
#define CORE_LEVEL 0
#define PACKAGE_LEVEL 1
#define THERM_THROT_POLL_INTERVAL HZ
#define THERM_STATUS_PROCHOT_LOG BIT(1)
#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(26))
static void clear_therm_status_log(int level)
/*
* Clear the status bits selected by bit_mask (every bit set to 1 in
* bit_mask is cleared) in the core or package thermal status register,
* depending on level.
*/
void thermal_clear_package_intr_status(int level, u64 bit_mask)
{
u64 msr_val;
int msr;
u64 mask, msr_val;
if (level == CORE_LEVEL) {
msr = MSR_IA32_THERM_STATUS;
mask = THERM_STATUS_CLEAR_CORE_MASK;
msr_val = THERM_STATUS_CLEAR_CORE_MASK;
} else {
msr = MSR_IA32_PACKAGE_THERM_STATUS;
mask = THERM_STATUS_CLEAR_PKG_MASK;
msr_val = THERM_STATUS_CLEAR_PKG_MASK;
}
rdmsrl(msr, msr_val);
msr_val &= mask;
wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
msr_val &= ~bit_mask;
wrmsrl(msr, msr_val);
}
EXPORT_SYMBOL_GPL(thermal_clear_package_intr_status);
static void get_therm_status(int level, bool *proc_hot, u8 *temp)
{
@ -295,7 +296,7 @@ static void __maybe_unused throttle_active_work(struct work_struct *work)
state->average = avg;
re_arm:
clear_therm_status_log(state->level);
thermal_clear_package_intr_status(state->level, THERM_STATUS_PROCHOT_LOG);
schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
}

View File

@ -2,6 +2,9 @@
#ifndef _INTEL_THERMAL_INTERRUPT_H
#define _INTEL_THERMAL_INTERRUPT_H
#define CORE_LEVEL 0
#define PACKAGE_LEVEL 1
/* Interrupt Handler for package thermal thresholds */
extern int (*platform_thermal_package_notify)(__u64 msr_val);
@ -15,4 +18,7 @@ extern bool (*platform_thermal_package_rate_control)(void);
/* Handle HWP interrupt */
extern void notify_hwp_interrupt(void);
/* Common function to clear bits in the core or package thermal status register */
extern void thermal_clear_package_intr_status(int level, u64 bit_mask);
#endif /* _INTEL_THERMAL_INTERRUPT_H */

View File

@ -265,7 +265,6 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
struct thermal_zone_device *tzone = NULL;
int cpu = smp_processor_id();
struct zone_device *zonedev;
u64 msr_val, wr_val;
mutex_lock(&thermal_zone_mutex);
raw_spin_lock_irq(&pkg_temp_lock);
@ -279,12 +278,8 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
}
zonedev->work_scheduled = false;
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
if (wr_val != msr_val) {
wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
tzone = zonedev->tzone;
}
thermal_clear_package_intr_status(PACKAGE_LEVEL, THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
tzone = zonedev->tzone;
enable_pkg_thres_interrupt();
raw_spin_unlock_irq(&pkg_temp_lock);