Power management fixes for 5.3-rc5

- Disable NVMe power optimization related to suspend-to-idle added
    recently on systems where PCIe ASPM is not able to put PCIe links
    into low-power states to prevent excess power from being drawn by
    the system while suspended (Rafael Wysocki).
 
  - Make the schedutil governor handle frequency limits changes
    properly in all cases (Viresh Kumar).
 
  - Prevent the cpufreq core from treating positive values returned
    by dev_pm_qos_update_request() as errors (Viresh Kumar).
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCAAwFiEE4fcc61cGeeHD/fCwgsRv/nhiVHEFAl1WqLMSHHJqd0Byand5
 c29ja2kubmV0AAoJEILEb/54YlRxCkAP/146AuGXj8tOdHxkpl6DVgm0WVRNxCtL
 Z9Y+1xBRBSYVZkeDsjzox995z8Ha/0tnMp6EPcnxebkFpRx3fyldXKUKxqJARPPi
 n2jGhqCPNcAHK2UPdGH8EvHOI2uWBMBa2jW2Qw9m0V/+9Zy58ZvKqso/+myFkz2S
 YRekJPADsI3GZW1SZ3dY4/12jcKsQt32TWaGOLqKx3R1J1BnpyxduXfqJ6FUrH9b
 P/F9cVb2UEbawh5QpNmfMsfBb/DsE08NQhPWe91m0VgcLd6IZsoNux0Rd8HJOvRM
 +5vh6qPTABnNN1+7blFw64/hCu1N2hq8KLl6DzPeKohysKiDkmLh3QGB+ISRpj+H
 5GKF8gnQFvN0fPJF8NU+eIZ0IaOryrooSu4TeCcAWAozJ0ln2mjNoC2h6U1B8Y29
 UH+e2z+6kVTHwjiTjPacjQ0wnkUctoiT71kMxQ8Q+GFG3fQcz3GFFM17eITnAI/Q
 ws1bPHn1ovxl1GmdQwQK3KnT1cK5/fApaVKQLJiRkUvZ1gCZ3ZcruPlh+qA5zpGf
 +RGPXn/Rm1LA1uCkS4j6REBp6vhcVJoVEVnEGzhovdtJcuJ9erlh5I2zz4UxURnn
 cHH48exFmwC+uBhIyQVuYOYgLU3naztBLFg1/l68sMQFonWjIQ/Hp1B9cIgigwbf
 5+BlT1llvIH3
 =eCcy
 -----END PGP SIGNATURE-----

Merge tag 'pm-5.3-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management fixes from Rafael Wysocki:
 "These add a check to avoid recent suspend-to-idle power regression on
  systems with NVMe drives where the PCIe ASPM policy is "performance"
  (or when the kernel is built without ASPM support), fix an issue
  related to frequency limits in the schedutil cpufreq governor and fix
  a mistake related to the PM QoS usage in the cpufreq core introduced
  recently.

  Specifics:

   - Disable NVMe power optimization related to suspend-to-idle added
     recently on systems where PCIe ASPM is not able to put PCIe links
     into low-power states to prevent excess power from being drawn by
     the system while suspended (Rafael Wysocki).

   - Make the schedutil governor handle frequency limits changes
     properly in all cases (Viresh Kumar).

   - Prevent the cpufreq core from treating positive values returned by
     dev_pm_qos_update_request() as errors (Viresh Kumar)"

* tag 'pm-5.3-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  nvme-pci: Allow PCI bus-level PM to be used if ASPM is disabled
  PCI/ASPM: Add pcie_aspm_enabled()
  cpufreq: schedutil: Don't skip freq update when limits change
  cpufreq: dev_pm_qos_update_request() can return 1 on success
This commit is contained in:
Linus Torvalds 2019-08-16 09:13:16 -07:00
commit 2d63ba3e41
5 changed files with 43 additions and 8 deletions

View File

@ -2528,7 +2528,7 @@ static int cpufreq_boost_set_sw(int state)
}
ret = dev_pm_qos_update_request(policy->max_freq_req, policy->max);
if (ret)
if (ret < 0)
break;
}

View File

@ -2846,7 +2846,7 @@ static int nvme_resume(struct device *dev)
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
struct nvme_ctrl *ctrl = &ndev->ctrl;
if (pm_resume_via_firmware() || !ctrl->npss ||
if (ndev->last_ps == U32_MAX ||
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
nvme_reset_ctrl(ctrl);
return 0;
@ -2859,6 +2859,8 @@ static int nvme_suspend(struct device *dev)
struct nvme_ctrl *ctrl = &ndev->ctrl;
int ret = -EBUSY;
ndev->last_ps = U32_MAX;
/*
* The platform does not remove power for a kernel managed suspend so
* use host managed nvme power settings for lowest idle power if
@ -2866,8 +2868,14 @@ static int nvme_suspend(struct device *dev)
* shutdown. But if the firmware is involved after the suspend or the
* device does not support any non-default power states, shut down the
* device fully.
*
* If ASPM is not enabled for the device, shut down the device and allow
* the PCI bus layer to put it into D3 in order to take the PCIe link
* down, so as to allow the platform to achieve its minimum low-power
* state (which may not be possible if the link is up).
*/
if (pm_suspend_via_firmware() || !ctrl->npss) {
if (pm_suspend_via_firmware() || !ctrl->npss ||
!pcie_aspm_enabled(pdev)) {
nvme_dev_disable(ndev, true);
return 0;
}
@ -2880,7 +2888,6 @@ static int nvme_suspend(struct device *dev)
ctrl->state != NVME_CTRL_ADMIN_ONLY)
goto unfreeze;
ndev->last_ps = 0;
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
if (ret < 0)
goto unfreeze;

View File

@ -1170,6 +1170,26 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
NULL, 0644);
/**
* pcie_aspm_enabled - Check if PCIe ASPM has been enabled for a device.
* @pdev: Target device.
*/
bool pcie_aspm_enabled(struct pci_dev *pdev)
{
struct pci_dev *bridge = pci_upstream_bridge(pdev);
bool ret;
if (!bridge)
return false;
mutex_lock(&aspm_lock);
ret = bridge->link_state ? !!bridge->link_state->aspm_enabled : false;
mutex_unlock(&aspm_lock);
return ret;
}
EXPORT_SYMBOL_GPL(pcie_aspm_enabled);
#ifdef CONFIG_PCIEASPM_DEBUG
static ssize_t link_state_show(struct device *dev,
struct device_attribute *attr,

View File

@ -1567,8 +1567,10 @@ extern bool pcie_ports_native;
#ifdef CONFIG_PCIEASPM
bool pcie_aspm_support_enabled(void);
bool pcie_aspm_enabled(struct pci_dev *pdev);
#else
static inline bool pcie_aspm_support_enabled(void) { return false; }
static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; }
#endif
#ifdef CONFIG_PCIEAER

View File

@ -40,6 +40,7 @@ struct sugov_policy {
struct task_struct *thread;
bool work_in_progress;
bool limits_changed;
bool need_freq_update;
};
@ -89,8 +90,11 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
!cpufreq_this_cpu_can_update(sg_policy->policy))
return false;
if (unlikely(sg_policy->need_freq_update))
if (unlikely(sg_policy->limits_changed)) {
sg_policy->limits_changed = false;
sg_policy->need_freq_update = true;
return true;
}
delta_ns = time - sg_policy->last_freq_update_time;
@ -437,7 +441,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
{
if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
sg_policy->need_freq_update = true;
sg_policy->limits_changed = true;
}
static void sugov_update_single(struct update_util_data *hook, u64 time,
@ -457,7 +461,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
if (!sugov_should_update_freq(sg_policy, time))
return;
busy = sugov_cpu_is_busy(sg_cpu);
/* Limits may have changed, don't skip frequency update */
busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
@ -831,6 +836,7 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_policy->last_freq_update_time = 0;
sg_policy->next_freq = 0;
sg_policy->work_in_progress = false;
sg_policy->limits_changed = false;
sg_policy->need_freq_update = false;
sg_policy->cached_raw_freq = 0;
@ -879,7 +885,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
mutex_unlock(&sg_policy->work_lock);
}
sg_policy->need_freq_update = true;
sg_policy->limits_changed = true;
}
struct cpufreq_governor schedutil_gov = {