2019-04-29 20:29:36 +08:00
|
|
|
/* SPDX-License-Identifier: MIT */
|
|
|
|
/*
|
|
|
|
* Copyright © 2019 Intel Corporation
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __INTEL_RUNTIME_PM_H__
|
|
|
|
#define __INTEL_RUNTIME_PM_H__
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
|
2019-05-22 18:35:05 +08:00
|
|
|
#include "intel_wakeref.h"
|
2019-04-29 20:29:36 +08:00
|
|
|
|
2019-06-14 07:21:52 +08:00
|
|
|
#include "i915_utils.h"
|
|
|
|
|
|
|
|
/* Opaque types that this header only refers to through pointers. */
struct device;
struct drm_i915_private;
struct drm_printer;
|
2019-04-29 20:29:36 +08:00
|
|
|
|
|
|
|
/*
 * DRM suspend modes. The enumerators carry no explicit initialisers, so
 * they take the implicit values IDLE = 0, MEM = 1, HIBERNATE = 2.
 */
enum i915_drm_suspend_mode {
	I915_DRM_SUSPEND_IDLE,
	I915_DRM_SUSPEND_MEM,
	I915_DRM_SUSPEND_HIBERNATE,
};
|
|
|
|
|
2019-06-14 07:21:52 +08:00
|
|
|
/*
|
|
|
|
* This struct helps tracking the state needed for runtime PM, which puts the
|
|
|
|
* device in PCI D3 state. Notice that when this happens, nothing on the
|
|
|
|
* graphics device works, even register access, so we don't get interrupts nor
|
|
|
|
* anything else.
|
|
|
|
*
|
|
|
|
* Every piece of our code that needs to actually touch the hardware needs to
|
|
|
|
* either call intel_runtime_pm_get or call intel_display_power_get with the
|
|
|
|
* appropriate power domain.
|
|
|
|
*
|
|
|
|
* Our driver uses the autosuspend delay feature, which means we'll only really
|
|
|
|
* suspend if we stay with zero refcount for a certain amount of time. The
|
|
|
|
* default value is currently very conservative (see intel_runtime_pm_enable), but
|
|
|
|
* it can be changed with the standard runtime PM files from sysfs.
|
|
|
|
*
|
|
|
|
* The irqs_disabled variable becomes true exactly after we disable the IRQs and
|
|
|
|
* goes back to false exactly before we reenable the IRQs. We use this variable
|
|
|
|
* to check if someone is trying to enable/disable IRQs while they're supposed
|
|
|
|
* to be disabled. This shouldn't happen and we'll print some error messages in
|
|
|
|
* case it happens.
|
|
|
|
*
|
pci-v5.3-changes
-----BEGIN PGP SIGNATURE-----
iQJIBAABCgAyFiEEgMe7l+5h9hnxdsnuWYigwDrT+vwFAl0siFoUHGJoZWxnYWFz
QGdvb2dsZS5jb20ACgkQWYigwDrT+vzi9A//S4jRyyZrgUr88Az0GbgMhE4b3yqc
uL7om/Sf+443gG6C+aKkZSM/IE9hrbyIKuYq7GGxDkzZ/HkucZo2yIuAHkPgG4ik
QQYJ8fJsmMq1bUht87c1ZZwGP0++Deq/Ns2+VNy/WBYqKLulnV0DvEEaJgPs9C5D
ppwccGdo6UghiujBTpE4ddUBjFjjURWqT6wSnMRDQ4EGwfUhG0MWwwHKI4hbBuaL
N6refuggdYyUUX5FeUOHa6VF6uTnSSAQ75k+40n4nljdayqoumHLskst77o9q5ZI
oXjdpwgmuEqYhfp03HEA4Xo/bBxiRj76NuTiEMKvPokxjpanwbLrdV0GhF0OIlM0
rp1NOI1w+vppFrU+rc2gtq+7hYXFmvdhjS29hFLeD91PP36N5d29jW5NVFpm7GCm
n4TMGAOsu8RB+bNua6ZbZVcDk2EnPgQeIcM0ZPoBtPK19Fg/rScdEU4u/aFE1Y0Q
C+Ks7D1qCvFpHzl/xAg0oo9v/jFsWef3qnQWOzot964Zz4W4NSVvB9Ox6Vbfj6C4
v331LJmlPxG8fxBNA3q28FrTxcG1NW6sgo3WY9VoSp/vc0aqaPKhm7sbraTt5IrI
TwqA/WhnAHv90MQCGFcofANyYTkjPkKk2QBFK6b0suoAmVdwVWWELi1WaZ+HdvgQ
JP7YpmC2cXcQBPk=
=ZGxL
-----END PGP SIGNATURE-----
Merge tag 'pci-v5.3-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
Pull PCI updates from Bjorn Helgaas:
"Enumeration changes:
- Evaluate PCI Boot Configuration _DSM to learn if firmware wants us
to preserve its resource assignments (Benjamin Herrenschmidt)
- Simplify resource distribution (Nicholas Johnson)
- Decode 32 GT/s link speed (Gustavo Pimentel)
Virtualization:
- Fix incorrect caching of VF config space size (Alex Williamson)
- Fix VF driver probing sysfs knobs (Alex Williamson)
Peer-to-peer DMA:
- Fix dma_virt_ops check (Logan Gunthorpe)
Altera host bridge driver:
- Allow building as module (Ley Foon Tan)
Armada 8K host bridge driver:
- add PHYs support (Miquel Raynal)
DesignWare host bridge driver:
- Export APIs to support removable loadable module (Vidya Sagar)
- Enable Relaxed Ordering erratum workaround only on Tegra20 &
Tegra30 (Vidya Sagar)
Hyper-V host bridge driver:
- Fix use-after-free in eject (Dexuan Cui)
Mobiveil host bridge driver:
- Clean up and fix many issues, including non-identify mapped
windows, 64-bit windows, multi-MSI, class code, INTx clearing (Hou
Zhiqiang)
Qualcomm host bridge driver:
- Use clk bulk API for 2.4.0 controllers (Bjorn Andersson)
- Add QCS404 support (Bjorn Andersson)
- Assert PERST for at least 100ms (Niklas Cassel)
R-Car host bridge driver:
- Add r8a774a1 DT support (Biju Das)
Tegra host bridge driver:
- Add support for Gen2, opportunistic UpdateFC and ACK (PCIe protocol
details) AER, GPIO-based PERST# (Manikanta Maddireddy)
- Fix many issues, including power-on failure cases, interrupt
masking in suspend, UPHY settings, AFI dynamic clock gating,
pending DLL transactions (Manikanta Maddireddy)
Xilinx host bridge driver:
- Fix NWL Multi-MSI programming (Bharat Kumar Gogada)
Endpoint support:
- Fix 64bit BAR support (Alan Mikhak)
- Fix pcitest build issues (Alan Mikhak, Andy Shevchenko)
Bug fixes:
- Fix NVIDIA GPU multi-function power dependencies (Abhishek Sahu)
- Fix NVIDIA GPU HDA enablement issue (Lukas Wunner)
- Ignore lockdep for sysfs "remove" (Marek Vasut)
Misc:
- Convert docs to reST (Changbin Du, Mauro Carvalho Chehab)"
* tag 'pci-v5.3-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci: (107 commits)
PCI: Enable NVIDIA HDA controllers
tools: PCI: Fix installation when `make tools/pci_install`
PCI: dwc: pci-dra7xx: Fix compilation when !CONFIG_GPIOLIB
PCI: Fix typos and whitespace errors
PCI: mobiveil: Fix INTx interrupt clearing in mobiveil_pcie_isr()
PCI: mobiveil: Fix infinite-loop in the INTx handling function
PCI: mobiveil: Move PCIe PIO enablement out of inbound window routine
PCI: mobiveil: Add upper 32-bit PCI base address setup in inbound window
PCI: mobiveil: Add upper 32-bit CPU base address setup in outbound window
PCI: mobiveil: Mask out hardcoded bits in inbound/outbound windows setup
PCI: mobiveil: Clear the control fields before updating it
PCI: mobiveil: Add configured inbound windows counter
PCI: mobiveil: Fix the valid check for inbound and outbound windows
PCI: mobiveil: Clean-up program_{ib/ob}_windows()
PCI: mobiveil: Remove an unnecessary return value check
PCI: mobiveil: Fix error return values
PCI: mobiveil: Refactor the MEM/IO outbound window initialization
PCI: mobiveil: Make some register updates more readable
PCI: mobiveil: Reformat the code for readability
dt-bindings: PCI: mobiveil: Change gpio_slave and apb_csr to optional
...
2019-07-16 11:44:49 +08:00
|
|
|
* For more, read the Documentation/power/runtime_pm.rst.
|
2019-06-14 07:21:52 +08:00
|
|
|
*/
|
|
|
|
struct intel_runtime_pm {
|
|
|
|
atomic_t wakeref_count;
|
2021-04-29 18:50:59 +08:00
|
|
|
struct device *kdev; /* points to i915->drm.dev */
|
2019-06-14 07:21:52 +08:00
|
|
|
bool available;
|
|
|
|
bool suspended;
|
|
|
|
bool irqs_enabled;
|
2022-02-05 01:10:53 +08:00
|
|
|
bool no_wakeref_tracking;
|
2019-06-14 07:21:52 +08:00
|
|
|
|
|
|
|
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
|
|
|
|
/*
|
|
|
|
* To aide detection of wakeref leaks and general misuse, we
|
|
|
|
* track all wakeref holders. With manual markup (i.e. returning
|
|
|
|
* a cookie to each rpm_get caller which they then supply to their
|
|
|
|
* paired rpm_put) we can remove corresponding pairs of and keep
|
|
|
|
* the array trimmed to active wakerefs.
|
|
|
|
*/
|
|
|
|
struct intel_runtime_pm_debug {
|
|
|
|
spinlock_t lock;
|
|
|
|
|
|
|
|
depot_stack_handle_t last_acquire;
|
|
|
|
depot_stack_handle_t last_release;
|
|
|
|
|
|
|
|
depot_stack_handle_t *owners;
|
|
|
|
unsigned long count;
|
|
|
|
} debug;
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Bit layout of intel_runtime_pm.wakeref_count. The value is split into two
 * halves at INTEL_RPM_WAKELOCK_SHIFT: the low half, selected by
 * INTEL_RPM_RAW_WAKEREF_MASK, holds the raw-wakeref count and the high half
 * holds the wakelock count. INTEL_RPM_WAKELOCK_BIAS is one unit of the
 * wakelock count.
 */
#define BITS_PER_WAKEREF	\
	BITS_PER_TYPE(struct_member(struct intel_runtime_pm, wakeref_count))
#define INTEL_RPM_WAKELOCK_SHIFT	(BITS_PER_WAKEREF / 2)
#define INTEL_RPM_WAKELOCK_BIAS		(1 << INTEL_RPM_WAKELOCK_SHIFT)
#define INTEL_RPM_RAW_WAKEREF_MASK	(INTEL_RPM_WAKELOCK_BIAS - 1)
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
intel_rpm_raw_wakeref_count(int wakeref_count)
|
|
|
|
{
|
|
|
|
return wakeref_count & INTEL_RPM_RAW_WAKEREF_MASK;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
intel_rpm_wakelock_count(int wakeref_count)
|
|
|
|
{
|
|
|
|
return wakeref_count >> INTEL_RPM_WAKELOCK_SHIFT;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
assert_rpm_device_not_suspended(struct intel_runtime_pm *rpm)
|
|
|
|
{
|
|
|
|
WARN_ONCE(rpm->suspended,
|
|
|
|
"Device suspended during HW access\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Check that the device is not suspended and that the raw-wakeref part of
 * @wakeref_count is non-zero; each failed check warns once.
 *
 * @rpm: the intel_runtime_pm structure
 * @wakeref_count: caller-supplied snapshot of rpm->wakeref_count
 */
static inline void
__assert_rpm_raw_wakeref_held(struct intel_runtime_pm *rpm, int wakeref_count)
{
	assert_rpm_device_not_suspended(rpm);
	WARN_ONCE(!intel_rpm_raw_wakeref_count(wakeref_count),
		  "RPM raw-wakeref not held\n");
}
|
|
|
|
|
|
|
|
/*
 * Run the raw-wakeref checks, then additionally warn once if the wakelock
 * part of @wakeref_count is zero.
 *
 * @rpm: the intel_runtime_pm structure
 * @wakeref_count: caller-supplied snapshot of rpm->wakeref_count
 */
static inline void
__assert_rpm_wakelock_held(struct intel_runtime_pm *rpm, int wakeref_count)
{
	__assert_rpm_raw_wakeref_held(rpm, wakeref_count);
	WARN_ONCE(!intel_rpm_wakelock_count(wakeref_count),
		  "RPM wakelock ref not held during HW access\n");
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
assert_rpm_raw_wakeref_held(struct intel_runtime_pm *rpm)
|
|
|
|
{
|
|
|
|
__assert_rpm_raw_wakeref_held(rpm, atomic_read(&rpm->wakeref_count));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
assert_rpm_wakelock_held(struct intel_runtime_pm *rpm)
|
|
|
|
{
|
|
|
|
__assert_rpm_wakelock_held(rpm, atomic_read(&rpm->wakeref_count));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* disable_rpm_wakeref_asserts - disable the RPM assert checks
|
|
|
|
* @rpm: the intel_runtime_pm structure
|
|
|
|
*
|
|
|
|
* This function disable asserts that check if we hold an RPM wakelock
|
|
|
|
* reference, while keeping the device-not-suspended checks still enabled.
|
|
|
|
* It's meant to be used only in special circumstances where our rule about
|
|
|
|
* the wakelock refcount wrt. the device power state doesn't hold. According
|
|
|
|
* to this rule at any point where we access the HW or want to keep the HW in
|
|
|
|
* an active state we must hold an RPM wakelock reference acquired via one of
|
|
|
|
* the intel_runtime_pm_get() helpers. Currently there are a few special spots
|
|
|
|
* where this rule doesn't hold: the IRQ and suspend/resume handlers, the
|
|
|
|
* forcewake release timer, and the GPU RPS and hangcheck works. All other
|
|
|
|
* users should avoid using this function.
|
|
|
|
*
|
|
|
|
* Any calls to this function must have a symmetric call to
|
|
|
|
* enable_rpm_wakeref_asserts().
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
disable_rpm_wakeref_asserts(struct intel_runtime_pm *rpm)
|
|
|
|
{
|
|
|
|
atomic_add(INTEL_RPM_WAKELOCK_BIAS + 1,
|
|
|
|
&rpm->wakeref_count);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* enable_rpm_wakeref_asserts - re-enable the RPM assert checks
|
|
|
|
* @rpm: the intel_runtime_pm structure
|
|
|
|
*
|
|
|
|
* This function re-enables the RPM assert checks after disabling them with
|
|
|
|
* disable_rpm_wakeref_asserts. It's meant to be used only in special
|
|
|
|
* circumstances otherwise its use should be avoided.
|
|
|
|
*
|
|
|
|
* Any calls to this function must have a symmetric call to
|
|
|
|
* disable_rpm_wakeref_asserts().
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
enable_rpm_wakeref_asserts(struct intel_runtime_pm *rpm)
|
|
|
|
{
|
|
|
|
atomic_sub(INTEL_RPM_WAKELOCK_BIAS + 1,
|
|
|
|
&rpm->wakeref_count);
|
|
|
|
}
|
|
|
|
|
2019-06-14 07:21:53 +08:00
|
|
|
void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm);
|
|
|
|
void intel_runtime_pm_enable(struct intel_runtime_pm *rpm);
|
|
|
|
void intel_runtime_pm_disable(struct intel_runtime_pm *rpm);
|
2019-07-12 19:24:28 +08:00
|
|
|
void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm);
|
2019-04-29 20:29:36 +08:00
|
|
|
|
2019-06-14 07:21:54 +08:00
|
|
|
intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm);
|
|
|
|
intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm);
|
2021-03-23 04:28:17 +08:00
|
|
|
intel_wakeref_t intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm);
|
2019-06-14 07:21:54 +08:00
|
|
|
intel_wakeref_t intel_runtime_pm_get_noresume(struct intel_runtime_pm *rpm);
|
|
|
|
intel_wakeref_t intel_runtime_pm_get_raw(struct intel_runtime_pm *rpm);
|
2019-04-29 20:29:36 +08:00
|
|
|
|
2019-06-14 07:21:55 +08:00
|
|
|
/*
 * Scoped wakeref helpers. Each macro expands to a for-loop header, so the
 * statement that follows it is the body:
 *
 *  - the wakeref is acquired into @wf with the matching get function;
 *  - the body runs once if @wf is non-zero and is skipped otherwise;
 *  - after the body the wakeref is released with intel_runtime_pm_put() and
 *    @wf is reset to 0, which ends the loop.
 *
 * @wf must be an assignable lvalue. Leaving the body with break, goto or
 * return skips the release step.
 */
#define with_intel_runtime_pm(rpm, wf) \
	for ((wf) = intel_runtime_pm_get((rpm)); (wf); \
	     intel_runtime_pm_put((rpm), (wf)), (wf) = 0)

#define with_intel_runtime_pm_if_in_use(rpm, wf) \
	for ((wf) = intel_runtime_pm_get_if_in_use((rpm)); (wf); \
	     intel_runtime_pm_put((rpm), (wf)), (wf) = 0)

#define with_intel_runtime_pm_if_active(rpm, wf) \
	for ((wf) = intel_runtime_pm_get_if_active((rpm)); (wf); \
	     intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
|
|
|
|
|
2019-06-14 07:21:54 +08:00
|
|
|
void intel_runtime_pm_put_unchecked(struct intel_runtime_pm *rpm);
|
2019-04-29 20:29:36 +08:00
|
|
|
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
|
2019-06-14 07:21:54 +08:00
|
|
|
void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref);
|
2019-04-29 20:29:36 +08:00
|
|
|
#else
|
drm/i915: Add support for asynchronous display power disabling
By disabling a power domain asynchronously we can restrict holding a
reference on that power domain to the actual code sequence that
requires the power to be on for the HW access it's doing, by also
avoiding unneeded on-off-on togglings of the power domain (since the
disabling happens with a delay).
One benefit is potential power saving due to the following two reasons:
1. The fact that we will now be holding the reference only for the
necessary duration by the end of the patchset. While simply not
delaying the disabling has the same benefit, it has the problem that
frequent on-off-on power switching has its own power cost (see the 2.
point below) and the debug trace for power well on/off events will
cause a lot of dmesg spam (see details about this further below).
2. Avoiding the power cost of freuqent on-off-on power switching. This
requires us to find the optimal disabling delay based on the measured
power cost of on->off and off->on switching of each power well vs.
the power of keeping the given power well on.
In this patchset I'm not providing this optimal delay for two
reasons:
a) I don't have the means yet to perform the measurement (with high
enough signal-to-noise ratio, or with the help of an energy
counter that takes switching into account). I'm currently looking
for a way to measure this.
b) Before reducing the disabling delay we need an alternative way for
debug tracing powerwell on/off events. Simply avoiding/throttling
the debug messages is not a solution, see further below.
Note that even in the case where we can't measure any considerable
power cost of frequent on-off switching of powerwells, it still would
make sense to do the disabling asynchronously (with 0 delay) to avoid
blocking on the disabling. On VLV I measured this disabling time
overhead to be 1ms on average with a worst case of 4ms.
In the case of the AUX power domains on ICL we would also need to keep
the sequence where we hold the power reference short, the way it would
be by the end of this patchset where we hold it only for the actual AUX
transfer. Anything else would make the locking we need for ICL TypeC
ports (whenever we hold a reference on any AUX power domain) rather
problematic, adding for instance unnecessary lockdep dependencies to
the required TypeC port lock.
I chose the disabling delay to be 100msec for now to avoid the unneeded
toggling (and so not to introduce dmesg spamming) in the DP MST sideband
signaling code. We could optimize this delay later, once we have the
means to measure the switching power cost (see above).
Note that simply removing/throttling the debug tracing for power well
on/off events is not a solution. We need to know the exact spots of
these events and cannot rely only on incorrect register accesses caught
(due to not holding a wakeref at the time of access). Incorrect
powerwell enabling/disabling could lead to other problems, for instance
we need to keep certain powerwells enabled for the duration of modesets
and AUX transfers.
v2:
- Clarify the commit log parts about power cost measurement and the
problem of simply removing/throttling debug tracing. (Chris)
- Optimize out local wakeref vars at intel_runtime_pm_put_raw() and
intel_display_power_put_async() call sites if
CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n. (Chris)
- Rebased on v2 of the wakeref w/o power-on guarantee patch.
- Add missing docbook headers.
v3:
- Checkpatch spelling/missing-empty-line fix.
v4:
- Fix unintended local wakeref var optimization when using
call-arguments with side-effects, by using inline funcs instead of
macros. In this patch in particular this will fix the
intel_display_power_grab_async_put_ref()->intel_runtime_pm_put_raw()
call).
No size change in practice (would be the same disregarding the
corresponding change in intel_display_power_grab_async_put_ref()):
$ size i915-macro.ko
text data bss dec hex filename
2455190 105890 10272 2571352 273c58 i915-macro.ko
$ size i915-inline.ko
text data bss dec hex filename
2455195 105890 10272 2571357 273c5d i915-inline.ko
Kudos to Stan for reporting the raw-wakeref WARNs this issue caused. His
config has CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n, which I didn't retest
after v1, and we are also not testing this config in CI.
Now tested both with CONFIG_DRM_I915_DEBUG_RUNTIME_PM=y/n on ICL,
connecting both Chamelium and regular DP, HDMI sinks.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190513192533.12586-1-imre.deak@intel.com
2019-05-14 03:25:33 +08:00
|
|
|
static inline void
|
2019-06-14 07:21:54 +08:00
|
|
|
intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
|
drm/i915: Add support for asynchronous display power disabling
By disabling a power domain asynchronously we can restrict holding a
reference on that power domain to the actual code sequence that
requires the power to be on for the HW access it's doing, by also
avoiding unneeded on-off-on togglings of the power domain (since the
disabling happens with a delay).
One benefit is potential power saving due to the following two reasons:
1. The fact that we will now be holding the reference only for the
necessary duration by the end of the patchset. While simply not
delaying the disabling has the same benefit, it has the problem that
frequent on-off-on power switching has its own power cost (see the 2.
point below) and the debug trace for power well on/off events will
cause a lot of dmesg spam (see details about this further below).
2. Avoiding the power cost of freuqent on-off-on power switching. This
requires us to find the optimal disabling delay based on the measured
power cost of on->off and off->on switching of each power well vs.
the power of keeping the given power well on.
In this patchset I'm not providing this optimal delay for two
reasons:
a) I don't have the means yet to perform the measurement (with high
enough signal-to-noise ratio, or with the help of an energy
counter that takes switching into account). I'm currently looking
for a way to measure this.
b) Before reducing the disabling delay we need an alternative way for
debug tracing powerwell on/off events. Simply avoiding/throttling
the debug messages is not a solution, see further below.
Note that even in the case where we can't measure any considerable
power cost of frequent on-off switching of powerwells, it still would
make sense to do the disabling asynchronously (with 0 delay) to avoid
blocking on the disabling. On VLV I measured this disabling time
overhead to be 1ms on average with a worst case of 4ms.
In the case of the AUX power domains on ICL we would also need to keep
the sequence where we hold the power reference short, the way it would
be by the end of this patchset where we hold it only for the actual AUX
transfer. Anything else would make the locking we need for ICL TypeC
ports (whenever we hold a reference on any AUX power domain) rather
problematic, adding for instance unnecessary lockdep dependencies to
the required TypeC port lock.
I chose the disabling delay to be 100msec for now to avoid the unneeded
toggling (and so not to introduce dmesg spamming) in the DP MST sideband
signaling code. We could optimize this delay later, once we have the
means to measure the switching power cost (see above).
Note that simply removing/throttling the debug tracing for power well
on/off events is not a solution. We need to know the exact spots of
these events and cannot rely only on incorrect register accesses caught
(due to not holding a wakeref at the time of access). Incorrect
powerwell enabling/disabling could lead to other problems, for instance
we need to keep certain powerwells enabled for the duration of modesets
and AUX transfers.
v2:
- Clarify the commit log parts about power cost measurement and the
problem of simply removing/throttling debug tracing. (Chris)
- Optimize out local wakeref vars at intel_runtime_pm_put_raw() and
intel_display_power_put_async() call sites if
CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n. (Chris)
- Rebased on v2 of the wakeref w/o power-on guarantee patch.
- Add missing docbook headers.
v3:
- Checkpatch spelling/missing-empty-line fix.
v4:
- Fix unintended local wakeref var optimization when using
call-arguments with side-effects, by using inline funcs instead of
macros. In this patch in particular this will fix the
intel_display_power_grab_async_put_ref()->intel_runtime_pm_put_raw()
call).
No size change in practice (would be the same disregarding the
corresponding change in intel_display_power_grab_async_put_ref()):
$ size i915-macro.ko
text data bss dec hex filename
2455190 105890 10272 2571352 273c58 i915-macro.ko
$ size i915-inline.ko
text data bss dec hex filename
2455195 105890 10272 2571357 273c5d i915-inline.ko
Kudos to Stan for reporting the raw-wakeref WARNs this issue caused. His
config has CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n, which I didn't retest
after v1, and we are also not testing this config in CI.
Now tested both with CONFIG_DRM_I915_DEBUG_RUNTIME_PM=y/n on ICL,
connecting both Chamelium and regular DP, HDMI sinks.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190513192533.12586-1-imre.deak@intel.com
2019-05-14 03:25:33 +08:00
|
|
|
{
|
2019-06-14 07:21:54 +08:00
|
|
|
intel_runtime_pm_put_unchecked(rpm);
|
drm/i915: Add support for asynchronous display power disabling
By disabling a power domain asynchronously we can restrict holding a
reference on that power domain to the actual code sequence that
requires the power to be on for the HW access it's doing, by also
avoiding unneeded on-off-on togglings of the power domain (since the
disabling happens with a delay).
One benefit is potential power saving due to the following two reasons:
1. The fact that we will now be holding the reference only for the
necessary duration by the end of the patchset. While simply not
delaying the disabling has the same benefit, it has the problem that
frequent on-off-on power switching has its own power cost (see the 2.
point below) and the debug trace for power well on/off events will
cause a lot of dmesg spam (see details about this further below).
2. Avoiding the power cost of freuqent on-off-on power switching. This
requires us to find the optimal disabling delay based on the measured
power cost of on->off and off->on switching of each power well vs.
the power of keeping the given power well on.
In this patchset I'm not providing this optimal delay for two
reasons:
a) I don't have the means yet to perform the measurement (with high
enough signal-to-noise ratio, or with the help of an energy
counter that takes switching into account). I'm currently looking
for a way to measure this.
b) Before reducing the disabling delay we need an alternative way for
debug tracing powerwell on/off events. Simply avoiding/throttling
the debug messages is not a solution, see further below.
Note that even in the case where we can't measure any considerable
power cost of frequent on-off switching of powerwells, it still would
make sense to do the disabling asynchronously (with 0 delay) to avoid
blocking on the disabling. On VLV I measured this disabling time
overhead to be 1ms on average with a worst case of 4ms.
In the case of the AUX power domains on ICL we would also need to keep
the sequence where we hold the power reference short, the way it would
be by the end of this patchset where we hold it only for the actual AUX
transfer. Anything else would make the locking we need for ICL TypeC
ports (whenever we hold a reference on any AUX power domain) rather
problematic, adding for instance unnecessary lockdep dependencies to
the required TypeC port lock.
I chose the disabling delay to be 100msec for now to avoid the unneeded
toggling (and so not to introduce dmesg spamming) in the DP MST sideband
signaling code. We could optimize this delay later, once we have the
means to measure the switching power cost (see above).
Note that simply removing/throttling the debug tracing for power well
on/off events is not a solution. We need to know the exact spots of
these events and cannot rely only on incorrect register accesses caught
(due to not holding a wakeref at the time of access). Incorrect
powerwell enabling/disabling could lead to other problems, for instance
we need to keep certain powerwells enabled for the duration of modesets
and AUX transfers.
v2:
- Clarify the commit log parts about power cost measurement and the
problem of simply removing/throttling debug tracing. (Chris)
- Optimize out local wakeref vars at intel_runtime_pm_put_raw() and
intel_display_power_put_async() call sites if
CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n. (Chris)
- Rebased on v2 of the wakeref w/o power-on guarantee patch.
- Add missing docbook headers.
v3:
- Checkpatch spelling/missing-empty-line fix.
v4:
- Fix unintended local wakeref var optimization when using
call-arguments with side-effects, by using inline funcs instead of
macros. In this patch in particular this will fix the
intel_display_power_grab_async_put_ref()->intel_runtime_pm_put_raw()
call).
No size change in practice (would be the same disregarding the
corresponding change in intel_display_power_grab_async_put_ref()):
$ size i915-macro.ko
text data bss dec hex filename
2455190 105890 10272 2571352 273c58 i915-macro.ko
$ size i915-inline.ko
text data bss dec hex filename
2455195 105890 10272 2571357 273c5d i915-inline.ko
Kudos to Stan for reporting the raw-wakeref WARNs this issue caused. His
config has CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n, which I didn't retest
after v1, and we are also not testing this config in CI.
Now tested both with CONFIG_DRM_I915_DEBUG_RUNTIME_PM=y/n on ICL,
connecting both Chamelium and regular DP, HDMI sinks.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190513192533.12586-1-imre.deak@intel.com
2019-05-14 03:25:33 +08:00
|
|
|
}
|
2019-04-29 20:29:36 +08:00
|
|
|
#endif
|
2019-06-14 07:21:54 +08:00
|
|
|
void intel_runtime_pm_put_raw(struct intel_runtime_pm *rpm, intel_wakeref_t wref);
|
2019-04-29 20:29:36 +08:00
|
|
|
|
|
|
|
/*
 * With CONFIG_DRM_I915_DEBUG_RUNTIME_PM enabled this is declared here and
 * defined outside this header; otherwise it is an empty inline stub, so
 * callers need no #ifdef of their own.
 */
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
				    struct drm_printer *p);
#else
static inline void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
						  struct drm_printer *p)
{
}
#endif
|
|
|
|
|
|
|
|
#endif /* __INTEL_RUNTIME_PM_H__ */
|