drm fixes for 5.8-rc2
i915: - Fix for timeslicing and virtual engines/unpremptable requests (+ 1 dependency patch) - Fixes into TypeC register programming and interrupt storm detecting - Disable DIP on MST ports with the transcoder clock still on - Avoid missing GT workarounds at reset for HSW and older gens - Fix for unwinding multiple requests missing force restore - Fix encoder type check for DDI vswing sequence - Build warning fixes amdgpu: - Fix kvfree/kfree mixup - Fix hawaii device id in powertune configuration - Display FP fixes - Documentation fixes amdkfd: - devcgroup check fix -----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJe7Cm0AAoJEAx081l5xIa+keYP/3ieT0ZvTj1A9WUSHjA54YVr y4ifXwo1wtsB8jwdsCLa7gEzDN41tJ6zU4vxOZ4sz8WIgE9ulerM7E6hqJqHm8mt YF8TCOpy7rl/SuMa7NPNFkglJgFGm3uNZU2KUWUpexYQlBc8d3+rPQXEYSvRCvb2 e//h5eKXAWAfMj4ZlYwy721lEyc55agb3FAguRHiVEj+x9TBWbMU6HQzzxhHL6hh 6zjWgJEHD/B/JDtS508bHmI/yCoe21LVENiKyHLpWu2zCNcdqdsQz+d7z2gnR2ir MsebnqdOxlGmR6VdpqPaE3jukL1okH/SEk4akGOOhjlM3U/MmsJz8n22aYsXlzK7 XHvyTjSdO8Spd+taLKEmZkIj++tVFHN9RR4uXbweknsqEhec6rakKFW3fsqhIC7G 9fTWevP5lRyOLD1xqt7aWpGf+21dbcJEtN+Gnrmb+eOl+5WWShS3KE384XZ4k/n1 G7l8ETT1okMdFU7YU9Ws9tAFYsM6bdt5mD2N1g9jIr2L/F78upwzurNMz2FLJUQZ CMsPUIw0luWNSBXi91TjF0Itt1gSy8x8lC/Be3lN94KuvbO6yuUIznO8jshrHcLW T8kzP3IZxCm29pvGBDuA5t3vTI6bTVfLH9vXmAONuPRUQx62XXjrLywGmXYe0C+g uHmfLYRmvHvdru5K+muP =lvAZ -----END PGP SIGNATURE----- Merge tag 'drm-fixes-2020-06-19' of git://anongit.freedesktop.org/drm/drm Pull drm fixes from Dave Airlie: "Just i915 and amd here. i915 has some workaround movement so they get applied at the right times, and a timeslicing fix, along with some display fixes. AMD has a few display floating point fix and a devcgroup fix for amdkfd. 
i915: - Fix for timeslicing and virtual engines/unpreemptable requests (+ 1 dependency patch) - Fixes for TypeC register programming and interrupt storm detection - Disable DIP on MST ports with the transcoder clock still on - Avoid missing GT workarounds at reset for HSW and older gens - Fix for unwinding multiple requests missing force restore - Fix encoder type check for DDI vswing sequence - Build warning fixes amdgpu: - Fix kvfree/kfree mixup - Fix hawaii device id in powertune configuration - Display FP fixes - Documentation fixes amdkfd: - devcgroup check fix" * tag 'drm-fixes-2020-06-19' of git://anongit.freedesktop.org/drm/drm: (23 commits) drm/amdgpu: fix documentation around busy_percentage drm/amdgpu/pm: update comment to clarify Overdrive interfaces drm/amdkfd: Use correct major in devcgroup check drm/i915/display: Fix the encoder type check drm/i915/icl+: Fix hotplug interrupt disabling after storm detection drm/i915/gt: Move gen4 GT workarounds from init_clock_gating to workarounds drm/i915/gt: Move ilk GT workarounds from init_clock_gating to workarounds drm/i915/gt: Move snb GT workarounds from init_clock_gating to workarounds drm/i915/gt: Move vlv GT workarounds from init_clock_gating to workarounds drm/i915/gt: Move ivb GT workarounds from init_clock_gating to workarounds drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds drm/i915/icl: Disable DIP on MST ports with the transcoder clock still on drm/i915/gt: Incrementally check for rewinding drm/i915/tc: fix the reset of ln0 drm/i915/gt: Prevent timeslicing into unpreemptable requests drm/i915/selftests: Restore to default heartbeat drm/i915: work around false-positive maybe-uninitialized warning drm/i915/pmu: avoid an maybe-uninitialized warning drm/i915/gt: Incorporate the virtual engine into timeslicing drm/amd/display: Rework dsc to isolate FPU operations ...
This commit is contained in:
commit
62c91ead97
|
@ -197,11 +197,14 @@ pp_power_profile_mode
|
|||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
|
||||
:doc: pp_power_profile_mode
|
||||
|
||||
busy_percent
|
||||
~~~~~~~~~~~~
|
||||
*_busy_percent
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
|
||||
:doc: busy_percent
|
||||
:doc: gpu_busy_percent
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
|
||||
:doc: mem_busy_percent
|
||||
|
||||
GPU Product Information
|
||||
=======================
|
||||
|
|
|
@ -696,7 +696,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
|
|||
* default power levels, write "r" (reset) to the file to reset them.
|
||||
*
|
||||
*
|
||||
* < For Vega20 >
|
||||
* < For Vega20 and newer ASICs >
|
||||
*
|
||||
* Reading the file will display:
|
||||
*
|
||||
|
@ -1668,7 +1668,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
|
|||
}
|
||||
|
||||
/**
|
||||
* DOC: busy_percent
|
||||
* DOC: gpu_busy_percent
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reading how busy the GPU
|
||||
* is as a percentage. The file gpu_busy_percent is used for this.
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include <drm/drm_file.h>
|
||||
#include <drm/drm_drv.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_ioctl.h>
|
||||
#include <kgd_kfd_interface.h>
|
||||
#include <linux/swap.h>
|
||||
|
||||
|
@ -1076,7 +1077,7 @@ static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
|
|||
#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
|
||||
struct drm_device *ddev = kfd->ddev;
|
||||
|
||||
return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major,
|
||||
return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
|
||||
ddev->render->index,
|
||||
DEVCG_ACC_WRITE | DEVCG_ACC_READ);
|
||||
#else
|
||||
|
|
|
@ -28,8 +28,6 @@ endif
|
|||
endif
|
||||
|
||||
CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags)
|
||||
CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc_dpi.o := $(dsc_ccflags)
|
||||
CFLAGS_$(AMDDALPATH)/dc/dsc/dc_dsc.o := $(dsc_ccflags)
|
||||
|
||||
DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
|
||||
|
||||
|
|
|
@ -22,10 +22,12 @@
|
|||
* Author: AMD
|
||||
*/
|
||||
|
||||
#include <drm/drm_dsc.h>
|
||||
#include "dc_hw_types.h"
|
||||
#include "dsc.h"
|
||||
#include <drm/drm_dp_helper.h>
|
||||
#include "dc.h"
|
||||
#include "rc_calc.h"
|
||||
|
||||
/* This module's internal functions */
|
||||
|
||||
|
@ -304,22 +306,6 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value)
|
|||
return (value + 9) / 10;
|
||||
}
|
||||
|
||||
static inline uint32_t calc_dsc_bpp_x16(uint32_t stream_bandwidth_kbps, uint32_t pix_clk_100hz, uint32_t bpp_increment_div)
|
||||
{
|
||||
uint32_t dsc_target_bpp_x16;
|
||||
float f_dsc_target_bpp;
|
||||
float f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
|
||||
uint32_t precision = bpp_increment_div; // bpp_increment_div is actually precision
|
||||
|
||||
f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz;
|
||||
|
||||
// Round down to the nearest precision stop to bring it into DSC spec range
|
||||
dsc_target_bpp_x16 = (uint32_t)(f_dsc_target_bpp * precision);
|
||||
dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision;
|
||||
|
||||
return dsc_target_bpp_x16;
|
||||
}
|
||||
|
||||
/* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock
|
||||
* and uncompressed bandwidth.
|
||||
*/
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
* Authors: AMD
|
||||
*
|
||||
*/
|
||||
#include <drm/drm_dsc.h>
|
||||
|
||||
#include "os_types.h"
|
||||
#include "rc_calc.h"
|
||||
|
@ -40,7 +41,8 @@
|
|||
break
|
||||
|
||||
|
||||
void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp)
|
||||
static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
|
||||
enum max_min max_min, float bpp)
|
||||
{
|
||||
int mode = MODE_SELECT(444, 422, 420);
|
||||
int sel = table_hash(mode, bpc, max_min);
|
||||
|
@ -85,7 +87,7 @@ void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum ma
|
|||
memcpy(qps, table[index].qps, sizeof(qp_set));
|
||||
}
|
||||
|
||||
double dsc_roundf(double num)
|
||||
static double dsc_roundf(double num)
|
||||
{
|
||||
if (num < 0.0)
|
||||
num = num - 0.5;
|
||||
|
@ -95,7 +97,7 @@ double dsc_roundf(double num)
|
|||
return (int)(num);
|
||||
}
|
||||
|
||||
double dsc_ceil(double num)
|
||||
static double dsc_ceil(double num)
|
||||
{
|
||||
double retval = (int)num;
|
||||
|
||||
|
@ -105,7 +107,7 @@ double dsc_ceil(double num)
|
|||
return (int)retval;
|
||||
}
|
||||
|
||||
void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
|
||||
static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
|
||||
{
|
||||
int *p = ofs;
|
||||
|
||||
|
@ -160,7 +162,7 @@ void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
|
|||
}
|
||||
}
|
||||
|
||||
int median3(int a, int b, int c)
|
||||
static int median3(int a, int b, int c)
|
||||
{
|
||||
if (a > b)
|
||||
swap(a, b);
|
||||
|
@ -172,13 +174,25 @@ int median3(int a, int b, int c)
|
|||
return b;
|
||||
}
|
||||
|
||||
void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version)
|
||||
static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm,
|
||||
enum bits_per_comp bpc, u8 drm_bpp,
|
||||
bool is_navite_422_or_420,
|
||||
int slice_width, int slice_height,
|
||||
int minor_version)
|
||||
{
|
||||
float bpp;
|
||||
float bpp_group;
|
||||
float initial_xmit_delay_factor;
|
||||
int padding_pixels;
|
||||
int i;
|
||||
|
||||
bpp = ((float)drm_bpp / 16.0);
|
||||
/* in native_422 or native_420 modes, the bits_per_pixel is double the
|
||||
* target bpp (the latter is what calc_rc_params expects)
|
||||
*/
|
||||
if (is_navite_422_or_420)
|
||||
bpp /= 2.0;
|
||||
|
||||
rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
|
||||
rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
|
||||
|
||||
|
@ -251,3 +265,128 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com
|
|||
rc->rc_buf_thresh[13] = 8064;
|
||||
}
|
||||
|
||||
/*
 * _do_bytes_per_pixel_calc - floating-point core of calc_dsc_bytes_per_pixel()
 * @slice_width: slice width; used as both a multiplier and a divisor below
 * @drm_bpp: bits-per-pixel value that is divided by 16 before use
 * @is_navite_422_or_420: when true, the intermediate result is halved
 *
 * Returns the per-pixel byte count multiplied by 0x10000000, rounded up with
 * dsc_ceil() and converted to u32.
 *
 * This function uses float/double arithmetic; the visible caller,
 * calc_dsc_bytes_per_pixel(), brackets the call with
 * DC_FP_START()/DC_FP_END().
 *
 * NOTE(review): slice_width is used as a divisor without a zero check here -
 * confirm that callers never pass 0.
 */
static u32 _do_bytes_per_pixel_calc(int slice_width, u8 drm_bpp,
|
||||
bool is_navite_422_or_420)
|
||||
{
|
||||
float bpp;
|
||||
u32 bytes_per_pixel;
|
||||
double d_bytes_per_pixel;
|
||||
|
||||
/* drm_bpp / 16 is the bits-per-pixel value as a float */
bpp = ((float)drm_bpp / 16.0);
|
||||
/* bytes for a whole slice line, rounded up, then averaged per pixel */
d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
|
||||
// TODO: Make sure the formula for calculating this is precise (ceiling
|
||||
// vs. floor, and at what point they should be applied)
|
||||
if (is_navite_422_or_420)
|
||||
d_bytes_per_pixel /= 2;
|
||||
|
||||
/* scale by 0x10000000 and round up to get the integer result */
bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
|
||||
|
||||
return bytes_per_pixel;
|
||||
}
|
||||
|
||||
/*
 * _do_calc_dsc_bpp_x16 - floating-point core of calc_dsc_bpp_x16()
 * @stream_bandwidth_kbps: stream bandwidth; multiplied by 10 to express it in
 *                         units of 100 bps
 * @pix_clk_100hz: pixel clock the bandwidth is divided by
 * @bpp_increment_div: used as the rounding precision (steps per bit per pixel)
 *
 * Computes bandwidth / pixel clock as a float, rounds it down to a multiple
 * of 1/@bpp_increment_div and returns that value multiplied by 16.
 *
 * This function uses float arithmetic; the visible caller, calc_dsc_bpp_x16(),
 * brackets the call with DC_FP_START()/DC_FP_END().
 *
 * NOTE(review): neither pix_clk_100hz nor bpp_increment_div is checked for
 * zero before being used as a divisor - confirm callers guarantee non-zero
 * values.
 */
static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
|
||||
u32 bpp_increment_div)
|
||||
{
|
||||
u32 dsc_target_bpp_x16;
|
||||
float f_dsc_target_bpp;
|
||||
float f_stream_bandwidth_100bps;
|
||||
// bpp_increment_div is actually precision
|
||||
u32 precision = bpp_increment_div;
|
||||
|
||||
/* kbps * 10 == units of 100 bps, matching the 100 Hz pixel clock unit */
f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
|
||||
f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz;
|
||||
|
||||
// Round down to the nearest precision stop to bring it into DSC spec
|
||||
// range
|
||||
/* the u32 cast truncates, which is what performs the round-down */
dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision);
|
||||
dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision;
|
||||
|
||||
return dsc_target_bpp_x16;
|
||||
}
|
||||
|
||||
/**
|
||||
* calc_rc_params - fill in the DC internal DSC parameters from a DSC config
|
||||
* @rc: DC internal DSC parameters
|
||||
* @pps: DRM struct with all required DSC values
|
||||
*
|
||||
* This function expects a drm_dsc_config data struct with all the required DSC
|
||||
* values previously filled out by our driver and based on this information it
|
||||
* computes some of the DSC values.
|
||||
*
|
||||
* @note This calculation requires floating-point operations, most of which execute
|
||||
* under kernel_fpu_{begin,end}.
|
||||
*/
|
||||
void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps)
|
||||
{
|
||||
enum colour_mode mode;
|
||||
enum bits_per_comp bpc;
|
||||
bool is_navite_422_or_420;
|
||||
/*
 * NOTE(review): bits_per_pixel is narrowed to u8 here, so any value above
 * 255 would be truncated - confirm the field's type and range in
 * struct drm_dsc_config.
 */
u8 drm_bpp = pps->bits_per_pixel;
|
||||
int slice_width = pps->slice_width;
|
||||
int slice_height = pps->slice_height;
|
||||
|
||||
/*
 * Colour mode selection, in priority order: convert_rgb -> CM_RGB,
 * simple_422 -> CM_444, native_422 -> CM_422, native_420 -> CM_420,
 * otherwise CM_444.
 */
mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 :
|
||||
(pps->native_422 ? CM_422 :
|
||||
pps->native_420 ? CM_420 : CM_444));
|
||||
/* 8 -> BPC_8, 10 -> BPC_10, any other value is treated as BPC_12 */
bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10)
|
||||
? BPC_10 : BPC_12;
|
||||
|
||||
is_navite_422_or_420 = pps->native_422 || pps->native_420;
|
||||
|
||||
/* only the floating-point helper runs inside the FP section */
DC_FP_START();
|
||||
_do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420,
|
||||
slice_width, slice_height,
|
||||
pps->dsc_version_minor);
|
||||
DC_FP_END();
|
||||
}
|
||||
|
||||
/**
|
||||
* calc_dsc_bytes_per_pixel - calculate bytes per pixel
|
||||
* @pps: DRM struct with all required DSC values
|
||||
*
|
||||
* Based on the information inside drm_dsc_config, this function calculates the
|
||||
* number of bytes per pixel.
|
||||
*
|
||||
* @note This calculation requires floating-point operations, most of which execute
|
||||
* under kernel_fpu_{begin,end}.
|
||||
*
|
||||
* Return:
|
||||
* Return the number of bytes per pixel
|
||||
*/
|
||||
u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps)
|
||||
|
||||
{
|
||||
u32 ret;
|
||||
/*
 * NOTE(review): bits_per_pixel is narrowed to u8 here, so any value above
 * 255 would be truncated - confirm the field's type and range in
 * struct drm_dsc_config.
 */
u8 drm_bpp = pps->bits_per_pixel;
|
||||
int slice_width = pps->slice_width;
|
||||
bool is_navite_422_or_420 = pps->native_422 || pps->native_420;
|
||||
|
||||
/* only the floating-point helper runs inside the FP section */
DC_FP_START();
|
||||
ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp,
|
||||
is_navite_422_or_420);
|
||||
DC_FP_END();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* calc_dsc_bpp_x16 - retrieve the dsc bits per pixel
|
||||
* @stream_bandwidth_kbps: stream bandwidth, in kbps
|
||||
* @pix_clk_100hz: pixel clock, in units of 100 Hz
|
||||
* @bpp_increment_div: precision of the result (rounding steps per bit per pixel)
|
||||
*
|
||||
* Calculate the number of bits per pixel for the DSC configuration.
|
||||
*
|
||||
* @note This calculation requires floating-point operations, most of which execute
|
||||
* under kernel_fpu_{begin,end}.
 *
 * Return:
 * The target bits per pixel multiplied by 16, rounded down to the requested
 * precision.
|
||||
*/
|
||||
u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
|
||||
u32 bpp_increment_div)
|
||||
{
|
||||
u32 dsc_bpp;
|
||||
|
||||
/* only the floating-point helper runs inside the FP section */
DC_FP_START();
|
||||
dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz,
|
||||
bpp_increment_div);
|
||||
DC_FP_END();
|
||||
return dsc_bpp;
|
||||
}
|
||||
|
|
|
@ -77,7 +77,10 @@ struct qp_entry {
|
|||
|
||||
typedef struct qp_entry qp_table[];
|
||||
|
||||
void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version);
|
||||
void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps);
|
||||
u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps);
|
||||
u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
|
||||
u32 bpp_increment_div);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -27,8 +27,6 @@
|
|||
#include "dscc_types.h"
|
||||
#include "rc_calc.h"
|
||||
|
||||
double dsc_ceil(double num);
|
||||
|
||||
static void copy_pps_fields(struct drm_dsc_config *to, const struct drm_dsc_config *from)
|
||||
{
|
||||
to->line_buf_depth = from->line_buf_depth;
|
||||
|
@ -100,34 +98,13 @@ static void copy_rc_to_cfg(struct drm_dsc_config *dsc_cfg, const struct rc_param
|
|||
|
||||
int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_parameters *dsc_params)
|
||||
{
|
||||
enum colour_mode mode = pps->convert_rgb ? CM_RGB :
|
||||
(pps->simple_422 ? CM_444 :
|
||||
(pps->native_422 ? CM_422 :
|
||||
pps->native_420 ? CM_420 : CM_444));
|
||||
enum bits_per_comp bpc = (pps->bits_per_component == 8) ? BPC_8 :
|
||||
(pps->bits_per_component == 10) ? BPC_10 : BPC_12;
|
||||
float bpp = ((float) pps->bits_per_pixel / 16.0);
|
||||
int slice_width = pps->slice_width;
|
||||
int slice_height = pps->slice_height;
|
||||
int ret;
|
||||
struct rc_params rc;
|
||||
struct drm_dsc_config dsc_cfg;
|
||||
|
||||
double d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
|
||||
dsc_params->bytes_per_pixel = calc_dsc_bytes_per_pixel(pps);
|
||||
|
||||
// TODO: Make sure the formula for calculating this is precise (ceiling vs. floor, and at what point they should be applied)
|
||||
if (pps->native_422 || pps->native_420)
|
||||
d_bytes_per_pixel /= 2;
|
||||
|
||||
dsc_params->bytes_per_pixel = (uint32_t)dsc_ceil(d_bytes_per_pixel * 0x10000000);
|
||||
|
||||
/* in native_422 or native_420 modes, the bits_per_pixel is double the target bpp
|
||||
* (the latter is what calc_rc_params expects)
|
||||
*/
|
||||
if (pps->native_422 || pps->native_420)
|
||||
bpp /= 2.0;
|
||||
|
||||
calc_rc_params(&rc, mode, bpc, bpp, slice_width, slice_height, pps->dsc_version_minor);
|
||||
calc_rc_params(&rc, pps);
|
||||
dsc_params->pps = *pps;
|
||||
dsc_params->pps.initial_scale_value = 8 * rc.rc_model_size / (rc.rc_model_size - rc.initial_fullness_offset);
|
||||
|
||||
|
|
|
@ -843,7 +843,7 @@ static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
|
|||
pow_buffer_ptr = -1; // reset back to no optimize
|
||||
ret = true;
|
||||
release:
|
||||
kfree(coeff);
|
||||
kvfree(coeff);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1777,7 +1777,7 @@ bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
|
|||
|
||||
kfree(rgb_regamma);
|
||||
rgb_regamma_alloc_fail:
|
||||
kvfree(rgb_user);
|
||||
kfree(rgb_user);
|
||||
rgb_user_alloc_fail:
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -239,7 +239,7 @@ static void ci_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr)
|
|||
|
||||
switch (dev_id) {
|
||||
case 0x67BA:
|
||||
case 0x66B1:
|
||||
case 0x67B1:
|
||||
smu_data->power_tune_defaults = &defaults_hawaii_pro;
|
||||
break;
|
||||
case 0x67B8:
|
||||
|
|
|
@ -2579,14 +2579,14 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder,
|
|||
|
||||
static void
|
||||
tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock,
|
||||
u32 level)
|
||||
u32 level, enum intel_output_type type)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
|
||||
enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port);
|
||||
const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations;
|
||||
u32 n_entries, val, ln, dpcnt_mask, dpcnt_val;
|
||||
|
||||
if (encoder->type == INTEL_OUTPUT_HDMI) {
|
||||
if (type == INTEL_OUTPUT_HDMI) {
|
||||
n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
|
||||
ddi_translations = tgl_dkl_phy_hdmi_ddi_trans;
|
||||
} else {
|
||||
|
@ -2638,7 +2638,7 @@ static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder,
|
|||
if (intel_phy_is_combo(dev_priv, phy))
|
||||
icl_combo_phy_ddi_vswing_sequence(encoder, level, type);
|
||||
else
|
||||
tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level);
|
||||
tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level, type);
|
||||
}
|
||||
|
||||
static u32 translate_signal_level(struct intel_dp *intel_dp, int signal_levels)
|
||||
|
@ -2987,7 +2987,7 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port,
|
|||
ln1 = intel_de_read(dev_priv, MG_DP_MODE(1, tc_port));
|
||||
}
|
||||
|
||||
ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE);
|
||||
ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE);
|
||||
ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE);
|
||||
|
||||
/* DPPATC */
|
||||
|
@ -3472,7 +3472,9 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state,
|
|||
INTEL_OUTPUT_DP_MST);
|
||||
enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
|
||||
|
||||
intel_dp_set_infoframes(encoder, false, old_crtc_state, old_conn_state);
|
||||
if (!is_mst)
|
||||
intel_dp_set_infoframes(encoder, false,
|
||||
old_crtc_state, old_conn_state);
|
||||
|
||||
/*
|
||||
* Power down sink before disabling the port, otherwise we end
|
||||
|
|
|
@ -397,6 +397,14 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
|
|||
*/
|
||||
drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port,
|
||||
false);
|
||||
|
||||
/*
|
||||
* BSpec 4287: disable DIP after the transcoder is disabled and before
|
||||
* the transcoder clock select is set to none.
|
||||
*/
|
||||
if (last_mst_stream)
|
||||
intel_dp_set_infoframes(&intel_dig_port->base, false,
|
||||
old_crtc_state, NULL);
|
||||
/*
|
||||
* From TGL spec: "If multi-stream slave transcoder: Configure
|
||||
* Transcoder Clock Select to direct no clock to the transcoder"
|
||||
|
|
|
@ -646,7 +646,7 @@ static int engine_setup_common(struct intel_engine_cs *engine)
|
|||
struct measure_breadcrumb {
|
||||
struct i915_request rq;
|
||||
struct intel_ring ring;
|
||||
u32 cs[1024];
|
||||
u32 cs[2048];
|
||||
};
|
||||
|
||||
static int measure_breadcrumb_dw(struct intel_context *ce)
|
||||
|
@ -668,6 +668,8 @@ static int measure_breadcrumb_dw(struct intel_context *ce)
|
|||
|
||||
frame->ring.vaddr = frame->cs;
|
||||
frame->ring.size = sizeof(frame->cs);
|
||||
frame->ring.wrap =
|
||||
BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
|
||||
frame->ring.effective_size = frame->ring.size;
|
||||
intel_ring_update_space(&frame->ring);
|
||||
frame->rq.ring = &frame->ring;
|
||||
|
|
|
@ -1134,6 +1134,13 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
|
|||
list_move(&rq->sched.link, pl);
|
||||
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
|
||||
|
||||
/* Check in case we roll back so far that we wrap [size/2] */
|
||||
if (intel_ring_direction(rq->ring,
|
||||
intel_ring_wrap(rq->ring,
|
||||
rq->tail),
|
||||
rq->ring->tail) > 0)
|
||||
rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
|
||||
|
||||
active = rq;
|
||||
} else {
|
||||
struct intel_engine_cs *owner = rq->context->engine;
|
||||
|
@ -1498,8 +1505,9 @@ static u64 execlists_update_context(struct i915_request *rq)
|
|||
* HW has a tendency to ignore us rewinding the TAIL to the end of
|
||||
* an earlier request.
|
||||
*/
|
||||
GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
|
||||
prev = rq->ring->tail;
|
||||
tail = intel_ring_set_tail(rq->ring, rq->tail);
|
||||
prev = ce->lrc_reg_state[CTX_RING_TAIL];
|
||||
if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
|
||||
desc |= CTX_DESC_FORCE_RESTORE;
|
||||
ce->lrc_reg_state[CTX_RING_TAIL] = tail;
|
||||
|
@ -1895,7 +1903,8 @@ static void defer_active(struct intel_engine_cs *engine)
|
|||
|
||||
static bool
|
||||
need_timeslice(const struct intel_engine_cs *engine,
|
||||
const struct i915_request *rq)
|
||||
const struct i915_request *rq,
|
||||
const struct rb_node *rb)
|
||||
{
|
||||
int hint;
|
||||
|
||||
|
@ -1903,9 +1912,28 @@ need_timeslice(const struct intel_engine_cs *engine,
|
|||
return false;
|
||||
|
||||
hint = engine->execlists.queue_priority_hint;
|
||||
|
||||
if (rb) {
|
||||
const struct virtual_engine *ve =
|
||||
rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
|
||||
const struct intel_engine_cs *inflight =
|
||||
intel_context_inflight(&ve->context);
|
||||
|
||||
if (!inflight || inflight == engine) {
|
||||
struct i915_request *next;
|
||||
|
||||
rcu_read_lock();
|
||||
next = READ_ONCE(ve->request);
|
||||
if (next)
|
||||
hint = max(hint, rq_prio(next));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
if (!list_is_last(&rq->sched.link, &engine->active.requests))
|
||||
hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
|
||||
|
||||
GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
|
||||
return hint >= effective_prio(rq);
|
||||
}
|
||||
|
||||
|
@ -1977,10 +2005,9 @@ static void set_timeslice(struct intel_engine_cs *engine)
|
|||
set_timer_ms(&engine->execlists.timer, duration);
|
||||
}
|
||||
|
||||
static void start_timeslice(struct intel_engine_cs *engine)
|
||||
static void start_timeslice(struct intel_engine_cs *engine, int prio)
|
||||
{
|
||||
struct intel_engine_execlists *execlists = &engine->execlists;
|
||||
const int prio = queue_prio(execlists);
|
||||
unsigned long duration;
|
||||
|
||||
if (!intel_engine_has_timeslices(engine))
|
||||
|
@ -2140,7 +2167,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
|
|||
__unwind_incomplete_requests(engine);
|
||||
|
||||
last = NULL;
|
||||
} else if (need_timeslice(engine, last) &&
|
||||
} else if (need_timeslice(engine, last, rb) &&
|
||||
timeslice_expired(execlists, last)) {
|
||||
if (i915_request_completed(last)) {
|
||||
tasklet_hi_schedule(&execlists->tasklet);
|
||||
|
@ -2188,7 +2215,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
|
|||
* Even if ELSP[1] is occupied and not worthy
|
||||
* of timeslices, our queue might be.
|
||||
*/
|
||||
start_timeslice(engine);
|
||||
start_timeslice(engine, queue_prio(execlists));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -2223,7 +2250,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
|
|||
|
||||
if (last && !can_merge_rq(last, rq)) {
|
||||
spin_unlock(&ve->base.active.lock);
|
||||
start_timeslice(engine);
|
||||
start_timeslice(engine, rq_prio(rq));
|
||||
return; /* leave this for another sibling */
|
||||
}
|
||||
|
||||
|
@ -4739,6 +4766,14 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Debug-only sanity check on a request's ring offsets: trips GEM_BUG_ON()
 * unless intel_ring_direction() reports rq->wa_tail as strictly ahead of
 * rq->head (result > 0).
 *
 * The local is marked __maybe_unused, presumably because GEM_BUG_ON() can
 * compile away to nothing in non-debug builds - confirm against its
 * definition.
 */
static void assert_request_valid(struct i915_request *rq)
|
||||
{
|
||||
struct intel_ring *ring __maybe_unused = rq->ring;
|
||||
|
||||
/* Can we unwind this request without appearing to go forwards? */
|
||||
GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reserve space for 2 NOOPs at the end of each request to be
|
||||
* used as a workaround for not being allowed to do lite
|
||||
|
@ -4751,6 +4786,9 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
|
|||
*cs++ = MI_NOOP;
|
||||
request->wa_tail = intel_ring_offset(request, cs);
|
||||
|
||||
/* Check that entire request is less than half the ring */
|
||||
assert_request_valid(request);
|
||||
|
||||
return cs;
|
||||
}
|
||||
|
||||
|
|
|
@ -315,3 +315,7 @@ int intel_ring_cacheline_align(struct i915_request *rq)
|
|||
GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
#include "selftest_ring.c"
|
||||
#endif
|
||||
|
|
|
@ -178,6 +178,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
|
|||
wa_write_masked_or(wal, reg, set, set);
|
||||
}
|
||||
|
||||
/*
 * wa_write_clr - add a workaround that clears bits in a register
 * @wal: workaround list passed through to wa_write_masked_or()
 * @reg: register to modify
 * @clr: bits to clear
 *
 * Thin wrapper: forwards to wa_write_masked_or() with @clr as the third
 * argument and 0 as the fourth.
 *
 * NOTE(review): this assumes those two arguments are (mask, value), as the
 * helper's name and the sibling wa_write_or() suggest - confirm against
 * wa_write_masked_or().
 */
static void
|
||||
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
|
||||
{
|
||||
wa_write_masked_or(wal, reg, clr, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
|
||||
{
|
||||
|
@ -686,6 +692,227 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Adds the gen4 GT workaround to @wal: a single wa_masked_dis() entry for
 * RC_OP_FLUSH_ENABLE in CACHE_MODE_0.
 *
 * @i915 is not used in this function. g4x_gt_workarounds_init() calls this
 * function first and then adds its own entry.
 */
static void
|
||||
gen4_gt_workarounds_init(struct drm_i915_private *i915,
|
||||
struct i915_wa_list *wal)
|
||||
{
|
||||
/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
|
||||
wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
|
||||
}
|
||||
|
||||
/*
 * Adds the g4x GT workarounds to @wal: everything from
 * gen4_gt_workarounds_init() plus a wa_masked_en() entry for
 * CM0_PIPELINED_RENDER_FLUSH_DISABLE in CACHE_MODE_0.
 *
 * @i915 is only forwarded to the gen4 helper.
 */
static void
|
||||
g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
|
||||
{
|
||||
gen4_gt_workarounds_init(i915, wal);
|
||||
|
||||
/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
|
||||
wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
|
||||
}
|
||||
|
||||
/*
 * Adds the ilk GT workarounds to @wal: everything from
 * g4x_gt_workarounds_init() (which in turn includes the gen4 entry) plus a
 * wa_masked_en() entry for _3D_CHICKEN2_WM_READ_PIPELINED in _3D_CHICKEN2.
 *
 * @i915 is only forwarded to the g4x helper.
 */
static void
|
||||
ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
|
||||
{
|
||||
g4x_gt_workarounds_init(i915, wal);
|
||||
|
||||
wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
|
||||
}
|
||||
|
||||
/*
 * Adds the snb GT workarounds to @wal. Each entry is added through one of the
 * wa_*() helpers; the individual workaround names are given in the comments
 * next to each entry.
 *
 * @i915 is not used in this function.
 */
static void
|
||||
snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
|
||||
{
|
||||
/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
|
||||
wa_masked_en(wal,
|
||||
_3D_CHICKEN,
|
||||
_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:snb */
|
||||
wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
|
||||
|
||||
/*
|
||||
* BSpec recommends 8x4 when MSAA is used,
|
||||
* however in practice 16x4 seems fastest.
|
||||
*
|
||||
* Note that PS/WM thread counts depend on the WIZ hashing
|
||||
* disable bit, which we don't touch here, but it's good
|
||||
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
|
||||
*/
|
||||
wa_add(wal,
|
||||
GEN6_GT_MODE, 0,
|
||||
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
|
||||
GEN6_WIZ_HASHING_16x4);
|
||||
|
||||
wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB);
|
||||
|
||||
wa_masked_en(wal,
|
||||
_3D_CHICKEN3,
|
||||
/* WaStripsFansDisableFastClipPerformanceFix:snb */
|
||||
_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
|
||||
/*
|
||||
* Bspec says:
|
||||
* "This bit must be set if 3DSTATE_CLIP clip mode is set
|
||||
* to normal and 3DSTATE_SF number of SF output attributes
|
||||
* is more than 16."
|
||||
*/
|
||||
_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
|
||||
}
|
||||
|
||||
/*
 * Adds the ivb GT workarounds to @wal. Each entry is added through one of the
 * wa_*() helpers; the individual workaround names are given in the comments
 * next to each entry.
 *
 * @i915 is only consulted for the IS_IVB_GT1() check that gates the
 * GEN7_HALF_SLICE_CHICKEN1 entry. The "if (0)" block is deliberately dead
 * code: its own comment records that the setting corrupts HiZ on ivb:gt1.
 */
static void
|
||||
ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
|
||||
{
|
||||
/* WaDisableEarlyCull:ivb */
|
||||
wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
|
||||
|
||||
/* WaDisablePSDDualDispatchEnable:ivb */
|
||||
if (IS_IVB_GT1(i915))
|
||||
wa_masked_en(wal,
|
||||
GEN7_HALF_SLICE_CHICKEN1,
|
||||
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:ivb */
|
||||
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
|
||||
|
||||
/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
|
||||
wa_masked_dis(wal,
|
||||
GEN7_COMMON_SLICE_CHICKEN1,
|
||||
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
|
||||
|
||||
/* WaApplyL3ControlAndL3ChickenMode:ivb */
|
||||
wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
|
||||
wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
|
||||
|
||||
/* WaForceL3Serialization:ivb */
|
||||
wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
|
||||
|
||||
/*
|
||||
* WaVSThreadDispatchOverride:ivb,vlv
|
||||
*
|
||||
* This actually overrides the dispatch
|
||||
* mode for all thread types.
|
||||
*/
|
||||
wa_write_masked_or(wal, GEN7_FF_THREAD_MODE,
|
||||
GEN7_FF_SCHED_MASK,
|
||||
GEN7_FF_TS_SCHED_HW |
|
||||
GEN7_FF_VS_SCHED_HW |
|
||||
GEN7_FF_DS_SCHED_HW);
|
||||
|
||||
if (0) { /* causes HiZ corruption on ivb:gt1 */
|
||||
/* enable HiZ Raw Stall Optimization */
|
||||
wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
|
||||
}
|
||||
|
||||
/* WaDisable4x2SubspanOptimization:ivb */
|
||||
wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
|
||||
|
||||
/*
|
||||
* BSpec recommends 8x4 when MSAA is used,
|
||||
* however in practice 16x4 seems fastest.
|
||||
*
|
||||
* Note that PS/WM thread counts depend on the WIZ hashing
|
||||
* disable bit, which we don't touch here, but it's good
|
||||
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
|
||||
*/
|
||||
wa_add(wal, GEN7_GT_MODE, 0,
|
||||
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
|
||||
GEN6_WIZ_HASHING_16x4);
|
||||
}
|
||||
|
||||
static void
|
||||
vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
|
||||
{
|
||||
/* WaDisableEarlyCull:vlv */
|
||||
wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
|
||||
|
||||
/* WaPsdDispatchEnable:vlv */
|
||||
/* WaDisablePSDDualDispatchEnable:vlv */
|
||||
wa_masked_en(wal,
|
||||
GEN7_HALF_SLICE_CHICKEN1,
|
||||
GEN7_MAX_PS_THREAD_DEP |
|
||||
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:vlv */
|
||||
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
|
||||
|
||||
/* WaForceL3Serialization:vlv */
|
||||
wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
|
||||
|
||||
/*
|
||||
* WaVSThreadDispatchOverride:ivb,vlv
|
||||
*
|
||||
* This actually overrides the dispatch
|
||||
* mode for all thread types.
|
||||
*/
|
||||
wa_write_masked_or(wal,
|
||||
GEN7_FF_THREAD_MODE,
|
||||
GEN7_FF_SCHED_MASK,
|
||||
GEN7_FF_TS_SCHED_HW |
|
||||
GEN7_FF_VS_SCHED_HW |
|
||||
GEN7_FF_DS_SCHED_HW);
|
||||
|
||||
/*
|
||||
* BSpec says this must be set, even though
|
||||
* WaDisable4x2SubspanOptimization isn't listed for VLV.
|
||||
*/
|
||||
wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
|
||||
|
||||
/*
|
||||
* BSpec recommends 8x4 when MSAA is used,
|
||||
* however in practice 16x4 seems fastest.
|
||||
*
|
||||
* Note that PS/WM thread counts depend on the WIZ hashing
|
||||
* disable bit, which we don't touch here, but it's good
|
||||
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
|
||||
*/
|
||||
wa_add(wal, GEN7_GT_MODE, 0,
|
||||
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
|
||||
GEN6_WIZ_HASHING_16x4);
|
||||
|
||||
/*
|
||||
* WaIncreaseL3CreditsForVLVB0:vlv
|
||||
* This is the hardware default actually.
|
||||
*/
|
||||
wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
|
||||
}
|
||||
|
||||
static void
|
||||
hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
|
||||
{
|
||||
/* L3 caching of data atomics doesn't work -- disable it. */
|
||||
wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
|
||||
|
||||
wa_add(wal,
|
||||
HSW_ROW_CHICKEN3, 0,
|
||||
_MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
|
||||
0 /* XXX does this reg exist? */);
|
||||
|
||||
/* WaVSRefCountFullforceMissDisable:hsw */
|
||||
wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
|
||||
|
||||
wa_masked_dis(wal,
|
||||
CACHE_MODE_0_GEN7,
|
||||
/* WaDisable_RenderCache_OperationalFlush:hsw */
|
||||
RC_OP_FLUSH_ENABLE |
|
||||
/* enable HiZ Raw Stall Optimization */
|
||||
HIZ_RAW_STALL_OPT_DISABLE);
|
||||
|
||||
/* WaDisable4x2SubspanOptimization:hsw */
|
||||
wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
|
||||
|
||||
/*
|
||||
* BSpec recommends 8x4 when MSAA is used,
|
||||
* however in practice 16x4 seems fastest.
|
||||
*
|
||||
* Note that PS/WM thread counts depend on the WIZ hashing
|
||||
* disable bit, which we don't touch here, but it's good
|
||||
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
|
||||
*/
|
||||
wa_add(wal, GEN7_GT_MODE, 0,
|
||||
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
|
||||
GEN6_WIZ_HASHING_16x4);
|
||||
|
||||
/* WaSampleCChickenBitEnable:hsw */
|
||||
wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
|
||||
}
|
||||
|
||||
static void
|
||||
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
|
||||
{
|
||||
|
@ -963,6 +1190,20 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
|
|||
bxt_gt_workarounds_init(i915, wal);
|
||||
else if (IS_SKYLAKE(i915))
|
||||
skl_gt_workarounds_init(i915, wal);
|
||||
else if (IS_HASWELL(i915))
|
||||
hsw_gt_workarounds_init(i915, wal);
|
||||
else if (IS_VALLEYVIEW(i915))
|
||||
vlv_gt_workarounds_init(i915, wal);
|
||||
else if (IS_IVYBRIDGE(i915))
|
||||
ivb_gt_workarounds_init(i915, wal);
|
||||
else if (IS_GEN(i915, 6))
|
||||
snb_gt_workarounds_init(i915, wal);
|
||||
else if (IS_GEN(i915, 5))
|
||||
ilk_gt_workarounds_init(i915, wal);
|
||||
else if (IS_G4X(i915))
|
||||
g4x_gt_workarounds_init(i915, wal);
|
||||
else if (IS_GEN(i915, 4))
|
||||
gen4_gt_workarounds_init(i915, wal);
|
||||
else if (INTEL_GEN(i915) <= 8)
|
||||
return;
|
||||
else
|
||||
|
|
|
@ -310,22 +310,20 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq)
|
|||
1000));
|
||||
}
|
||||
|
||||
static void engine_heartbeat_disable(struct intel_engine_cs *engine,
|
||||
unsigned long *saved)
|
||||
static void engine_heartbeat_disable(struct intel_engine_cs *engine)
|
||||
{
|
||||
*saved = engine->props.heartbeat_interval_ms;
|
||||
engine->props.heartbeat_interval_ms = 0;
|
||||
|
||||
intel_engine_pm_get(engine);
|
||||
intel_engine_park_heartbeat(engine);
|
||||
}
|
||||
|
||||
static void engine_heartbeat_enable(struct intel_engine_cs *engine,
|
||||
unsigned long saved)
|
||||
static void engine_heartbeat_enable(struct intel_engine_cs *engine)
|
||||
{
|
||||
intel_engine_pm_put(engine);
|
||||
|
||||
engine->props.heartbeat_interval_ms = saved;
|
||||
engine->props.heartbeat_interval_ms =
|
||||
engine->defaults.heartbeat_interval_ms;
|
||||
}
|
||||
|
||||
static int igt_hang_sanitycheck(void *arg)
|
||||
|
@ -473,7 +471,6 @@ static int igt_reset_nop_engine(void *arg)
|
|||
for_each_engine(engine, gt, id) {
|
||||
unsigned int reset_count, reset_engine_count, count;
|
||||
struct intel_context *ce;
|
||||
unsigned long heartbeat;
|
||||
IGT_TIMEOUT(end_time);
|
||||
int err;
|
||||
|
||||
|
@ -485,7 +482,7 @@ static int igt_reset_nop_engine(void *arg)
|
|||
reset_engine_count = i915_reset_engine_count(global, engine);
|
||||
count = 0;
|
||||
|
||||
engine_heartbeat_disable(engine, &heartbeat);
|
||||
engine_heartbeat_disable(engine);
|
||||
set_bit(I915_RESET_ENGINE + id, >->reset.flags);
|
||||
do {
|
||||
int i;
|
||||
|
@ -529,7 +526,7 @@ static int igt_reset_nop_engine(void *arg)
|
|||
}
|
||||
} while (time_before(jiffies, end_time));
|
||||
clear_bit(I915_RESET_ENGINE + id, >->reset.flags);
|
||||
engine_heartbeat_enable(engine, heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
|
||||
pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
|
||||
|
||||
|
@ -564,7 +561,6 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
|
|||
|
||||
for_each_engine(engine, gt, id) {
|
||||
unsigned int reset_count, reset_engine_count;
|
||||
unsigned long heartbeat;
|
||||
IGT_TIMEOUT(end_time);
|
||||
|
||||
if (active && !intel_engine_can_store_dword(engine))
|
||||
|
@ -580,7 +576,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
|
|||
reset_count = i915_reset_count(global);
|
||||
reset_engine_count = i915_reset_engine_count(global, engine);
|
||||
|
||||
engine_heartbeat_disable(engine, &heartbeat);
|
||||
engine_heartbeat_disable(engine);
|
||||
set_bit(I915_RESET_ENGINE + id, >->reset.flags);
|
||||
do {
|
||||
if (active) {
|
||||
|
@ -632,7 +628,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
|
|||
}
|
||||
} while (time_before(jiffies, end_time));
|
||||
clear_bit(I915_RESET_ENGINE + id, >->reset.flags);
|
||||
engine_heartbeat_enable(engine, heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
|
||||
if (err)
|
||||
break;
|
||||
|
@ -789,7 +785,6 @@ static int __igt_reset_engines(struct intel_gt *gt,
|
|||
struct active_engine threads[I915_NUM_ENGINES] = {};
|
||||
unsigned long device = i915_reset_count(global);
|
||||
unsigned long count = 0, reported;
|
||||
unsigned long heartbeat;
|
||||
IGT_TIMEOUT(end_time);
|
||||
|
||||
if (flags & TEST_ACTIVE &&
|
||||
|
@ -832,7 +827,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
|
|||
|
||||
yield(); /* start all threads before we begin */
|
||||
|
||||
engine_heartbeat_disable(engine, &heartbeat);
|
||||
engine_heartbeat_disable(engine);
|
||||
set_bit(I915_RESET_ENGINE + id, >->reset.flags);
|
||||
do {
|
||||
struct i915_request *rq = NULL;
|
||||
|
@ -906,7 +901,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
|
|||
}
|
||||
} while (time_before(jiffies, end_time));
|
||||
clear_bit(I915_RESET_ENGINE + id, >->reset.flags);
|
||||
engine_heartbeat_enable(engine, heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
|
||||
pr_info("i915_reset_engine(%s:%s): %lu resets\n",
|
||||
engine->name, test_name, count);
|
||||
|
|
|
@ -51,22 +51,20 @@ static struct i915_vma *create_scratch(struct intel_gt *gt)
|
|||
return vma;
|
||||
}
|
||||
|
||||
static void engine_heartbeat_disable(struct intel_engine_cs *engine,
|
||||
unsigned long *saved)
|
||||
static void engine_heartbeat_disable(struct intel_engine_cs *engine)
|
||||
{
|
||||
*saved = engine->props.heartbeat_interval_ms;
|
||||
engine->props.heartbeat_interval_ms = 0;
|
||||
|
||||
intel_engine_pm_get(engine);
|
||||
intel_engine_park_heartbeat(engine);
|
||||
}
|
||||
|
||||
static void engine_heartbeat_enable(struct intel_engine_cs *engine,
|
||||
unsigned long saved)
|
||||
static void engine_heartbeat_enable(struct intel_engine_cs *engine)
|
||||
{
|
||||
intel_engine_pm_put(engine);
|
||||
|
||||
engine->props.heartbeat_interval_ms = saved;
|
||||
engine->props.heartbeat_interval_ms =
|
||||
engine->defaults.heartbeat_interval_ms;
|
||||
}
|
||||
|
||||
static bool is_active(struct i915_request *rq)
|
||||
|
@ -224,7 +222,6 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
|
|||
struct intel_context *ce[2] = {};
|
||||
struct i915_request *rq[2];
|
||||
struct igt_live_test t;
|
||||
unsigned long saved;
|
||||
int n;
|
||||
|
||||
if (prio && !intel_engine_has_preemption(engine))
|
||||
|
@ -237,7 +234,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
|
|||
err = -EIO;
|
||||
break;
|
||||
}
|
||||
engine_heartbeat_disable(engine, &saved);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
for (n = 0; n < ARRAY_SIZE(ce); n++) {
|
||||
struct intel_context *tmp;
|
||||
|
@ -345,7 +342,7 @@ err_ce:
|
|||
intel_context_put(ce[n]);
|
||||
}
|
||||
|
||||
engine_heartbeat_enable(engine, saved);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (igt_live_test_end(&t))
|
||||
err = -EIO;
|
||||
if (err)
|
||||
|
@ -466,7 +463,6 @@ static int live_hold_reset(void *arg)
|
|||
|
||||
for_each_engine(engine, gt, id) {
|
||||
struct intel_context *ce;
|
||||
unsigned long heartbeat;
|
||||
struct i915_request *rq;
|
||||
|
||||
ce = intel_context_create(engine);
|
||||
|
@ -475,7 +471,7 @@ static int live_hold_reset(void *arg)
|
|||
break;
|
||||
}
|
||||
|
||||
engine_heartbeat_disable(engine, &heartbeat);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
|
||||
if (IS_ERR(rq)) {
|
||||
|
@ -535,7 +531,7 @@ static int live_hold_reset(void *arg)
|
|||
i915_request_put(rq);
|
||||
|
||||
out:
|
||||
engine_heartbeat_enable(engine, heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
intel_context_put(ce);
|
||||
if (err)
|
||||
break;
|
||||
|
@ -580,10 +576,9 @@ static int live_error_interrupt(void *arg)
|
|||
|
||||
for_each_engine(engine, gt, id) {
|
||||
const struct error_phase *p;
|
||||
unsigned long heartbeat;
|
||||
int err = 0;
|
||||
|
||||
engine_heartbeat_disable(engine, &heartbeat);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
for (p = phases; p->error[0] != GOOD; p++) {
|
||||
struct i915_request *client[ARRAY_SIZE(phases->error)];
|
||||
|
@ -682,7 +677,7 @@ out:
|
|||
}
|
||||
}
|
||||
|
||||
engine_heartbeat_enable(engine, heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (err) {
|
||||
intel_gt_set_wedged(gt);
|
||||
return err;
|
||||
|
@ -828,7 +823,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
|
|||
}
|
||||
}
|
||||
|
||||
err = release_queue(outer, vma, n, INT_MAX);
|
||||
err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
|
@ -895,16 +890,14 @@ static int live_timeslice_preempt(void *arg)
|
|||
enum intel_engine_id id;
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
unsigned long saved;
|
||||
|
||||
if (!intel_engine_has_preemption(engine))
|
||||
continue;
|
||||
|
||||
memset(vaddr, 0, PAGE_SIZE);
|
||||
|
||||
engine_heartbeat_disable(engine, &saved);
|
||||
engine_heartbeat_disable(engine);
|
||||
err = slice_semaphore_queue(engine, vma, count);
|
||||
engine_heartbeat_enable(engine, saved);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (err)
|
||||
goto err_pin;
|
||||
|
||||
|
@ -1009,7 +1002,6 @@ static int live_timeslice_rewind(void *arg)
|
|||
enum { X = 1, Z, Y };
|
||||
struct i915_request *rq[3] = {};
|
||||
struct intel_context *ce;
|
||||
unsigned long heartbeat;
|
||||
unsigned long timeslice;
|
||||
int i, err = 0;
|
||||
u32 *slot;
|
||||
|
@ -1028,7 +1020,7 @@ static int live_timeslice_rewind(void *arg)
|
|||
* Expect execution/evaluation order XZY
|
||||
*/
|
||||
|
||||
engine_heartbeat_disable(engine, &heartbeat);
|
||||
engine_heartbeat_disable(engine);
|
||||
timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
|
||||
|
||||
slot = memset32(engine->status_page.addr + 1000, 0, 4);
|
||||
|
@ -1122,7 +1114,7 @@ err:
|
|||
wmb();
|
||||
|
||||
engine->props.timeslice_duration_ms = timeslice;
|
||||
engine_heartbeat_enable(engine, heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
for (i = 0; i < 3; i++)
|
||||
i915_request_put(rq[i]);
|
||||
if (igt_flush_test(gt->i915))
|
||||
|
@ -1202,12 +1194,11 @@ static int live_timeslice_queue(void *arg)
|
|||
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
|
||||
};
|
||||
struct i915_request *rq, *nop;
|
||||
unsigned long saved;
|
||||
|
||||
if (!intel_engine_has_preemption(engine))
|
||||
continue;
|
||||
|
||||
engine_heartbeat_disable(engine, &saved);
|
||||
engine_heartbeat_disable(engine);
|
||||
memset(vaddr, 0, PAGE_SIZE);
|
||||
|
||||
/* ELSP[0]: semaphore wait */
|
||||
|
@ -1284,7 +1275,7 @@ static int live_timeslice_queue(void *arg)
|
|||
err_rq:
|
||||
i915_request_put(rq);
|
||||
err_heartbeat:
|
||||
engine_heartbeat_enable(engine, saved);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
@ -1298,6 +1289,121 @@ err_obj:
|
|||
return err;
|
||||
}
|
||||
|
||||
static int live_timeslice_nopreempt(void *arg)
|
||||
{
|
||||
struct intel_gt *gt = arg;
|
||||
struct intel_engine_cs *engine;
|
||||
enum intel_engine_id id;
|
||||
struct igt_spinner spin;
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* We should not timeslice into a request that is marked with
|
||||
* I915_REQUEST_NOPREEMPT.
|
||||
*/
|
||||
if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
|
||||
return 0;
|
||||
|
||||
if (igt_spinner_init(&spin, gt))
|
||||
return -ENOMEM;
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
struct intel_context *ce;
|
||||
struct i915_request *rq;
|
||||
unsigned long timeslice;
|
||||
|
||||
if (!intel_engine_has_preemption(engine))
|
||||
continue;
|
||||
|
||||
ce = intel_context_create(engine);
|
||||
if (IS_ERR(ce)) {
|
||||
err = PTR_ERR(ce);
|
||||
break;
|
||||
}
|
||||
|
||||
engine_heartbeat_disable(engine);
|
||||
timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
|
||||
|
||||
/* Create an unpreemptible spinner */
|
||||
|
||||
rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
|
||||
intel_context_put(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto out_heartbeat;
|
||||
}
|
||||
|
||||
i915_request_get(rq);
|
||||
i915_request_add(rq);
|
||||
|
||||
if (!igt_wait_for_spinner(&spin, rq)) {
|
||||
i915_request_put(rq);
|
||||
err = -ETIME;
|
||||
goto out_spin;
|
||||
}
|
||||
|
||||
set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
|
||||
i915_request_put(rq);
|
||||
|
||||
/* Followed by a maximum priority barrier (heartbeat) */
|
||||
|
||||
ce = intel_context_create(engine);
|
||||
if (IS_ERR(ce)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto out_spin;
|
||||
}
|
||||
|
||||
rq = intel_context_create_request(ce);
|
||||
intel_context_put(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto out_spin;
|
||||
}
|
||||
|
||||
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
|
||||
i915_request_get(rq);
|
||||
i915_request_add(rq);
|
||||
|
||||
/*
|
||||
* Wait until the barrier is in ELSP, and we know timeslicing
|
||||
* will have been activated.
|
||||
*/
|
||||
if (wait_for_submit(engine, rq, HZ / 2)) {
|
||||
i915_request_put(rq);
|
||||
err = -ETIME;
|
||||
goto out_spin;
|
||||
}
|
||||
|
||||
/*
|
||||
* Since the ELSP[0] request is unpreemptible, it should not
|
||||
* allow the maximum priority barrier through. Wait long
|
||||
* enough to see if it is timesliced in by mistake.
|
||||
*/
|
||||
if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) {
|
||||
pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
|
||||
engine->name);
|
||||
err = -EINVAL;
|
||||
}
|
||||
i915_request_put(rq);
|
||||
|
||||
out_spin:
|
||||
igt_spinner_end(&spin);
|
||||
out_heartbeat:
|
||||
xchg(&engine->props.timeslice_duration_ms, timeslice);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
if (igt_flush_test(gt->i915)) {
|
||||
err = -EIO;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
igt_spinner_fini(&spin);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int live_busywait_preempt(void *arg)
|
||||
{
|
||||
struct intel_gt *gt = arg;
|
||||
|
@ -4153,7 +4259,6 @@ static int reset_virtual_engine(struct intel_gt *gt,
|
|||
{
|
||||
struct intel_engine_cs *engine;
|
||||
struct intel_context *ve;
|
||||
unsigned long *heartbeat;
|
||||
struct igt_spinner spin;
|
||||
struct i915_request *rq;
|
||||
unsigned int n;
|
||||
|
@ -4165,15 +4270,9 @@ static int reset_virtual_engine(struct intel_gt *gt,
|
|||
* descendents are not executed while the capture is in progress.
|
||||
*/
|
||||
|
||||
heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
|
||||
if (!heartbeat)
|
||||
if (igt_spinner_init(&spin, gt))
|
||||
return -ENOMEM;
|
||||
|
||||
if (igt_spinner_init(&spin, gt)) {
|
||||
err = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
ve = intel_execlists_create_virtual(siblings, nsibling);
|
||||
if (IS_ERR(ve)) {
|
||||
err = PTR_ERR(ve);
|
||||
|
@ -4181,7 +4280,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
|
|||
}
|
||||
|
||||
for (n = 0; n < nsibling; n++)
|
||||
engine_heartbeat_disable(siblings[n], &heartbeat[n]);
|
||||
engine_heartbeat_disable(siblings[n]);
|
||||
|
||||
rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
|
||||
if (IS_ERR(rq)) {
|
||||
|
@ -4252,13 +4351,11 @@ out_rq:
|
|||
i915_request_put(rq);
|
||||
out_heartbeat:
|
||||
for (n = 0; n < nsibling; n++)
|
||||
engine_heartbeat_enable(siblings[n], heartbeat[n]);
|
||||
engine_heartbeat_enable(siblings[n]);
|
||||
|
||||
intel_context_put(ve);
|
||||
out_spin:
|
||||
igt_spinner_fini(&spin);
|
||||
out_free:
|
||||
kfree(heartbeat);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -4314,6 +4411,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
|
|||
SUBTEST(live_timeslice_preempt),
|
||||
SUBTEST(live_timeslice_rewind),
|
||||
SUBTEST(live_timeslice_queue),
|
||||
SUBTEST(live_timeslice_nopreempt),
|
||||
SUBTEST(live_busywait_preempt),
|
||||
SUBTEST(live_preempt),
|
||||
SUBTEST(live_late_preempt),
|
||||
|
@ -4932,9 +5030,7 @@ static int live_lrc_gpr(void *arg)
|
|||
return PTR_ERR(scratch);
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
unsigned long heartbeat;
|
||||
|
||||
engine_heartbeat_disable(engine, &heartbeat);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
err = __live_lrc_gpr(engine, scratch, false);
|
||||
if (err)
|
||||
|
@ -4945,7 +5041,7 @@ static int live_lrc_gpr(void *arg)
|
|||
goto err;
|
||||
|
||||
err:
|
||||
engine_heartbeat_enable(engine, heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (igt_flush_test(gt->i915))
|
||||
err = -EIO;
|
||||
if (err)
|
||||
|
@ -5092,10 +5188,9 @@ static int live_lrc_timestamp(void *arg)
|
|||
*/
|
||||
|
||||
for_each_engine(data.engine, gt, id) {
|
||||
unsigned long heartbeat;
|
||||
int i, err = 0;
|
||||
|
||||
engine_heartbeat_disable(data.engine, &heartbeat);
|
||||
engine_heartbeat_disable(data.engine);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
|
||||
struct intel_context *tmp;
|
||||
|
@ -5128,7 +5223,7 @@ static int live_lrc_timestamp(void *arg)
|
|||
}
|
||||
|
||||
err:
|
||||
engine_heartbeat_enable(data.engine, heartbeat);
|
||||
engine_heartbeat_enable(data.engine);
|
||||
for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
|
||||
if (!data.ce[i])
|
||||
break;
|
||||
|
|
|
@ -18,6 +18,20 @@ struct live_mocs {
|
|||
void *vaddr;
|
||||
};
|
||||
|
||||
static struct intel_context *mocs_context_create(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct intel_context *ce;
|
||||
|
||||
ce = intel_context_create(engine);
|
||||
if (IS_ERR(ce))
|
||||
return ce;
|
||||
|
||||
/* We build large requests to read the registers from the ring */
|
||||
ce->ring = __intel_context_ring_size(SZ_16K);
|
||||
|
||||
return ce;
|
||||
}
|
||||
|
||||
static int request_add_sync(struct i915_request *rq, int err)
|
||||
{
|
||||
i915_request_get(rq);
|
||||
|
@ -301,7 +315,7 @@ static int live_mocs_clean(void *arg)
|
|||
for_each_engine(engine, gt, id) {
|
||||
struct intel_context *ce;
|
||||
|
||||
ce = intel_context_create(engine);
|
||||
ce = mocs_context_create(engine);
|
||||
if (IS_ERR(ce)) {
|
||||
err = PTR_ERR(ce);
|
||||
break;
|
||||
|
@ -395,7 +409,7 @@ static int live_mocs_reset(void *arg)
|
|||
for_each_engine(engine, gt, id) {
|
||||
struct intel_context *ce;
|
||||
|
||||
ce = intel_context_create(engine);
|
||||
ce = mocs_context_create(engine);
|
||||
if (IS_ERR(ce)) {
|
||||
err = PTR_ERR(ce);
|
||||
break;
|
||||
|
|
|
@ -0,0 +1,110 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*/
|
||||
|
||||
static struct intel_ring *mock_ring(unsigned long sz)
|
||||
{
|
||||
struct intel_ring *ring;
|
||||
|
||||
ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
|
||||
if (!ring)
|
||||
return NULL;
|
||||
|
||||
kref_init(&ring->ref);
|
||||
ring->size = sz;
|
||||
ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(sz);
|
||||
ring->effective_size = sz;
|
||||
ring->vaddr = (void *)(ring + 1);
|
||||
atomic_set(&ring->pin_count, 1);
|
||||
|
||||
intel_ring_update_space(ring);
|
||||
|
||||
return ring;
|
||||
}
|
||||
|
||||
/* Release @ring with a plain kfree(). */
static void mock_ring_free(struct intel_ring *ring)
{
	kfree(ring);
}
|
||||
|
||||
/*
 * Compare the sign of intel_ring_direction(@ring, @next, @prev) with
 * @expected (-1, 0 or 1). Returns 0 on a match, otherwise logs the
 * mismatch and returns -EINVAL.
 */
static int check_ring_direction(struct intel_ring *ring,
				u32 next, u32 prev,
				int expected)
{
	int dir = intel_ring_direction(ring, next, prev);
	/* Reduce to the sign only; the magnitude is not under test. */
	int sign = (dir > 0) - (dir < 0);

	if (sign == expected)
		return 0;

	pr_err("intel_ring_direction(%u, %u):%d != %d\n",
	       next, prev, sign, expected);
	return -EINVAL;
}
|
||||
|
||||
/*
 * Check the direction between @x and the wrapped position @step bytes
 * after it: each position compared with itself must give 0, the later
 * against the earlier +1, and the reverse -1. Results are OR-ed together,
 * so any failure yields a non-zero return.
 */
static int check_ring_step(struct intel_ring *ring, u32 x, u32 step)
{
	const u32 ahead = intel_ring_wrap(ring, x + step);
	int err;

	err = check_ring_direction(ring, ahead, ahead, 0);
	err |= check_ring_direction(ring, x, x, 0);
	err |= check_ring_direction(ring, ahead, x, 1);
	err |= check_ring_direction(ring, x, ahead, -1);

	return err;
}
|
||||
|
||||
/*
 * Run check_ring_step() for @step starting from @x and from the wrapped
 * positions one byte after and one byte before it. Non-zero if any fail.
 */
static int check_ring_offset(struct intel_ring *ring, u32 x, u32 step)
{
	int err = check_ring_step(ring, x, step);

	err |= check_ring_step(ring, intel_ring_wrap(ring, x + 1), step);
	err |= check_ring_step(ring, intel_ring_wrap(ring, x - 1), step);

	return err;
}
|
||||
|
||||
static int igt_ring_direction(void *dummy)
|
||||
{
|
||||
struct intel_ring *ring;
|
||||
unsigned int half = 2048;
|
||||
int step, err = 0;
|
||||
|
||||
ring = mock_ring(2 * half);
|
||||
if (!ring)
|
||||
return -ENOMEM;
|
||||
|
||||
GEM_BUG_ON(ring->size != 2 * half);
|
||||
|
||||
/* Precision of wrap detection is limited to ring->size / 2 */
|
||||
for (step = 1; step < half; step <<= 1) {
|
||||
err |= check_ring_offset(ring, 0, step);
|
||||
err |= check_ring_offset(ring, half, step);
|
||||
}
|
||||
err |= check_ring_step(ring, 0, half - 64);
|
||||
|
||||
/* And check unwrapped handling for good measure */
|
||||
err |= check_ring_offset(ring, 0, 2 * half + 64);
|
||||
err |= check_ring_offset(ring, 3 * half, 1);
|
||||
|
||||
mock_ring_free(ring);
|
||||
return err;
|
||||
}
|
||||
|
||||
int intel_ring_mock_selftests(void)
|
||||
{
|
||||
static const struct i915_subtest tests[] = {
|
||||
SUBTEST(igt_ring_direction),
|
||||
};
|
||||
|
||||
return i915_subtests(tests, NULL);
|
||||
}
|
|
@ -20,24 +20,20 @@
|
|||
/* Try to isolate the impact of cstates from determining frequency response */
|
||||
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
|
||||
|
||||
static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine)
|
||||
static void engine_heartbeat_disable(struct intel_engine_cs *engine)
|
||||
{
|
||||
unsigned long old;
|
||||
|
||||
old = fetch_and_zero(&engine->props.heartbeat_interval_ms);
|
||||
engine->props.heartbeat_interval_ms = 0;
|
||||
|
||||
intel_engine_pm_get(engine);
|
||||
intel_engine_park_heartbeat(engine);
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
static void engine_heartbeat_enable(struct intel_engine_cs *engine,
|
||||
unsigned long saved)
|
||||
static void engine_heartbeat_enable(struct intel_engine_cs *engine)
|
||||
{
|
||||
intel_engine_pm_put(engine);
|
||||
|
||||
engine->props.heartbeat_interval_ms = saved;
|
||||
engine->props.heartbeat_interval_ms =
|
||||
engine->defaults.heartbeat_interval_ms;
|
||||
}
|
||||
|
||||
static void dummy_rps_work(struct work_struct *wrk)
|
||||
|
@ -246,7 +242,6 @@ int live_rps_clock_interval(void *arg)
|
|||
intel_gt_check_clock_frequency(gt);
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
unsigned long saved_heartbeat;
|
||||
struct i915_request *rq;
|
||||
u32 cycles;
|
||||
u64 dt;
|
||||
|
@ -254,13 +249,13 @@ int live_rps_clock_interval(void *arg)
|
|||
if (!intel_engine_can_store_dword(engine))
|
||||
continue;
|
||||
|
||||
saved_heartbeat = engine_heartbeat_disable(engine);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
rq = igt_spinner_create_request(&spin,
|
||||
engine->kernel_context,
|
||||
MI_NOOP);
|
||||
if (IS_ERR(rq)) {
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
err = PTR_ERR(rq);
|
||||
break;
|
||||
}
|
||||
|
@ -271,7 +266,7 @@ int live_rps_clock_interval(void *arg)
|
|||
pr_err("%s: RPS spinner did not start\n",
|
||||
engine->name);
|
||||
igt_spinner_end(&spin);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
intel_gt_set_wedged(engine->gt);
|
||||
err = -EIO;
|
||||
break;
|
||||
|
@ -327,7 +322,7 @@ int live_rps_clock_interval(void *arg)
|
|||
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
|
||||
|
||||
igt_spinner_end(&spin);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
|
||||
if (err == 0) {
|
||||
u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
|
||||
|
@ -405,7 +400,6 @@ int live_rps_control(void *arg)
|
|||
|
||||
intel_gt_pm_get(gt);
|
||||
for_each_engine(engine, gt, id) {
|
||||
unsigned long saved_heartbeat;
|
||||
struct i915_request *rq;
|
||||
ktime_t min_dt, max_dt;
|
||||
int f, limit;
|
||||
|
@ -414,7 +408,7 @@ int live_rps_control(void *arg)
|
|||
if (!intel_engine_can_store_dword(engine))
|
||||
continue;
|
||||
|
||||
saved_heartbeat = engine_heartbeat_disable(engine);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
rq = igt_spinner_create_request(&spin,
|
||||
engine->kernel_context,
|
||||
|
@ -430,7 +424,7 @@ int live_rps_control(void *arg)
|
|||
pr_err("%s: RPS spinner did not start\n",
|
||||
engine->name);
|
||||
igt_spinner_end(&spin);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
intel_gt_set_wedged(engine->gt);
|
||||
err = -EIO;
|
||||
break;
|
||||
|
@ -440,7 +434,7 @@ int live_rps_control(void *arg)
|
|||
pr_err("%s: could not set minimum frequency [%x], only %x!\n",
|
||||
engine->name, rps->min_freq, read_cagf(rps));
|
||||
igt_spinner_end(&spin);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
show_pstate_limits(rps);
|
||||
err = -EINVAL;
|
||||
break;
|
||||
|
@ -457,7 +451,7 @@ int live_rps_control(void *arg)
|
|||
pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
|
||||
engine->name, rps->min_freq, read_cagf(rps));
|
||||
igt_spinner_end(&spin);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
show_pstate_limits(rps);
|
||||
err = -EINVAL;
|
||||
break;
|
||||
|
@ -472,7 +466,7 @@ int live_rps_control(void *arg)
|
|||
min_dt = ktime_sub(ktime_get(), min_dt);
|
||||
|
||||
igt_spinner_end(&spin);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
|
||||
pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
|
||||
engine->name,
|
||||
|
@ -635,7 +629,6 @@ int live_rps_frequency_cs(void *arg)
|
|||
rps->work.func = dummy_rps_work;
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
unsigned long saved_heartbeat;
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *vma;
|
||||
u32 *cancel, *cntr;
|
||||
|
@ -644,14 +637,14 @@ int live_rps_frequency_cs(void *arg)
|
|||
int freq;
|
||||
} min, max;
|
||||
|
||||
saved_heartbeat = engine_heartbeat_disable(engine);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
vma = create_spin_counter(engine,
|
||||
engine->kernel_context->vm, false,
|
||||
&cancel, &cntr);
|
||||
if (IS_ERR(vma)) {
|
||||
err = PTR_ERR(vma);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -732,7 +725,7 @@ err_vma:
|
|||
i915_vma_unpin(vma);
|
||||
i915_vma_put(vma);
|
||||
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (igt_flush_test(gt->i915))
|
||||
err = -EIO;
|
||||
if (err)
|
||||
|
@ -778,7 +771,6 @@ int live_rps_frequency_srm(void *arg)
|
|||
rps->work.func = dummy_rps_work;
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
unsigned long saved_heartbeat;
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *vma;
|
||||
u32 *cancel, *cntr;
|
||||
|
@ -787,14 +779,14 @@ int live_rps_frequency_srm(void *arg)
|
|||
int freq;
|
||||
} min, max;
|
||||
|
||||
saved_heartbeat = engine_heartbeat_disable(engine);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
vma = create_spin_counter(engine,
|
||||
engine->kernel_context->vm, true,
|
||||
&cancel, &cntr);
|
||||
if (IS_ERR(vma)) {
|
||||
err = PTR_ERR(vma);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -874,7 +866,7 @@ err_vma:
|
|||
i915_vma_unpin(vma);
|
||||
i915_vma_put(vma);
|
||||
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (igt_flush_test(gt->i915))
|
||||
err = -EIO;
|
||||
if (err)
|
||||
|
@ -1066,16 +1058,14 @@ int live_rps_interrupt(void *arg)
|
|||
for_each_engine(engine, gt, id) {
|
||||
/* Keep the engine busy with a spinner; expect an UP! */
|
||||
if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
|
||||
unsigned long saved_heartbeat;
|
||||
|
||||
intel_gt_pm_wait_for_idle(engine->gt);
|
||||
GEM_BUG_ON(intel_rps_is_active(rps));
|
||||
|
||||
saved_heartbeat = engine_heartbeat_disable(engine);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
err = __rps_up_interrupt(rps, engine, &spin);
|
||||
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
|
@ -1084,15 +1074,13 @@ int live_rps_interrupt(void *arg)
|
|||
|
||||
/* Keep the engine awake but idle and check for DOWN */
|
||||
if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
|
||||
unsigned long saved_heartbeat;
|
||||
|
||||
saved_heartbeat = engine_heartbeat_disable(engine);
|
||||
engine_heartbeat_disable(engine);
|
||||
intel_rc6_disable(&gt->rc6);
|
||||
|
||||
err = __rps_down_interrupt(rps, engine);
|
||||
|
||||
intel_rc6_enable(&gt->rc6);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
@ -1168,7 +1156,6 @@ int live_rps_power(void *arg)
|
|||
rps->work.func = dummy_rps_work;
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
unsigned long saved_heartbeat;
|
||||
struct i915_request *rq;
|
||||
struct {
|
||||
u64 power;
|
||||
|
@ -1178,13 +1165,13 @@ int live_rps_power(void *arg)
|
|||
if (!intel_engine_can_store_dword(engine))
|
||||
continue;
|
||||
|
||||
saved_heartbeat = engine_heartbeat_disable(engine);
|
||||
engine_heartbeat_disable(engine);
|
||||
|
||||
rq = igt_spinner_create_request(&spin,
|
||||
engine->kernel_context,
|
||||
MI_NOOP);
|
||||
if (IS_ERR(rq)) {
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
err = PTR_ERR(rq);
|
||||
break;
|
||||
}
|
||||
|
@ -1195,7 +1182,7 @@ int live_rps_power(void *arg)
|
|||
pr_err("%s: RPS spinner did not start\n",
|
||||
engine->name);
|
||||
igt_spinner_end(&spin);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
intel_gt_set_wedged(engine->gt);
|
||||
err = -EIO;
|
||||
break;
|
||||
|
@ -1208,7 +1195,7 @@ int live_rps_power(void *arg)
|
|||
min.power = measure_power_at(rps, &min.freq);
|
||||
|
||||
igt_spinner_end(&spin);
|
||||
engine_heartbeat_enable(engine, saved_heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
|
||||
pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
|
||||
engine->name,
|
||||
|
|
|
@ -751,22 +751,20 @@ out_free:
|
|||
return err;
|
||||
}
|
||||
|
||||
static void engine_heartbeat_disable(struct intel_engine_cs *engine,
|
||||
unsigned long *saved)
|
||||
static void engine_heartbeat_disable(struct intel_engine_cs *engine)
|
||||
{
|
||||
*saved = engine->props.heartbeat_interval_ms;
|
||||
engine->props.heartbeat_interval_ms = 0;
|
||||
|
||||
intel_engine_pm_get(engine);
|
||||
intel_engine_park_heartbeat(engine);
|
||||
}
|
||||
|
||||
static void engine_heartbeat_enable(struct intel_engine_cs *engine,
|
||||
unsigned long saved)
|
||||
static void engine_heartbeat_enable(struct intel_engine_cs *engine)
|
||||
{
|
||||
intel_engine_pm_put(engine);
|
||||
|
||||
engine->props.heartbeat_interval_ms = saved;
|
||||
engine->props.heartbeat_interval_ms =
|
||||
engine->defaults.heartbeat_interval_ms;
|
||||
}
|
||||
|
||||
static int live_hwsp_rollover_kernel(void *arg)
|
||||
|
@ -785,10 +783,9 @@ static int live_hwsp_rollover_kernel(void *arg)
|
|||
struct intel_context *ce = engine->kernel_context;
|
||||
struct intel_timeline *tl = ce->timeline;
|
||||
struct i915_request *rq[3] = {};
|
||||
unsigned long heartbeat;
|
||||
int i;
|
||||
|
||||
engine_heartbeat_disable(engine, &heartbeat);
|
||||
engine_heartbeat_disable(engine);
|
||||
if (intel_gt_wait_for_idle(gt, HZ / 2)) {
|
||||
err = -EIO;
|
||||
goto out;
|
||||
|
@ -839,7 +836,7 @@ static int live_hwsp_rollover_kernel(void *arg)
|
|||
out:
|
||||
for (i = 0; i < ARRAY_SIZE(rq); i++)
|
||||
i915_request_put(rq[i]);
|
||||
engine_heartbeat_enable(engine, heartbeat);
|
||||
engine_heartbeat_enable(engine);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -623,6 +623,8 @@ err_request:
|
|||
err = -EINVAL;
|
||||
goto out_unpin;
|
||||
}
|
||||
} else {
|
||||
rsvd = 0;
|
||||
}
|
||||
|
||||
expect = results[0];
|
||||
|
|
|
@ -3125,6 +3125,7 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv)
|
|||
|
||||
val = I915_READ(GEN11_DE_HPD_IMR);
|
||||
val &= ~hotplug_irqs;
|
||||
val |= ~enabled_irqs & hotplug_irqs;
|
||||
I915_WRITE(GEN11_DE_HPD_IMR, val);
|
||||
POSTING_READ(GEN11_DE_HPD_IMR);
|
||||
|
||||
|
|
|
@ -269,12 +269,48 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915)
|
|||
return IS_GEN(i915, 7);
|
||||
}
|
||||
|
||||
static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
|
||||
{
|
||||
struct intel_engine_pmu *pmu = &engine->pmu;
|
||||
bool busy;
|
||||
u32 val;
|
||||
|
||||
val = ENGINE_READ_FW(engine, RING_CTL);
|
||||
if (val == 0) /* powerwell off => engine idle */
|
||||
return;
|
||||
|
||||
if (val & RING_WAIT)
|
||||
add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
|
||||
if (val & RING_WAIT_SEMAPHORE)
|
||||
add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
|
||||
|
||||
/* No need to sample when busy stats are supported. */
|
||||
if (intel_engine_supports_stats(engine))
|
||||
return;
|
||||
|
||||
/*
|
||||
* While waiting on a semaphore or event, MI_MODE reports the
|
||||
* ring as idle. However, previously using the seqno, and with
|
||||
* execlists sampling, we account for the ring waiting as the
|
||||
* engine being busy. Therefore, we record the sample as being
|
||||
* busy if either waiting or !idle.
|
||||
*/
|
||||
busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
|
||||
if (!busy) {
|
||||
val = ENGINE_READ_FW(engine, RING_MI_MODE);
|
||||
busy = !(val & MODE_IDLE);
|
||||
}
|
||||
if (busy)
|
||||
add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
|
||||
}
|
||||
|
||||
static void
|
||||
engines_sample(struct intel_gt *gt, unsigned int period_ns)
|
||||
{
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
struct intel_engine_cs *engine;
|
||||
enum intel_engine_id id;
|
||||
unsigned long flags;
|
||||
|
||||
if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
|
||||
return;
|
||||
|
@ -283,53 +319,17 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
|
|||
return;
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
struct intel_engine_pmu *pmu = &engine->pmu;
|
||||
spinlock_t *mmio_lock;
|
||||
unsigned long flags;
|
||||
bool busy;
|
||||
u32 val;
|
||||
|
||||
if (!intel_engine_pm_get_if_awake(engine))
|
||||
continue;
|
||||
|
||||
mmio_lock = NULL;
|
||||
if (exclusive_mmio_access(i915))
|
||||
mmio_lock = &engine->uncore->lock;
|
||||
|
||||
if (unlikely(mmio_lock))
|
||||
spin_lock_irqsave(mmio_lock, flags);
|
||||
|
||||
val = ENGINE_READ_FW(engine, RING_CTL);
|
||||
if (val == 0) /* powerwell off => engine idle */
|
||||
goto skip;
|
||||
|
||||
if (val & RING_WAIT)
|
||||
add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
|
||||
if (val & RING_WAIT_SEMAPHORE)
|
||||
add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
|
||||
|
||||
/* No need to sample when busy stats are supported. */
|
||||
if (intel_engine_supports_stats(engine))
|
||||
goto skip;
|
||||
|
||||
/*
|
||||
* While waiting on a semaphore or event, MI_MODE reports the
|
||||
* ring as idle. However, previously using the seqno, and with
|
||||
* execlists sampling, we account for the ring waiting as the
|
||||
* engine being busy. Therefore, we record the sample as being
|
||||
* busy if either waiting or !idle.
|
||||
*/
|
||||
busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
|
||||
if (!busy) {
|
||||
val = ENGINE_READ_FW(engine, RING_MI_MODE);
|
||||
busy = !(val & MODE_IDLE);
|
||||
if (exclusive_mmio_access(i915)) {
|
||||
spin_lock_irqsave(&engine->uncore->lock, flags);
|
||||
engine_sample(engine, period_ns);
|
||||
spin_unlock_irqrestore(&engine->uncore->lock, flags);
|
||||
} else {
|
||||
engine_sample(engine, period_ns);
|
||||
}
|
||||
if (busy)
|
||||
add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
|
||||
|
||||
skip:
|
||||
if (unlikely(mmio_lock))
|
||||
spin_unlock_irqrestore(mmio_lock, flags);
|
||||
intel_engine_pm_put_async(engine);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ enum {
|
|||
* active request.
|
||||
*/
|
||||
#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
|
||||
#define I915_PRIORITY_BARRIER INT_MAX
|
||||
#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
|
||||
|
||||
struct i915_priolist {
|
||||
struct list_head requests[I915_PRIORITY_COUNT];
|
||||
|
|
|
@ -7896,7 +7896,7 @@ enum {
|
|||
|
||||
/* GEN7 chicken */
|
||||
#define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010)
|
||||
#define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1 << 10) | (1 << 26))
|
||||
#define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC (1 << 10)
|
||||
#define GEN9_RHWO_OPTIMIZATION_DISABLE (1 << 14)
|
||||
|
||||
#define COMMON_SLICE_CHICKEN2 _MMIO(0x7014)
|
||||
|
|
|
@ -6830,16 +6830,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
I915_WRITE(ILK_DISPLAY_CHICKEN2,
|
||||
I915_READ(ILK_DISPLAY_CHICKEN2) |
|
||||
ILK_ELPIN_409_SELECT);
|
||||
I915_WRITE(_3D_CHICKEN2,
|
||||
_3D_CHICKEN2_WM_READ_PIPELINED << 16 |
|
||||
_3D_CHICKEN2_WM_READ_PIPELINED);
|
||||
|
||||
/* WaDisableRenderCachePipelinedFlush:ilk */
|
||||
I915_WRITE(CACHE_MODE_0,
|
||||
_MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:ilk */
|
||||
I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
|
||||
|
||||
g4x_disable_trickle_feed(dev_priv);
|
||||
|
||||
|
@ -6902,27 +6892,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
I915_READ(ILK_DISPLAY_CHICKEN2) |
|
||||
ILK_ELPIN_409_SELECT);
|
||||
|
||||
/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
|
||||
I915_WRITE(_3D_CHICKEN,
|
||||
_MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:snb */
|
||||
I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
|
||||
|
||||
/*
|
||||
* BSpec recoomends 8x4 when MSAA is used,
|
||||
* however in practice 16x4 seems fastest.
|
||||
*
|
||||
* Note that PS/WM thread counts depend on the WIZ hashing
|
||||
* disable bit, which we don't touch here, but it's good
|
||||
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
|
||||
*/
|
||||
I915_WRITE(GEN6_GT_MODE,
|
||||
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
|
||||
|
||||
I915_WRITE(CACHE_MODE_0,
|
||||
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
|
||||
|
||||
I915_WRITE(GEN6_UCGCTL1,
|
||||
I915_READ(GEN6_UCGCTL1) |
|
||||
GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
|
||||
|
@ -6945,18 +6914,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
|
||||
GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
|
||||
|
||||
/* WaStripsFansDisableFastClipPerformanceFix:snb */
|
||||
I915_WRITE(_3D_CHICKEN3,
|
||||
_MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
|
||||
|
||||
/*
|
||||
* Bspec says:
|
||||
* "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
|
||||
* 3DSTATE_SF number of SF output attributes is more than 16."
|
||||
*/
|
||||
I915_WRITE(_3D_CHICKEN3,
|
||||
_MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
|
||||
|
||||
/*
|
||||
* According to the spec the following bits should be
|
||||
* set in order to enable memory self-refresh and fbc:
|
||||
|
@ -6986,24 +6943,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
gen6_check_mch_setup(dev_priv);
|
||||
}
|
||||
|
||||
static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
|
||||
|
||||
/*
|
||||
* WaVSThreadDispatchOverride:ivb,vlv
|
||||
*
|
||||
* This actually overrides the dispatch
|
||||
* mode for all thread types.
|
||||
*/
|
||||
reg &= ~GEN7_FF_SCHED_MASK;
|
||||
reg |= GEN7_FF_TS_SCHED_HW;
|
||||
reg |= GEN7_FF_VS_SCHED_HW;
|
||||
reg |= GEN7_FF_DS_SCHED_HW;
|
||||
|
||||
I915_WRITE(GEN7_FF_THREAD_MODE, reg);
|
||||
}
|
||||
|
||||
static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
/*
|
||||
|
@ -7230,45 +7169,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
|
||||
static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
/* L3 caching of data atomics doesn't work -- disable it. */
|
||||
I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
|
||||
I915_WRITE(HSW_ROW_CHICKEN3,
|
||||
_MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
|
||||
|
||||
/* This is required by WaCatErrorRejectionIssue:hsw */
|
||||
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
|
||||
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
|
||||
GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
|
||||
|
||||
/* WaVSRefCountFullforceMissDisable:hsw */
|
||||
I915_WRITE(GEN7_FF_THREAD_MODE,
|
||||
I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:hsw */
|
||||
I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
|
||||
|
||||
/* enable HiZ Raw Stall Optimization */
|
||||
I915_WRITE(CACHE_MODE_0_GEN7,
|
||||
_MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
|
||||
|
||||
/* WaDisable4x2SubspanOptimization:hsw */
|
||||
I915_WRITE(CACHE_MODE_1,
|
||||
_MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
|
||||
|
||||
/*
|
||||
* BSpec recommends 8x4 when MSAA is used,
|
||||
* however in practice 16x4 seems fastest.
|
||||
*
|
||||
* Note that PS/WM thread counts depend on the WIZ hashing
|
||||
* disable bit, which we don't touch here, but it's good
|
||||
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
|
||||
*/
|
||||
I915_WRITE(GEN7_GT_MODE,
|
||||
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
|
||||
|
||||
/* WaSampleCChickenBitEnable:hsw */
|
||||
I915_WRITE(HALF_SLICE_CHICKEN3,
|
||||
_MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
|
||||
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
|
||||
GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
|
||||
|
||||
/* WaSwitchSolVfFArbitrationPriority:hsw */
|
||||
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
|
||||
|
@ -7282,32 +7186,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
|
||||
I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
|
||||
|
||||
/* WaDisableEarlyCull:ivb */
|
||||
I915_WRITE(_3D_CHICKEN3,
|
||||
_MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
|
||||
|
||||
/* WaDisableBackToBackFlipFix:ivb */
|
||||
I915_WRITE(IVB_CHICKEN3,
|
||||
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
|
||||
CHICKEN3_DGMG_DONE_FIX_DISABLE);
|
||||
|
||||
/* WaDisablePSDDualDispatchEnable:ivb */
|
||||
if (IS_IVB_GT1(dev_priv))
|
||||
I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
|
||||
_MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:ivb */
|
||||
I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
|
||||
|
||||
/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
|
||||
I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
|
||||
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
|
||||
|
||||
/* WaApplyL3ControlAndL3ChickenMode:ivb */
|
||||
I915_WRITE(GEN7_L3CNTLREG1,
|
||||
GEN7_WA_FOR_GEN7_L3_CONTROL);
|
||||
I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
|
||||
GEN7_WA_L3_CHICKEN_MODE);
|
||||
if (IS_IVB_GT1(dev_priv))
|
||||
I915_WRITE(GEN7_ROW_CHICKEN2,
|
||||
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
|
||||
|
@ -7319,10 +7202,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
|
||||
}
|
||||
|
||||
/* WaForceL3Serialization:ivb */
|
||||
I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
|
||||
~L3SQ_URB_READ_CAM_MATCH_DISABLE);
|
||||
|
||||
/*
|
||||
* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
|
||||
* This implements the WaDisableRCZUnitClockGating:ivb workaround.
|
||||
|
@ -7337,29 +7216,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
|
||||
g4x_disable_trickle_feed(dev_priv);
|
||||
|
||||
gen7_setup_fixed_func_scheduler(dev_priv);
|
||||
|
||||
if (0) { /* causes HiZ corruption on ivb:gt1 */
|
||||
/* enable HiZ Raw Stall Optimization */
|
||||
I915_WRITE(CACHE_MODE_0_GEN7,
|
||||
_MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
|
||||
}
|
||||
|
||||
/* WaDisable4x2SubspanOptimization:ivb */
|
||||
I915_WRITE(CACHE_MODE_1,
|
||||
_MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
|
||||
|
||||
/*
|
||||
* BSpec recommends 8x4 when MSAA is used,
|
||||
* however in practice 16x4 seems fastest.
|
||||
*
|
||||
* Note that PS/WM thread counts depend on the WIZ hashing
|
||||
* disable bit, which we don't touch here, but it's good
|
||||
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
|
||||
*/
|
||||
I915_WRITE(GEN7_GT_MODE,
|
||||
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
|
||||
|
||||
snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
|
||||
snpcr &= ~GEN6_MBC_SNPCR_MASK;
|
||||
snpcr |= GEN6_MBC_SNPCR_MED;
|
||||
|
@ -7373,28 +7229,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
|
||||
static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
/* WaDisableEarlyCull:vlv */
|
||||
I915_WRITE(_3D_CHICKEN3,
|
||||
_MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
|
||||
|
||||
/* WaDisableBackToBackFlipFix:vlv */
|
||||
I915_WRITE(IVB_CHICKEN3,
|
||||
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
|
||||
CHICKEN3_DGMG_DONE_FIX_DISABLE);
|
||||
|
||||
/* WaPsdDispatchEnable:vlv */
|
||||
/* WaDisablePSDDualDispatchEnable:vlv */
|
||||
I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
|
||||
_MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
|
||||
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:vlv */
|
||||
I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
|
||||
|
||||
/* WaForceL3Serialization:vlv */
|
||||
I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
|
||||
~L3SQ_URB_READ_CAM_MATCH_DISABLE);
|
||||
|
||||
/* WaDisableDopClockGating:vlv */
|
||||
I915_WRITE(GEN7_ROW_CHICKEN2,
|
||||
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
|
||||
|
@ -7404,8 +7243,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
|
||||
GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
|
||||
|
||||
gen7_setup_fixed_func_scheduler(dev_priv);
|
||||
|
||||
/*
|
||||
* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
|
||||
* This implements the WaDisableRCZUnitClockGating:vlv workaround.
|
||||
|
@ -7419,30 +7256,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
I915_WRITE(GEN7_UCGCTL4,
|
||||
I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
|
||||
|
||||
/*
|
||||
* BSpec says this must be set, even though
|
||||
* WaDisable4x2SubspanOptimization isn't listed for VLV.
|
||||
*/
|
||||
I915_WRITE(CACHE_MODE_1,
|
||||
_MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
|
||||
|
||||
/*
|
||||
* BSpec recommends 8x4 when MSAA is used,
|
||||
* however in practice 16x4 seems fastest.
|
||||
*
|
||||
* Note that PS/WM thread counts depend on the WIZ hashing
|
||||
* disable bit, which we don't touch here, but it's good
|
||||
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
|
||||
*/
|
||||
I915_WRITE(GEN7_GT_MODE,
|
||||
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
|
||||
|
||||
/*
|
||||
* WaIncreaseL3CreditsForVLVB0:vlv
|
||||
* This is the hardware default actually.
|
||||
*/
|
||||
I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
|
||||
|
||||
/*
|
||||
* WaDisableVLVClockGating_VBIIssue:vlv
|
||||
* Disable clock gating on th GCFG unit to prevent a delay
|
||||
|
@ -7495,13 +7308,6 @@ static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
|
||||
I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
|
||||
|
||||
/* WaDisableRenderCachePipelinedFlush */
|
||||
I915_WRITE(CACHE_MODE_0,
|
||||
_MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:g4x */
|
||||
I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
|
||||
|
||||
g4x_disable_trickle_feed(dev_priv);
|
||||
}
|
||||
|
||||
|
@ -7517,11 +7323,6 @@ static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
intel_uncore_write(uncore,
|
||||
MI_ARB_STATE,
|
||||
_MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:gen4 */
|
||||
intel_uncore_write(uncore,
|
||||
CACHE_MODE_0,
|
||||
_MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
|
||||
}
|
||||
|
||||
static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
|
||||
|
@ -7534,9 +7335,6 @@ static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
|
|||
I915_WRITE(RENCLK_GATE_D2, 0);
|
||||
I915_WRITE(MI_ARB_STATE,
|
||||
_MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
|
||||
|
||||
/* WaDisable_RenderCache_OperationalFlush:gen4 */
|
||||
I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
|
||||
}
|
||||
|
||||
static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
|
||||
|
|
|
@ -21,6 +21,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
|
|||
selftest(scatterlist, scatterlist_mock_selftests)
|
||||
selftest(syncmap, i915_syncmap_mock_selftests)
|
||||
selftest(uncore, intel_uncore_mock_selftests)
|
||||
selftest(ring, intel_ring_mock_selftests)
|
||||
selftest(engine, intel_engine_cs_mock_selftests)
|
||||
selftest(timelines, intel_timeline_mock_selftests)
|
||||
selftest(requests, i915_request_mock_selftests)
|
||||
|
|
Loading…
Reference in New Issue