drm/i915/skl+: Optimize WM calculation
Plane configuration parameters don't change between WM-level calculations. Currently we compute the same parameters 8 times, once for each WM level. This patch optimizes this by calculating these parameters once at the beginning and reusing them during each WM-level calculation. Changes since V1: - rebase on top of Rodrigo's series for CNL Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170817134529.2839-3-mahesh1.kumar@intel.com
This commit is contained in:
parent
0b4d7cbff2
commit
7e452fdbfc
|
@ -1837,6 +1837,20 @@ struct skl_wm_level {
|
|||
uint8_t plane_res_l;
|
||||
};
|
||||
|
||||
/* Stores plane specific WM parameters */
|
||||
struct skl_wm_params {
|
||||
bool x_tiled, y_tiled;
|
||||
bool rc_surface;
|
||||
uint32_t width;
|
||||
uint8_t cpp;
|
||||
uint32_t plane_pixel_rate;
|
||||
uint32_t y_min_scanlines;
|
||||
uint32_t plane_bytes_per_line;
|
||||
uint_fixed_16_16_t plane_blocks_per_line;
|
||||
uint_fixed_16_16_t y_tile_minimum;
|
||||
uint32_t linetime_us;
|
||||
};
|
||||
|
||||
/*
|
||||
* This struct helps tracking the state needed for runtime PM, which puts the
|
||||
* device in PCI D3 state. Notice that when this happens, nothing on the
|
||||
|
|
|
@ -4376,34 +4376,114 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
|
|||
downscale_amount);
|
||||
}
|
||||
|
||||
static int
|
||||
skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
|
||||
struct intel_crtc_state *cstate,
|
||||
const struct intel_plane_state *intel_pstate,
|
||||
struct skl_wm_params *wp)
|
||||
{
|
||||
struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
|
||||
const struct drm_plane_state *pstate = &intel_pstate->base;
|
||||
const struct drm_framebuffer *fb = pstate->fb;
|
||||
uint32_t interm_pbpl;
|
||||
struct intel_atomic_state *state =
|
||||
to_intel_atomic_state(cstate->base.state);
|
||||
bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
|
||||
|
||||
if (!intel_wm_plane_visible(cstate, intel_pstate))
|
||||
return 0;
|
||||
|
||||
wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
|
||||
fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
|
||||
fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
|
||||
fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
|
||||
wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
|
||||
wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
|
||||
fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
|
||||
|
||||
if (plane->id == PLANE_CURSOR) {
|
||||
wp->width = intel_pstate->base.crtc_w;
|
||||
} else {
|
||||
/*
|
||||
* Src coordinates are already rotated by 270 degrees for
|
||||
* the 90/270 degree plane rotation cases (to match the
|
||||
* GTT mapping), hence no need to account for rotation here.
|
||||
*/
|
||||
wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
|
||||
}
|
||||
|
||||
wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
|
||||
fb->format->cpp[0];
|
||||
wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
|
||||
intel_pstate);
|
||||
|
||||
if (drm_rotation_90_or_270(pstate->rotation)) {
|
||||
|
||||
switch (wp->cpp) {
|
||||
case 1:
|
||||
wp->y_min_scanlines = 16;
|
||||
break;
|
||||
case 2:
|
||||
wp->y_min_scanlines = 8;
|
||||
break;
|
||||
case 4:
|
||||
wp->y_min_scanlines = 4;
|
||||
break;
|
||||
default:
|
||||
MISSING_CASE(wp->cpp);
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
wp->y_min_scanlines = 4;
|
||||
}
|
||||
|
||||
if (apply_memory_bw_wa)
|
||||
wp->y_min_scanlines *= 2;
|
||||
|
||||
wp->plane_bytes_per_line = wp->width * wp->cpp;
|
||||
if (wp->y_tiled) {
|
||||
interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
|
||||
wp->y_min_scanlines, 512);
|
||||
|
||||
if (INTEL_GEN(dev_priv) >= 10)
|
||||
interm_pbpl++;
|
||||
|
||||
wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
|
||||
wp->y_min_scanlines);
|
||||
} else if (wp->x_tiled && IS_GEN9(dev_priv)) {
|
||||
interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
|
||||
wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
|
||||
} else {
|
||||
interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
|
||||
wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
|
||||
}
|
||||
|
||||
wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
|
||||
wp->plane_blocks_per_line);
|
||||
wp->linetime_us = fixed16_to_u32_round_up(
|
||||
intel_get_linetime_us(cstate));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
|
||||
struct intel_crtc_state *cstate,
|
||||
const struct intel_plane_state *intel_pstate,
|
||||
uint16_t ddb_allocation,
|
||||
int level,
|
||||
const struct skl_wm_params *wp,
|
||||
uint16_t *out_blocks, /* out */
|
||||
uint8_t *out_lines, /* out */
|
||||
bool *enabled /* out */)
|
||||
{
|
||||
struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
|
||||
const struct drm_plane_state *pstate = &intel_pstate->base;
|
||||
const struct drm_framebuffer *fb = pstate->fb;
|
||||
uint32_t latency = dev_priv->wm.skl_latency[level];
|
||||
uint_fixed_16_16_t method1, method2;
|
||||
uint_fixed_16_16_t plane_blocks_per_line;
|
||||
uint_fixed_16_16_t selected_result;
|
||||
uint32_t interm_pbpl;
|
||||
uint32_t plane_bytes_per_line;
|
||||
uint32_t res_blocks, res_lines;
|
||||
uint8_t cpp;
|
||||
uint32_t width = 0;
|
||||
uint32_t plane_pixel_rate;
|
||||
uint_fixed_16_16_t y_tile_minimum;
|
||||
uint32_t y_min_scanlines;
|
||||
struct intel_atomic_state *state =
|
||||
to_intel_atomic_state(cstate->base.state);
|
||||
bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
|
||||
bool y_tiled, x_tiled;
|
||||
|
||||
if (latency == 0 ||
|
||||
!intel_wm_plane_visible(cstate, intel_pstate)) {
|
||||
|
@ -4411,99 +4491,31 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
|
|||
return 0;
|
||||
}
|
||||
|
||||
y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
|
||||
fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
|
||||
fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
|
||||
fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
|
||||
x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
|
||||
|
||||
/* Display WA #1141: kbl,cfl */
|
||||
if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
|
||||
dev_priv->ipc_enabled)
|
||||
latency += 4;
|
||||
|
||||
if (apply_memory_bw_wa && x_tiled)
|
||||
if (apply_memory_bw_wa && wp->x_tiled)
|
||||
latency += 15;
|
||||
|
||||
if (plane->id == PLANE_CURSOR) {
|
||||
width = intel_pstate->base.crtc_w;
|
||||
} else {
|
||||
/*
|
||||
* Src coordinates are already rotated by 270 degrees for
|
||||
* the 90/270 degree plane rotation cases (to match the
|
||||
* GTT mapping), hence no need to account for rotation here.
|
||||
*/
|
||||
width = drm_rect_width(&intel_pstate->base.src) >> 16;
|
||||
}
|
||||
|
||||
cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
|
||||
fb->format->cpp[0];
|
||||
plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
|
||||
|
||||
if (drm_rotation_90_or_270(pstate->rotation)) {
|
||||
|
||||
switch (cpp) {
|
||||
case 1:
|
||||
y_min_scanlines = 16;
|
||||
break;
|
||||
case 2:
|
||||
y_min_scanlines = 8;
|
||||
break;
|
||||
case 4:
|
||||
y_min_scanlines = 4;
|
||||
break;
|
||||
default:
|
||||
MISSING_CASE(cpp);
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
y_min_scanlines = 4;
|
||||
}
|
||||
|
||||
if (apply_memory_bw_wa)
|
||||
y_min_scanlines *= 2;
|
||||
|
||||
plane_bytes_per_line = width * cpp;
|
||||
if (y_tiled) {
|
||||
interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
|
||||
y_min_scanlines, 512);
|
||||
|
||||
if (INTEL_GEN(dev_priv) >= 10)
|
||||
interm_pbpl++;
|
||||
|
||||
plane_blocks_per_line = div_fixed16(interm_pbpl,
|
||||
y_min_scanlines);
|
||||
} else if (x_tiled && INTEL_GEN(dev_priv) == 9) {
|
||||
interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
|
||||
plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
|
||||
} else {
|
||||
interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
|
||||
plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
|
||||
}
|
||||
|
||||
method1 = skl_wm_method1(dev_priv, plane_pixel_rate, cpp, latency);
|
||||
method2 = skl_wm_method2(plane_pixel_rate,
|
||||
method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
|
||||
wp->cpp, latency);
|
||||
method2 = skl_wm_method2(wp->plane_pixel_rate,
|
||||
cstate->base.adjusted_mode.crtc_htotal,
|
||||
latency,
|
||||
plane_blocks_per_line);
|
||||
wp->plane_blocks_per_line);
|
||||
|
||||
y_tile_minimum = mul_u32_fixed16(y_min_scanlines,
|
||||
plane_blocks_per_line);
|
||||
|
||||
if (y_tiled) {
|
||||
selected_result = max_fixed16(method2, y_tile_minimum);
|
||||
if (wp->y_tiled) {
|
||||
selected_result = max_fixed16(method2, wp->y_tile_minimum);
|
||||
} else {
|
||||
uint32_t linetime_us;
|
||||
|
||||
linetime_us = fixed16_to_u32_round_up(
|
||||
intel_get_linetime_us(cstate));
|
||||
if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
|
||||
(plane_bytes_per_line / 512 < 1))
|
||||
if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
|
||||
512 < 1) && (wp->plane_bytes_per_line / 512 < 1))
|
||||
selected_result = method2;
|
||||
else if (ddb_allocation >=
|
||||
fixed16_to_u32_round_up(plane_blocks_per_line))
|
||||
fixed16_to_u32_round_up(wp->plane_blocks_per_line))
|
||||
selected_result = min_fixed16(method1, method2);
|
||||
else if (latency >= linetime_us)
|
||||
else if (latency >= wp->linetime_us)
|
||||
selected_result = min_fixed16(method1, method2);
|
||||
else
|
||||
selected_result = method1;
|
||||
|
@ -4511,19 +4523,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
|
|||
|
||||
res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
|
||||
res_lines = div_round_up_fixed16(selected_result,
|
||||
plane_blocks_per_line);
|
||||
wp->plane_blocks_per_line);
|
||||
|
||||
/* Display WA #1125: skl,bxt,kbl,glk */
|
||||
if (level == 0 &&
|
||||
(fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
|
||||
fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS))
|
||||
res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
|
||||
if (level == 0 && wp->rc_surface)
|
||||
res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum);
|
||||
|
||||
/* Display WA #1126: skl,bxt,kbl,glk */
|
||||
if (level >= 1 && level <= 7) {
|
||||
if (y_tiled) {
|
||||
res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
|
||||
res_lines += y_min_scanlines;
|
||||
if (wp->y_tiled) {
|
||||
res_blocks += fixed16_to_u32_round_up(
|
||||
wp->y_tile_minimum);
|
||||
res_lines += wp->y_min_scanlines;
|
||||
} else {
|
||||
res_blocks++;
|
||||
}
|
||||
|
@ -4561,6 +4572,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
|
|||
struct skl_ddb_allocation *ddb,
|
||||
struct intel_crtc_state *cstate,
|
||||
const struct intel_plane_state *intel_pstate,
|
||||
const struct skl_wm_params *wm_params,
|
||||
struct skl_plane_wm *wm)
|
||||
{
|
||||
struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
|
||||
|
@ -4584,6 +4596,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
|
|||
intel_pstate,
|
||||
ddb_blocks,
|
||||
level,
|
||||
wm_params,
|
||||
&result->plane_res_b,
|
||||
&result->plane_res_l,
|
||||
&result->plane_en);
|
||||
|
@ -4648,11 +4661,18 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
|
|||
const struct intel_plane_state *intel_pstate =
|
||||
to_intel_plane_state(pstate);
|
||||
enum plane_id plane_id = to_intel_plane(plane)->id;
|
||||
struct skl_wm_params wm_params;
|
||||
|
||||
wm = &pipe_wm->planes[plane_id];
|
||||
memset(&wm_params, 0, sizeof(struct skl_wm_params));
|
||||
|
||||
ret = skl_compute_plane_wm_params(dev_priv, cstate,
|
||||
intel_pstate, &wm_params);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
|
||||
intel_pstate, wm);
|
||||
intel_pstate, &wm_params, wm);
|
||||
if (ret)
|
||||
return ret;
|
||||
skl_compute_transition_wm(cstate, &wm->trans_wm);
|
||||
|
|
Loading…
Reference in New Issue