From 58d97c99c99f4559072a5410ec0135271e24e95d Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 23 Aug 2022 17:14:03 -0400 Subject: [PATCH] drm/amd/display: Update MBLK calculation for SubVP [Description] Update MBLK calculation according to hardware doc. For DCC case we were not allocation enough MALL due to an inaccurate MBLK calculation. Tested-by: Daniel Wheeler Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 3 + .../display/dc/dcn32/dcn32_resource_helpers.c | 59 ++++++++++++++++--- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 1e7e6201c880..cf15d0e5e9b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -30,6 +30,9 @@ #define DCN3_2_DET_SEG_SIZE 64 #define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024 +#define DCN3_2_MBLK_WIDTH 128 +#define DCN3_2_MBLK_HEIGHT_4BPE 128 +#define DCN3_2_MBLK_HEIGHT_8BPE 64 #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index ab918fe38f6a..1f195c5b3377 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -46,7 +46,6 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_state *context) { uint32_t num_ways = 0; - uint32_t mall_region_pixels = 0; uint32_t bytes_per_pixel = 0; uint32_t cache_lines_used = 0; uint32_t lines_per_way = 0; @@ -54,20 +53,64 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat uint32_t bytes_in_mall = 0; uint32_t num_mblks = 0; uint32_t cache_lines_per_plane = 0; - uint32_t i = 0; + uint32_t i = 0, j = 0; + uint32_t mblk_width = 0; + uint32_t mblk_height = 0; + uint32_t full_vp_width_blk_aligned = 0; + uint32_t full_vp_height_blk_aligned = 0; + uint32_t mall_alloc_width_blk_aligned = 0; + uint32_t mall_alloc_height_blk_aligned = 0; + uint32_t full_vp_height = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // Find the phantom pipes - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; - mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable; + struct pipe_ctx *main_pipe = NULL; - // For bytes required in MALL, calculate based on number of MBlks required - num_mblks = (mall_region_pixels * bytes_per_pixel + - DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES; + /* Get full viewport height from main pipe (required for MBLK calculation) */ + for (j = 0; j < dc->res_pool->pipe_count; j++) { + main_pipe = &context->res_ctx.pipe_ctx[j]; + if (main_pipe->stream == pipe->stream->mall_stream_config.paired_stream) { + full_vp_height = main_pipe->plane_res.scl_data.viewport.height; + break; + } + } + + bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; + mblk_width = DCN3_2_MBLK_WIDTH; + mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : DCN3_2_MBLK_HEIGHT_8BPE; + + /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) - + * FLOOR(vp_x_start, blk_width) + */ + full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x + + pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) + + (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width); + + /* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) - + * FLOOR(vp_y_start, blk_height) + */ + full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y + + full_vp_height + mblk_height - 1) / mblk_height * mblk_height) + + (pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height); + + /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */ + mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; + + /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ + mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / + mblk_height * mblk_height + mblk_height; + + /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; + * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c; + * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c); + * (Should be divisible, but round up if not) + */ + num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * + ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment // (MALL is 64-byte aligned)