drm/amd/display: Rework dsc to isolate FPU operations
When we want to use float point operation on Linux we need to use within special kernel protection (`kernel_fpu_{begin,end}()`.), otherwise the kernel can clobber userspace FPU register state. For detecting these issues we use a tool named objtool (with -Ffa flags) to highlight the FPU problems, all warnings can be summed up as follows: ./tools/objtool/objtool check -Ffa drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.o [..] dc/dsc/rc_calc.o: warning: objtool: get_qp_set()+0x2f8: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: dsc_roundf()+0x5: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: dsc_ceil()+0x5: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: get_ofs_set()+0x3eb: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: calc_rc_params()+0x3c: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/dc_dsc.o: warning: objtool: get_dsc_bandwidth_range.isra.0()+0x8d: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/dc_dsc.o: warning: objtool: setup_dsc_config()+0x2ef: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc_dpi.o: warning: objtool:copy_pps_fields()+0xbb: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc_dpi.o: warning: objtool: dscc_compute_dsc_parameters()+0x7b: FPU instruction outside of kernel_fpu_{begin,end}() This commit fixes the above issues by rework DSC as described: 1. Isolate all FPU operations in a single file; 2. Use FPU flags only in the file that handles FPU operations; 3. Isolate all functions that require float point operation in static functions; 4. Add a mid-layer function that does not use any float point operation, and that could be safely invoked in other parts of the code. 5. Keep float point operation under DC_FP_{START/END} macro. CC: Christian König <christian.koenig@amd.com> CC: Alexander Deucher <Alexander.Deucher@amd.com> CC: Peter Zijlstra <peterz@infradead.org> CC: Tony Cheng <tony.cheng@amd.com> CC: Harry Wentland <hwentlan@amd.com> Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> Reviewed-by: Mikita Lipski <Mikita.Lipski@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
790243d3bf
commit
9ec051bf44
|
@ -28,8 +28,6 @@ endif
|
|||
endif
|
||||
|
||||
CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags)
|
||||
CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc_dpi.o := $(dsc_ccflags)
|
||||
CFLAGS_$(AMDDALPATH)/dc/dsc/dc_dsc.o := $(dsc_ccflags)
|
||||
|
||||
DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
|
||||
|
||||
|
|
|
@ -22,10 +22,12 @@
|
|||
* Author: AMD
|
||||
*/
|
||||
|
||||
#include <drm/drm_dsc.h>
|
||||
#include "dc_hw_types.h"
|
||||
#include "dsc.h"
|
||||
#include <drm/drm_dp_helper.h>
|
||||
#include "dc.h"
|
||||
#include "rc_calc.h"
|
||||
|
||||
/* This module's internal functions */
|
||||
|
||||
|
@ -304,22 +306,6 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value)
|
|||
return (value + 9) / 10;
|
||||
}
|
||||
|
||||
static inline uint32_t calc_dsc_bpp_x16(uint32_t stream_bandwidth_kbps, uint32_t pix_clk_100hz, uint32_t bpp_increment_div)
|
||||
{
|
||||
uint32_t dsc_target_bpp_x16;
|
||||
float f_dsc_target_bpp;
|
||||
float f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
|
||||
uint32_t precision = bpp_increment_div; // bpp_increment_div is actually precision
|
||||
|
||||
f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz;
|
||||
|
||||
// Round down to the nearest precision stop to bring it into DSC spec range
|
||||
dsc_target_bpp_x16 = (uint32_t)(f_dsc_target_bpp * precision);
|
||||
dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision;
|
||||
|
||||
return dsc_target_bpp_x16;
|
||||
}
|
||||
|
||||
/* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock
|
||||
* and uncompressed bandwidth.
|
||||
*/
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
* Authors: AMD
|
||||
*
|
||||
*/
|
||||
#include <drm/drm_dsc.h>
|
||||
|
||||
#include "os_types.h"
|
||||
#include "rc_calc.h"
|
||||
|
@ -40,7 +41,8 @@
|
|||
break
|
||||
|
||||
|
||||
void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp)
|
||||
static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
|
||||
enum max_min max_min, float bpp)
|
||||
{
|
||||
int mode = MODE_SELECT(444, 422, 420);
|
||||
int sel = table_hash(mode, bpc, max_min);
|
||||
|
@ -85,7 +87,7 @@ void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum ma
|
|||
memcpy(qps, table[index].qps, sizeof(qp_set));
|
||||
}
|
||||
|
||||
double dsc_roundf(double num)
|
||||
static double dsc_roundf(double num)
|
||||
{
|
||||
if (num < 0.0)
|
||||
num = num - 0.5;
|
||||
|
@ -95,7 +97,7 @@ double dsc_roundf(double num)
|
|||
return (int)(num);
|
||||
}
|
||||
|
||||
double dsc_ceil(double num)
|
||||
static double dsc_ceil(double num)
|
||||
{
|
||||
double retval = (int)num;
|
||||
|
||||
|
@ -105,7 +107,7 @@ double dsc_ceil(double num)
|
|||
return (int)retval;
|
||||
}
|
||||
|
||||
void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
|
||||
static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
|
||||
{
|
||||
int *p = ofs;
|
||||
|
||||
|
@ -160,7 +162,7 @@ void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
|
|||
}
|
||||
}
|
||||
|
||||
int median3(int a, int b, int c)
|
||||
static int median3(int a, int b, int c)
|
||||
{
|
||||
if (a > b)
|
||||
swap(a, b);
|
||||
|
@ -172,13 +174,25 @@ int median3(int a, int b, int c)
|
|||
return b;
|
||||
}
|
||||
|
||||
void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version)
|
||||
static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm,
|
||||
enum bits_per_comp bpc, u8 drm_bpp,
|
||||
bool is_navite_422_or_420,
|
||||
int slice_width, int slice_height,
|
||||
int minor_version)
|
||||
{
|
||||
float bpp;
|
||||
float bpp_group;
|
||||
float initial_xmit_delay_factor;
|
||||
int padding_pixels;
|
||||
int i;
|
||||
|
||||
bpp = ((float)drm_bpp / 16.0);
|
||||
/* in native_422 or native_420 modes, the bits_per_pixel is double the
|
||||
* target bpp (the latter is what calc_rc_params expects)
|
||||
*/
|
||||
if (is_navite_422_or_420)
|
||||
bpp /= 2.0;
|
||||
|
||||
rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
|
||||
rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
|
||||
|
||||
|
@ -251,3 +265,128 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com
|
|||
rc->rc_buf_thresh[13] = 8064;
|
||||
}
|
||||
|
||||
static u32 _do_bytes_per_pixel_calc(int slice_width, u8 drm_bpp,
|
||||
bool is_navite_422_or_420)
|
||||
{
|
||||
float bpp;
|
||||
u32 bytes_per_pixel;
|
||||
double d_bytes_per_pixel;
|
||||
|
||||
bpp = ((float)drm_bpp / 16.0);
|
||||
d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
|
||||
// TODO: Make sure the formula for calculating this is precise (ceiling
|
||||
// vs. floor, and at what point they should be applied)
|
||||
if (is_navite_422_or_420)
|
||||
d_bytes_per_pixel /= 2;
|
||||
|
||||
bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
|
||||
|
||||
return bytes_per_pixel;
|
||||
}
|
||||
|
||||
static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
|
||||
u32 bpp_increment_div)
|
||||
{
|
||||
u32 dsc_target_bpp_x16;
|
||||
float f_dsc_target_bpp;
|
||||
float f_stream_bandwidth_100bps;
|
||||
// bpp_increment_div is actually precision
|
||||
u32 precision = bpp_increment_div;
|
||||
|
||||
f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
|
||||
f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz;
|
||||
|
||||
// Round down to the nearest precision stop to bring it into DSC spec
|
||||
// range
|
||||
dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision);
|
||||
dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision;
|
||||
|
||||
return dsc_target_bpp_x16;
|
||||
}
|
||||
|
||||
/**
|
||||
* calc_rc_params - reads the user's cmdline mode
|
||||
* @rc: DC internal DSC parameters
|
||||
* @pps: DRM struct with all required DSC values
|
||||
*
|
||||
* This function expects a drm_dsc_config data struct with all the required DSC
|
||||
* values previously filled out by our driver and based on this information it
|
||||
* computes some of the DSC values.
|
||||
*
|
||||
* @note This calculation requires float point operation, most of it executes
|
||||
* under kernel_fpu_{begin,end}.
|
||||
*/
|
||||
void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps)
|
||||
{
|
||||
enum colour_mode mode;
|
||||
enum bits_per_comp bpc;
|
||||
bool is_navite_422_or_420;
|
||||
u8 drm_bpp = pps->bits_per_pixel;
|
||||
int slice_width = pps->slice_width;
|
||||
int slice_height = pps->slice_height;
|
||||
|
||||
mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 :
|
||||
(pps->native_422 ? CM_422 :
|
||||
pps->native_420 ? CM_420 : CM_444));
|
||||
bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10)
|
||||
? BPC_10 : BPC_12;
|
||||
|
||||
is_navite_422_or_420 = pps->native_422 || pps->native_420;
|
||||
|
||||
DC_FP_START();
|
||||
_do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420,
|
||||
slice_width, slice_height,
|
||||
pps->dsc_version_minor);
|
||||
DC_FP_END();
|
||||
}
|
||||
|
||||
/**
|
||||
* calc_dsc_bytes_per_pixel - calculate bytes per pixel
|
||||
* @pps: DRM struct with all required DSC values
|
||||
*
|
||||
* Based on the information inside drm_dsc_config, this function calculates the
|
||||
* total of bytes per pixel.
|
||||
*
|
||||
* @note This calculation requires float point operation, most of it executes
|
||||
* under kernel_fpu_{begin,end}.
|
||||
*
|
||||
* Return:
|
||||
* Return the number of bytes per pixel
|
||||
*/
|
||||
u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps)
|
||||
|
||||
{
|
||||
u32 ret;
|
||||
u8 drm_bpp = pps->bits_per_pixel;
|
||||
int slice_width = pps->slice_width;
|
||||
bool is_navite_422_or_420 = pps->native_422 || pps->native_420;
|
||||
|
||||
DC_FP_START();
|
||||
ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp,
|
||||
is_navite_422_or_420);
|
||||
DC_FP_END();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* calc_dsc_bpp_x16 - retrieve the dsc bits per pixel
|
||||
* @stream_bandwidth_kbps:
|
||||
* @pix_clk_100hz:
|
||||
* @bpp_increment_div:
|
||||
*
|
||||
* Calculate the total of bits per pixel for DSC configuration.
|
||||
*
|
||||
* @note This calculation requires float point operation, most of it executes
|
||||
* under kernel_fpu_{begin,end}.
|
||||
*/
|
||||
u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
|
||||
u32 bpp_increment_div)
|
||||
{
|
||||
u32 dsc_bpp;
|
||||
|
||||
DC_FP_START();
|
||||
dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz,
|
||||
bpp_increment_div);
|
||||
DC_FP_END();
|
||||
return dsc_bpp;
|
||||
}
|
||||
|
|
|
@ -77,7 +77,10 @@ struct qp_entry {
|
|||
|
||||
typedef struct qp_entry qp_table[];
|
||||
|
||||
void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version);
|
||||
void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps);
|
||||
u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps);
|
||||
u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
|
||||
u32 bpp_increment_div);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -27,8 +27,6 @@
|
|||
#include "dscc_types.h"
|
||||
#include "rc_calc.h"
|
||||
|
||||
double dsc_ceil(double num);
|
||||
|
||||
static void copy_pps_fields(struct drm_dsc_config *to, const struct drm_dsc_config *from)
|
||||
{
|
||||
to->line_buf_depth = from->line_buf_depth;
|
||||
|
@ -100,34 +98,13 @@ static void copy_rc_to_cfg(struct drm_dsc_config *dsc_cfg, const struct rc_param
|
|||
|
||||
int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_parameters *dsc_params)
|
||||
{
|
||||
enum colour_mode mode = pps->convert_rgb ? CM_RGB :
|
||||
(pps->simple_422 ? CM_444 :
|
||||
(pps->native_422 ? CM_422 :
|
||||
pps->native_420 ? CM_420 : CM_444));
|
||||
enum bits_per_comp bpc = (pps->bits_per_component == 8) ? BPC_8 :
|
||||
(pps->bits_per_component == 10) ? BPC_10 : BPC_12;
|
||||
float bpp = ((float) pps->bits_per_pixel / 16.0);
|
||||
int slice_width = pps->slice_width;
|
||||
int slice_height = pps->slice_height;
|
||||
int ret;
|
||||
struct rc_params rc;
|
||||
struct drm_dsc_config dsc_cfg;
|
||||
|
||||
double d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
|
||||
dsc_params->bytes_per_pixel = calc_dsc_bytes_per_pixel(pps);
|
||||
|
||||
// TODO: Make sure the formula for calculating this is precise (ceiling vs. floor, and at what point they should be applied)
|
||||
if (pps->native_422 || pps->native_420)
|
||||
d_bytes_per_pixel /= 2;
|
||||
|
||||
dsc_params->bytes_per_pixel = (uint32_t)dsc_ceil(d_bytes_per_pixel * 0x10000000);
|
||||
|
||||
/* in native_422 or native_420 modes, the bits_per_pixel is double the target bpp
|
||||
* (the latter is what calc_rc_params expects)
|
||||
*/
|
||||
if (pps->native_422 || pps->native_420)
|
||||
bpp /= 2.0;
|
||||
|
||||
calc_rc_params(&rc, mode, bpc, bpp, slice_width, slice_height, pps->dsc_version_minor);
|
||||
calc_rc_params(&rc, pps);
|
||||
dsc_params->pps = *pps;
|
||||
dsc_params->pps.initial_scale_value = 8 * rc.rc_model_size / (rc.rc_model_size - rc.initial_fullness_offset);
|
||||
|
||||
|
|
Loading…
Reference in New Issue