drm/radeon: add query for number of active CUs

Query to find out how many compute units on a GPU.
Useful for OpenCL usermode drivers.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Alex Deucher 2014-06-02 16:13:21 -04:00
parent 478b6e7272
commit 65fcf668ee
10 changed files with 76 additions and 4 deletions

View File

@ -80,6 +80,7 @@ extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev); extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev); extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev); extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable); extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev); extern void cik_sdma_fini(struct radeon_device *rdev);
@ -3257,7 +3258,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
u32 mc_shared_chmap, mc_arb_ramcfg; u32 mc_shared_chmap, mc_arb_ramcfg;
u32 hdp_host_path_cntl; u32 hdp_host_path_cntl;
u32 tmp; u32 tmp;
int i, j; int i, j, k;
switch (rdev->family) { switch (rdev->family) {
case CHIP_BONAIRE: case CHIP_BONAIRE:
@ -3446,6 +3447,15 @@ static void cik_gpu_init(struct radeon_device *rdev)
rdev->config.cik.max_sh_per_se, rdev->config.cik.max_sh_per_se,
rdev->config.cik.max_backends_per_se); rdev->config.cik.max_backends_per_se);
for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
rdev->config.cik.active_cus +=
hweight32(cik_get_cu_active_bitmap(rdev, i, j));
}
}
}
/* set HW defaults for 3D engine */ /* set HW defaults for 3D engine */
WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

View File

@ -3337,6 +3337,18 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
disabled_rb_mask &= ~(1 << i); disabled_rb_mask &= ~(1 << i);
} }
for (i = 0; i < rdev->config.evergreen.num_ses; i++) {
u32 simd_disable_bitmap;
WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
simd_disable_bitmap |= 0xffffffff << rdev->config.evergreen.max_simds;
tmp <<= 16;
tmp |= simd_disable_bitmap;
}
rdev->config.evergreen.active_simds = hweight32(~tmp);
WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);

View File

@ -1057,6 +1057,18 @@ static void cayman_gpu_init(struct radeon_device *rdev)
disabled_rb_mask &= ~(1 << i); disabled_rb_mask &= ~(1 << i);
} }
for (i = 0; i < rdev->config.cayman.max_shader_engines; i++) {
u32 simd_disable_bitmap;
WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
simd_disable_bitmap |= 0xffffffff << rdev->config.cayman.max_simds_per_se;
tmp <<= 16;
tmp |= simd_disable_bitmap;
}
rdev->config.cayman.active_simds = hweight32(~tmp);
WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);

View File

@ -1958,6 +1958,9 @@ static void r600_gpu_init(struct radeon_device *rdev)
if (tmp < rdev->config.r600.max_simds) { if (tmp < rdev->config.r600.max_simds) {
rdev->config.r600.max_simds = tmp; rdev->config.r600.max_simds = tmp;
} }
tmp = rdev->config.r600.max_simds -
r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK);
rdev->config.r600.active_simds = tmp;
disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK; disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK;
tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT; tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;

View File

@ -1932,6 +1932,7 @@ struct r600_asic {
unsigned tiling_group_size; unsigned tiling_group_size;
unsigned tile_config; unsigned tile_config;
unsigned backend_map; unsigned backend_map;
unsigned active_simds;
}; };
struct rv770_asic { struct rv770_asic {
@ -1957,6 +1958,7 @@ struct rv770_asic {
unsigned tiling_group_size; unsigned tiling_group_size;
unsigned tile_config; unsigned tile_config;
unsigned backend_map; unsigned backend_map;
unsigned active_simds;
}; };
struct evergreen_asic { struct evergreen_asic {
@ -1983,6 +1985,7 @@ struct evergreen_asic {
unsigned tiling_group_size; unsigned tiling_group_size;
unsigned tile_config; unsigned tile_config;
unsigned backend_map; unsigned backend_map;
unsigned active_simds;
}; };
struct cayman_asic { struct cayman_asic {
@ -2021,6 +2024,7 @@ struct cayman_asic {
unsigned multi_gpu_tile_size; unsigned multi_gpu_tile_size;
unsigned tile_config; unsigned tile_config;
unsigned active_simds;
}; };
struct si_asic { struct si_asic {
@ -2051,6 +2055,7 @@ struct si_asic {
unsigned tile_config; unsigned tile_config;
uint32_t tile_mode_array[32]; uint32_t tile_mode_array[32];
uint32_t active_cus;
}; };
struct cik_asic { struct cik_asic {
@ -2082,6 +2087,7 @@ struct cik_asic {
unsigned tile_config; unsigned tile_config;
uint32_t tile_mode_array[32]; uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16]; uint32_t macrotile_mode_array[16];
uint32_t active_cus;
}; };
union radeon_asic_config { union radeon_asic_config {

View File

@ -81,9 +81,10 @@
* 2.37.0 - allow GS ring setup on r6xx/r7xx * 2.37.0 - allow GS ring setup on r6xx/r7xx
* 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN), * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
* CIK: 1D and linear tiling modes contain valid PIPE_CONFIG * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
* 2.39.0 - Add INFO query for number of active CUs
*/ */
#define KMS_DRIVER_MAJOR 2 #define KMS_DRIVER_MAJOR 2
#define KMS_DRIVER_MINOR 38 #define KMS_DRIVER_MINOR 39
#define KMS_DRIVER_PATCHLEVEL 0 #define KMS_DRIVER_PATCHLEVEL 0
int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
int radeon_driver_unload_kms(struct drm_device *dev); int radeon_driver_unload_kms(struct drm_device *dev);

View File

@ -513,6 +513,22 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
value_size = sizeof(uint64_t); value_size = sizeof(uint64_t);
value64 = atomic64_read(&rdev->gtt_usage); value64 = atomic64_read(&rdev->gtt_usage);
break; break;
case RADEON_INFO_ACTIVE_CU_COUNT:
if (rdev->family >= CHIP_BONAIRE)
*value = rdev->config.cik.active_cus;
else if (rdev->family >= CHIP_TAHITI)
*value = rdev->config.si.active_cus;
else if (rdev->family >= CHIP_CAYMAN)
*value = rdev->config.cayman.active_simds;
else if (rdev->family >= CHIP_CEDAR)
*value = rdev->config.evergreen.active_simds;
else if (rdev->family >= CHIP_RV770)
*value = rdev->config.rv770.active_simds;
else if (rdev->family >= CHIP_R600)
*value = rdev->config.r600.active_simds;
else
*value = 1;
break;
default: default:
DRM_DEBUG_KMS("Invalid request %d\n", info->request); DRM_DEBUG_KMS("Invalid request %d\n", info->request);
return -EINVAL; return -EINVAL;

View File

@ -1327,6 +1327,9 @@ static void rv770_gpu_init(struct radeon_device *rdev)
if (tmp < rdev->config.rv770.max_simds) { if (tmp < rdev->config.rv770.max_simds) {
rdev->config.rv770.max_simds = tmp; rdev->config.rv770.max_simds = tmp;
} }
tmp = rdev->config.rv770.max_simds -
r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
rdev->config.rv770.active_simds = tmp;
switch (rdev->config.rv770.max_tile_pipes) { switch (rdev->config.rv770.max_tile_pipes) {
case 1: case 1:

View File

@ -71,6 +71,7 @@ MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin"); MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin"); MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev); static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev); static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev); extern void sumo_rlc_fini(struct radeon_device *rdev);
@ -2900,7 +2901,7 @@ static void si_gpu_init(struct radeon_device *rdev)
u32 sx_debug_1; u32 sx_debug_1;
u32 hdp_host_path_cntl; u32 hdp_host_path_cntl;
u32 tmp; u32 tmp;
int i, j; int i, j, k;
switch (rdev->family) { switch (rdev->family) {
case CHIP_TAHITI: case CHIP_TAHITI:
@ -3098,6 +3099,14 @@ static void si_gpu_init(struct radeon_device *rdev)
rdev->config.si.max_sh_per_se, rdev->config.si.max_sh_per_se,
rdev->config.si.max_cu_per_sh); rdev->config.si.max_cu_per_sh);
for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
rdev->config.si.active_cus +=
hweight32(si_get_cu_active_bitmap(rdev, i, j));
}
}
}
/* set HW defaults for 3D engine */ /* set HW defaults for 3D engine */
WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |

View File

@ -1007,7 +1007,7 @@ struct drm_radeon_cs {
#define RADEON_INFO_NUM_BYTES_MOVED 0x1d #define RADEON_INFO_NUM_BYTES_MOVED 0x1d
#define RADEON_INFO_VRAM_USAGE 0x1e #define RADEON_INFO_VRAM_USAGE 0x1e
#define RADEON_INFO_GTT_USAGE 0x1f #define RADEON_INFO_GTT_USAGE 0x1f
#define RADEON_INFO_ACTIVE_CU_COUNT 0x20
struct drm_radeon_info { struct drm_radeon_info {
uint32_t request; uint32_t request;